Skip to content

Commit

Permalink
test: update tests to reflect new libxml2 HTML5 parsing behaviors
Browse files Browse the repository at this point in the history
  • Loading branch information
flavorjones committed Sep 19, 2024
1 parent d992447 commit 5a4a9c3
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 12 deletions.
55 changes: 50 additions & 5 deletions test/html4/test_comments.rb
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,19 @@ class TestComment < Nokogiri::TestCase
let(:doc) { Nokogiri::HTML4(html) }
let(:subject) { doc.at_css("div#under-test") }

if Nokogiri.uses_libxml?
if Nokogiri.uses_libxml?(">= 2.14.0")
it "behaves as if the comment is closed immediately before the end of the input stream" do # COMPLIANT
assert_pattern do
subject => {
name: "div",
attributes: [{ name: "id", value: "under-test" }],
children: [
{ name: "comment", content: "start of unterminated comment" }
]
}
end
end
elsif Nokogiri.uses_libxml?
it "behaves as if the comment is unterminated and doesn't exist" do # NON-COMPLIANT
assert_equal 0, subject.children.length
assert_equal 1, doc.errors.length
Expand Down Expand Up @@ -132,8 +144,12 @@ class TestComment < Nokogiri::TestCase
assert_equal inner_div, subject.children[1]
assert_predicate subject.children[2], :comment?
assert_equal "bar", subject.children[2].content
assert_equal 1, doc.errors.length
assert_match(/Comment incorrectly closed/, doc.errors.first.to_s)
if Nokogiri.uses_libxml?(">= 2.14.0")
assert_empty doc.errors
else
assert_equal 1, doc.errors.length
assert_match(/Comment incorrectly closed/, doc.errors.first.to_s)
end
end
else # jruby, or libxml2 system lib less than 2.9.11
it "behaves as if the comment encompasses the inner div" do # NON-COMPLIANT
Expand Down Expand Up @@ -161,7 +177,22 @@ class TestComment < Nokogiri::TestCase
let(:body) { doc.at_css("body") }
let(:subject) { doc.at_css("div#under-test") }

if Nokogiri.uses_libxml?("= 2.9.14")
if Nokogiri.uses_libxml?(">= 2.14.0")
it "parses as comments" do # COMPLIANT
assert_pattern do
body.children => [
{
name: "div",
children: [
{ name: "comment", content: " comment <div id=do-i-exist" },
{ name: "text", content: "inner content" },
]
},
{ name: "text", content: "-->hello" },
]
end
end
elsif Nokogiri.uses_libxml?("= 2.9.14")
it "parses as PCDATA" do # NON-COMPLIANT
assert_equal 1, body.children.length
assert_equal subject, body.children.first
Expand Down Expand Up @@ -212,7 +243,21 @@ class TestComment < Nokogiri::TestCase
let(:body) { doc.at_css("body") }
let(:subject) { doc.at_css("div#under-test") }

if Nokogiri.uses_libxml?("= 2.9.14")
if Nokogiri.uses_libxml?(">= 2.14.0")
it "parses the <! tags as comments" do
assert_pattern do
body.children => [
{
name: "div", children: [
{ name: "comment", content: "[if foo]" },
{ name: "div", attributes: [{name: "id", value: "do-i-exist"}] },
{ name: "comment", content: "[endif]" },
]
}
]
end
end
elsif Nokogiri.uses_libxml?("= 2.9.14")
it "parses the <! tags as PCDATA" do
assert_equal(1, body.children.length)
assert_equal(subject, body.children.first)
Expand Down
2 changes: 2 additions & 0 deletions test/html4/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,8 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
doc = Nokogiri::HTML4::Document.parse(html)
expected = if Nokogiri.jruby?
[Nokogiri::XML::Node::COMMENT_NODE, Nokogiri::XML::Node::PI_NODE]
elsif Nokogiri.uses_libxml?(">= 2.14.0")
[Nokogiri::XML::Node::COMMENT_NODE, Nokogiri::XML::Node::COMMENT_NODE]
elsif Nokogiri.uses_libxml?(">= 2.10.0")
[Nokogiri::XML::Node::COMMENT_NODE]
else
Expand Down
24 changes: 23 additions & 1 deletion test/html4/test_document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,29 @@ def test_element_children_counts

# Parses the malformed fragment "<div </div>" and checks how the parser
# recovered from it. The expected result depends on the libxml2 version, so
# every assertion lives inside the version-gated branches below; nothing is
# asserted unconditionally.
def test_malformed_fragment_is_corrected
  fragment = Nokogiri::HTML4::DocumentFragment.parse("<div </div>")

  if Nokogiri.uses_libxml?(">= 2.14.0")
    # Expect a single div whose attributes are the stray "<" and "div"
    # tokens, each with an empty value.
    assert_pattern do
      fragment => [
        { name: "div", attributes: [
          { name: "<", value: "" },
          { name: "div", value: "" },
        ] }
      ]
    end
  else
    # Every other configuration is expected to serialize the fragment as an
    # empty div.
    assert_equal("<div></div>", fragment.to_s)
  end
end

# Records the serialization we would like for the malformed fragment
# "<div </div>": the form with the stray tokens written out as empty-valued
# attributes. The assertion is wrapped in `pending` because, per its message,
# libxml2 does not serialize HTML5 like gumbo yet.
def test_malformed_html5_fragment_serializes_like_gumbo
  # NOTE(review): presumably skips the test on libxml2 older than 2.14.0 —
  # confirm against the helper's definition.
  skip_unless_libxml2(">= 2.14.0")

  malformed_markup = "<div </div>"
  gumbo_style_output = '<div <="" div=""></div>'
  parsed = Nokogiri::HTML4::DocumentFragment.parse(malformed_markup)

  pending("libxml2 does not serialize HTML5 like gumbo (yet)") do
    assert_equal(gumbo_style_output, parsed.to_s)
  end
end

def test_unclosed_script_tag
Expand Down
33 changes: 27 additions & 6 deletions test/xml/test_node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,19 @@ def test_node_context_parsing_of_malformed_html_fragment
context_node = doc.at_css("div")
nodeset = context_node.parse("<div </div>")

assert_equal(1, doc.errors.length)
assert_equal(1, nodeset.length)
assert_equal("<div></div>", nodeset.to_s)
if Nokogiri.uses_libxml?(">= 2.14.0")
assert_empty(doc.errors)
assert_pattern do
nodeset => [
{ name: "div", attributes: [{name: "<", value: ""}, { name: "div", value: ""}] },
]
end
else
assert_equal(1, doc.errors.length)
assert_equal(1, nodeset.length)
assert_equal("<div></div>", nodeset.to_s)
end

assert_instance_of(Nokogiri::HTML4::Document, nodeset.document)
assert_instance_of(Nokogiri::HTML4::Document, nodeset.first.document)
end
Expand All @@ -117,14 +127,25 @@ def test_node_context_parsing_of_malformed_html_fragment_with_recover_is_correct
context_node = doc.at_css("div")
nodeset = context_node.parse("<div </div>", &:recover)

assert_equal(1, doc.errors.length)
assert_equal(1, nodeset.length)
assert_equal("<div></div>", nodeset.to_s)
if Nokogiri.uses_libxml?(">= 2.14.0")
assert_empty(doc.errors)
assert_pattern do
nodeset => [
{ name: "div", attributes: [{name: "<", value: ""}, { name: "div", value: ""}] },
]
end
else
assert_equal(1, doc.errors.length)
assert_equal(1, nodeset.length)
assert_equal("<div></div>", nodeset.to_s)
end
assert_instance_of(Nokogiri::HTML4::Document, nodeset.document)
assert_instance_of(Nokogiri::HTML4::Document, nodeset.first.document)
end

def test_node_context_parsing_of_malformed_html_fragment_without_recover_is_not_corrected
skip("libxml2 2.14.0 no longer raises this error") if Nokogiri.uses_libxml?(">= 2.14.0")

doc = HTML4.parse("<html><body><div></div></body></html>")
context_node = doc.at_css("div")
assert_raises(Nokogiri::XML::SyntaxError) do
Expand Down

0 comments on commit 5a4a9c3

Please sign in to comment.