diff --git a/CHANGELOG.md b/CHANGELOG.md index 17dd5c6a13..4d723481f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA * [CRuby] When compiling packaged libraries from source, allow users' `AR` and `LD` environment variables to set the archiver and linker commands, respectively. This augments the existing `CC` environment variable to set the compiler command. [#3165] @ziggythehamster * [CRuby] The HTML5 parse methods accept a `:parse_noscript_content_as_text` keyword argument which will emulate the parsing behavior of a browser which has scripting enabled. [#3178, #3231] @stevecheckoway * [CRuby] `HTML5::DocumentFragment.parse` and `.new` accept a `:context` keyword argument that is the parse context node or element name. Previously this could only be passed in as a positional argument to `.new` and not at all to `.parse`. @flavorjones +* [CRuby] The update to libxml2 v2.13 improves "in context" fragment parsing recovery. We removed our hacky workaround for recovery that led to silently-degraded functionality when parsing fragments with parse errors. Specifically, malformed XML fragments that used implicit namespace prefixes will now "link up" to the namespaces in the parent document or node, where previously they did not. [#2092] @flavorjones ### Fixed diff --git a/lib/nokogiri/xml/node.rb b/lib/nokogiri/xml/node.rb index 7f0761217d..e62e8b9722 100644 --- a/lib/nokogiri/xml/node.rb +++ b/lib/nokogiri/xml/node.rb @@ -1086,9 +1086,11 @@ def parse(string_or_io, options = nil) error_count = document.errors.length node_set = in_context(contents, options.to_i) + if document.errors.length > error_count raise document.errors[error_count] unless options.recover? + # TODO: remove this block when libxml2 < 2.13 is no longer supported if node_set.empty? 
# libxml2 < 2.13 does not obey the +recover+ option after encountering errors during # +in_context+ parsing, and so this horrible hack is here to try to emulate recovery @@ -1115,6 +1117,7 @@ def parse(string_or_io, options = nil) node_set = fragment.children end end + node_set end diff --git a/test/xml/test_document_fragment.rb b/test/xml/test_document_fragment.rb index 46ab633f0c..02eb8fc581 100644 --- a/test/xml/test_document_fragment.rb +++ b/test/xml/test_document_fragment.rb @@ -309,6 +309,20 @@ def test_dup_creates_mutable_tree refute_nil(duplicate.at_css("b")) end + def test_in_context_fragment_parsing_recovery + skip("This tests behavior in libxml 2.13") unless Nokogiri.uses_libxml?(">= 2.13.0") + + # https://github.com/sparklemotion/nokogiri/issues/2092 + context_xml = "<root xmlns:n='https://example.com/foo'></root>" + context_doc = Nokogiri::XML::Document.parse(context_xml) + invalid_xml_fragment = "<n:a><b></n:a>" # note missing closing tag for `b` + fragment = context_doc.root.parse(invalid_xml_fragment) + + assert_equal("a", fragment.first.name) + assert_equal("n", fragment.first.namespace.prefix) + assert_equal("https://example.com/foo", fragment.first.namespace.href) + end + def test_for_libxml_in_context_fragment_parsing_bug_workaround skip_unless_libxml2("valgrind tests should only run with libxml2")