Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not ignore XML comments when parsing #549

Merged
merged 1 commit into from
Sep 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,15 @@ lazy val xml = crossProject(JSPlatform, JVMPlatform, NativePlatform)
// we compare classes built on JDK 16 (which we only do on CI, not at release time)
// to previous-version artifacts that were built on 8. see scala/scala-xml#501
exclude[DirectMissingMethodProblem]("scala.xml.include.sax.XIncluder.declaration"),

// caused by the switch from DefaultHandler to DefaultHandler2:
exclude[MissingTypesProblem]("scala.xml.parsing.FactoryAdapter"),
exclude[MissingTypesProblem]("scala.xml.parsing.NoBindingFactoryAdapter"),

exclude[DirectMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.comment"),
exclude[ReversedMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.createComment"),
exclude[DirectMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.createComment"),
exclude[DirectMissingMethodProblem]("scala.xml.parsing.NoBindingFactoryAdapter.createComment")
)
},

Expand Down
17 changes: 15 additions & 2 deletions jvm/src/test/scala/scala/xml/XMLTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import org.junit.{Test => UnitTest}
import org.junit.Assert.assertTrue
import org.junit.Assert.assertFalse
import org.junit.Assert.assertEquals
import scala.xml.parsing.ConstructingParser
import java.io.StringWriter
import java.io.ByteArrayOutputStream
import java.io.StringReader
Expand Down Expand Up @@ -581,6 +580,21 @@ class XMLTestJVM {
XML.loadString(broken)
}

@UnitTest
def issue508: Unit = {
def check(xml: String): Unit = assertEquals(xml, XML.loadString(xml).toString)

check("<a><!-- comment --> suffix</a>")
check("<a>prefix <!-- comment --> suffix</a>")
check("<a>prefix <b><!-- comment --></b> suffix</a>")

// TODO since XMLLoader retrieves FactoryAdapter.rootNode,
// capturing comments before and after the root element is not currently possible
// (by the way, the same applies to processing instructions).
//check("<!-- prologue --><a>text</a>")
//check("<a>text</a><!-- epilogue -->")
}

@UnitTest
def nodeSeqNs: Unit = {
val x = {
Expand Down Expand Up @@ -746,5 +760,4 @@ class XMLTestJVM {

assertEquals("", x.xEntityValue())
}

}
4 changes: 2 additions & 2 deletions shared/src/main/scala/scala/xml/factory/NodeFactory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ trait NodeFactory[A <: Node] {
def eqElements(ch1: Seq[Node], ch2: Seq[Node]): Boolean =
ch1.view.zipAll(ch2.view, null, null) forall { case (x, y) => x eq y }

def nodeEquals(n: Node, pre: String, name: String, attrSeq: MetaData, scope: NamespaceBinding, children: Seq[Node]) =
def nodeEquals(n: Node, pre: String, name: String, attrSeq: MetaData, scope: NamespaceBinding, children: Seq[Node]): Boolean =
n.prefix == pre &&
n.label == name &&
n.attributes == attrSeq &&
Expand All @@ -55,7 +55,7 @@ trait NodeFactory[A <: Node] {
}
}

def makeText(s: String) = Text(s)
def makeText(s: String): Text = Text(s)
def makeComment(s: String): Seq[Comment] =
if (ignoreComments) Nil else List(Comment(s))
def makeProcInstr(t: String, s: String): Seq[ProcInstr] =
Expand Down
13 changes: 10 additions & 3 deletions shared/src/main/scala/scala/xml/factory/XMLLoader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ package scala
package xml
package factory

import org.xml.sax.SAXNotRecognizedException
import javax.xml.parsers.SAXParserFactory
import parsing.{ FactoryAdapter, NoBindingFactoryAdapter }
import java.io.{ InputStream, Reader, File, FileDescriptor }
import parsing.{FactoryAdapter, NoBindingFactoryAdapter}
import java.io.{File, FileDescriptor, InputStream, Reader}
import java.net.URL

/**
Expand All @@ -28,7 +29,7 @@ trait XMLLoader[T <: Node] {
def adapter: FactoryAdapter = new NoBindingFactoryAdapter()

private lazy val parserInstance = new ThreadLocal[SAXParser] {
override def initialValue = {
override def initialValue: SAXParser = {
val parser = SAXParserFactory.newInstance()
parser.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true)
parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)
Expand All @@ -52,6 +53,12 @@ trait XMLLoader[T <: Node] {
def loadXML(source: InputSource, parser: SAXParser): T = {
val newAdapter = adapter

try {
parser.setProperty("http://xml.org/sax/properties/lexical-handler", newAdapter)
} catch {
case _: SAXNotRecognizedException =>
}

newAdapter.scopeStack = TopScope :: newAdapter.scopeStack
parser.parse(source, newAdapter)
newAdapter.scopeStack = newAdapter.scopeStack.tail
Expand Down
27 changes: 19 additions & 8 deletions shared/src/main/scala/scala/xml/parsing/FactoryAdapter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ package parsing

import scala.collection.Seq
import org.xml.sax.Attributes
import org.xml.sax.helpers.DefaultHandler
import org.xml.sax.ext.DefaultHandler2

// can be mixed into FactoryAdapter if desired
trait ConsoleErrorHandler extends DefaultHandler {
trait ConsoleErrorHandler extends DefaultHandler2 {
// ignore warning, crimson warns even for entity resolution!
override def warning(ex: SAXParseException): Unit = {}
override def error(ex: SAXParseException): Unit = printError("Error", ex)
Expand All @@ -39,8 +39,8 @@ trait ConsoleErrorHandler extends DefaultHandler {
* namespace bindings, without relying on namespace handling of the
* underlying SAX parser.
*/
abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node] {
var rootElem: Node = null
abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Node] {
var rootElem: Node = _

val buffer = new StringBuilder()
/** List of attributes
Expand Down Expand Up @@ -72,7 +72,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
*/
var scopeStack = List.empty[NamespaceBinding]

var curTag: String = null
var curTag: String = _
var capture: Boolean = false

// abstract methods
Expand Down Expand Up @@ -105,6 +105,11 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
*/
def createProcInstr(target: String, data: String): Seq[ProcInstr]

/**
* creates a new comment node.
*/
def createComment(characters: String): Seq[Comment]

//
// ContentHandler methods
//
Expand All @@ -118,7 +123,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
* @param length
*/
override def characters(ch: Array[Char], offset: Int, length: Int): Unit = {
if (!capture) return
if (!capture) ()
// compliant: report every character
else if (!normalizeWhitespace) buffer.appendAll(ch, offset, length)
// normalizing whitespace is not compliant, but useful
Expand Down Expand Up @@ -170,7 +175,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node

if (pre == "xmlns" || (pre == null && qname == "xmlns")) {
val arg = if (pre == null) null else key
scpe = new NamespaceBinding(arg, nullIfEmpty(value), scpe)
scpe = NamespaceBinding(arg, nullIfEmpty(value), scpe)
} else
m = Attribute(Option(pre), key, Text(value), m)
}
Expand All @@ -183,7 +188,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
* captures text, possibly normalizing whitespace
*/
def captureText(): Unit = {
if (capture && buffer.length > 0)
if (capture && buffer.nonEmpty)
hStack = createText(buffer.toString) :: hStack

buffer.clear()
Expand Down Expand Up @@ -226,4 +231,10 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
captureText()
hStack = hStack.reverse_:::(createProcInstr(target, data).toList)
}

override def comment(ch: Array[Char], start: Int, length: Int): Unit = {
captureText()
val commentText: String = String.valueOf(ch.slice(start, start + length))
hStack = hStack.reverse_:::(createComment(commentText).toList)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,23 @@ import factory.NodeFactory
*/
class NoBindingFactoryAdapter extends FactoryAdapter with NodeFactory[Elem] {
/** True. Every XML node may contain text that the application needs */
def nodeContainsText(label: String) = true
override def nodeContainsText(label: String) = true

/** From NodeFactory. Constructs an instance of scala.xml.Elem -- TODO: deprecate as in Elem */
protected def create(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: Seq[Node]): Elem =
override protected def create(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: Seq[Node]): Elem =
Elem(pre, label, attrs, scope, children.isEmpty, children: _*)

/** From FactoryAdapter. Creates a node. never creates the same node twice, using hash-consing.
TODO: deprecate as in Elem, or forward to create?? */
def createNode(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: List[Node]): Elem =
override def createNode(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: List[Node]): Elem =
Elem(pre, label, attrs, scope, children.isEmpty, children: _*)

/** Creates a text node. */
def createText(text: String) = Text(text)
override def createText(text: String): Text = makeText(text)

/** Creates a processing instruction. */
def createProcInstr(target: String, data: String) = makeProcInstr(target, data)
override def createProcInstr(target: String, data: String): Seq[ProcInstr] = makeProcInstr(target, data)

/** Creates a comment. */
override def createComment(characters: String): Seq[Comment] = makeComment(characters)
}