Skip to content

Commit

Permalink
Merge pull request #549 from dubinsky/parse-comments
Browse files Browse the repository at this point in the history
Do not ignore XML comments when parsing
  • Loading branch information
ashawley authored Sep 1, 2021
2 parents deff368 + f92ae49 commit 622fadf
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 20 deletions.
9 changes: 9 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,15 @@ lazy val xml = crossProject(JSPlatform, JVMPlatform, NativePlatform)
// we compare classes built on JDK 16 (which we only do on CI, not at release time)
// to previous-version artifacts that were built on 8. see scala/scala-xml#501
exclude[DirectMissingMethodProblem]("scala.xml.include.sax.XIncluder.declaration"),

// caused by the switch from DefaultHandler to DefaultHandler2:
exclude[MissingTypesProblem]("scala.xml.parsing.FactoryAdapter"),
exclude[MissingTypesProblem]("scala.xml.parsing.NoBindingFactoryAdapter"),

exclude[DirectMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.comment"),
exclude[ReversedMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.createComment"),
exclude[DirectMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.createComment"),
exclude[DirectMissingMethodProblem]("scala.xml.parsing.NoBindingFactoryAdapter.createComment")
)
},

Expand Down
17 changes: 15 additions & 2 deletions jvm/src/test/scala/scala/xml/XMLTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import org.junit.{Test => UnitTest}
import org.junit.Assert.assertTrue
import org.junit.Assert.assertFalse
import org.junit.Assert.assertEquals
import scala.xml.parsing.ConstructingParser
import java.io.StringWriter
import java.io.ByteArrayOutputStream
import java.io.StringReader
Expand Down Expand Up @@ -581,6 +580,21 @@ class XMLTestJVM {
XML.loadString(broken)
}

@UnitTest
def issue508: Unit = {
def check(xml: String): Unit = assertEquals(xml, XML.loadString(xml).toString)

check("<a><!-- comment --> suffix</a>")
check("<a>prefix <!-- comment --> suffix</a>")
check("<a>prefix <b><!-- comment --></b> suffix</a>")

// TODO since XMLLoader retrieves FactoryAdapter.rootNode,
// capturing comments before and after the root element is not currently possible
// (by the way, the same applies to processing instructions).
//check("<!-- prologue --><a>text</a>")
//check("<a>text</a><!-- epilogue -->")
}

@UnitTest
def nodeSeqNs: Unit = {
val x = {
Expand Down Expand Up @@ -746,5 +760,4 @@ class XMLTestJVM {

assertEquals("", x.xEntityValue())
}

}
4 changes: 2 additions & 2 deletions shared/src/main/scala/scala/xml/factory/NodeFactory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ trait NodeFactory[A <: Node] {
def eqElements(ch1: Seq[Node], ch2: Seq[Node]): Boolean =
ch1.view.zipAll(ch2.view, null, null) forall { case (x, y) => x eq y }

def nodeEquals(n: Node, pre: String, name: String, attrSeq: MetaData, scope: NamespaceBinding, children: Seq[Node]) =
def nodeEquals(n: Node, pre: String, name: String, attrSeq: MetaData, scope: NamespaceBinding, children: Seq[Node]): Boolean =
n.prefix == pre &&
n.label == name &&
n.attributes == attrSeq &&
Expand All @@ -55,7 +55,7 @@ trait NodeFactory[A <: Node] {
}
}

def makeText(s: String) = Text(s)
def makeText(s: String): Text = Text(s)
def makeComment(s: String): Seq[Comment] =
if (ignoreComments) Nil else List(Comment(s))
def makeProcInstr(t: String, s: String): Seq[ProcInstr] =
Expand Down
13 changes: 10 additions & 3 deletions shared/src/main/scala/scala/xml/factory/XMLLoader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ package scala
package xml
package factory

import org.xml.sax.SAXNotRecognizedException
import javax.xml.parsers.SAXParserFactory
import parsing.{ FactoryAdapter, NoBindingFactoryAdapter }
import java.io.{ InputStream, Reader, File, FileDescriptor }
import parsing.{FactoryAdapter, NoBindingFactoryAdapter}
import java.io.{File, FileDescriptor, InputStream, Reader}
import java.net.URL

/**
Expand All @@ -28,7 +29,7 @@ trait XMLLoader[T <: Node] {
def adapter: FactoryAdapter = new NoBindingFactoryAdapter()

private lazy val parserInstance = new ThreadLocal[SAXParser] {
override def initialValue = {
override def initialValue: SAXParser = {
val parser = SAXParserFactory.newInstance()
parser.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true)
parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)
Expand All @@ -52,6 +53,12 @@ trait XMLLoader[T <: Node] {
def loadXML(source: InputSource, parser: SAXParser): T = {
val newAdapter = adapter

try {
parser.setProperty("http://xml.org/sax/properties/lexical-handler", newAdapter)
} catch {
case _: SAXNotRecognizedException =>
}

newAdapter.scopeStack = TopScope :: newAdapter.scopeStack
parser.parse(source, newAdapter)
newAdapter.scopeStack = newAdapter.scopeStack.tail
Expand Down
27 changes: 19 additions & 8 deletions shared/src/main/scala/scala/xml/parsing/FactoryAdapter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ package parsing

import scala.collection.Seq
import org.xml.sax.Attributes
import org.xml.sax.helpers.DefaultHandler
import org.xml.sax.ext.DefaultHandler2

// can be mixed into FactoryAdapter if desired
trait ConsoleErrorHandler extends DefaultHandler {
trait ConsoleErrorHandler extends DefaultHandler2 {
// ignore warning, crimson warns even for entity resolution!
override def warning(ex: SAXParseException): Unit = {}
override def error(ex: SAXParseException): Unit = printError("Error", ex)
Expand All @@ -39,8 +39,8 @@ trait ConsoleErrorHandler extends DefaultHandler {
* namespace bindings, without relying on namespace handling of the
* underlying SAX parser.
*/
abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node] {
var rootElem: Node = null
abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Node] {
var rootElem: Node = _

val buffer = new StringBuilder()
/** List of attributes
Expand Down Expand Up @@ -72,7 +72,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
*/
var scopeStack = List.empty[NamespaceBinding]

var curTag: String = null
var curTag: String = _
var capture: Boolean = false

// abstract methods
Expand Down Expand Up @@ -105,6 +105,11 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
*/
def createProcInstr(target: String, data: String): Seq[ProcInstr]

/**
* creates a new comment node.
*/
def createComment(characters: String): Seq[Comment]

//
// ContentHandler methods
//
Expand All @@ -118,7 +123,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
* @param length
*/
override def characters(ch: Array[Char], offset: Int, length: Int): Unit = {
if (!capture) return
if (!capture) ()
// compliant: report every character
else if (!normalizeWhitespace) buffer.appendAll(ch, offset, length)
// normalizing whitespace is not compliant, but useful
Expand Down Expand Up @@ -170,7 +175,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node

if (pre == "xmlns" || (pre == null && qname == "xmlns")) {
val arg = if (pre == null) null else key
scpe = new NamespaceBinding(arg, nullIfEmpty(value), scpe)
scpe = NamespaceBinding(arg, nullIfEmpty(value), scpe)
} else
m = Attribute(Option(pre), key, Text(value), m)
}
Expand All @@ -183,7 +188,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
* captures text, possibly normalizing whitespace
*/
def captureText(): Unit = {
if (capture && buffer.length > 0)
if (capture && buffer.nonEmpty)
hStack = createText(buffer.toString) :: hStack

buffer.clear()
Expand Down Expand Up @@ -226,4 +231,10 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
captureText()
hStack = hStack.reverse_:::(createProcInstr(target, data).toList)
}

override def comment(ch: Array[Char], start: Int, length: Int): Unit = {
captureText()
val commentText: String = String.valueOf(ch.slice(start, start + length))
hStack = hStack.reverse_:::(createComment(commentText).toList)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,23 @@ import factory.NodeFactory
*/
class NoBindingFactoryAdapter extends FactoryAdapter with NodeFactory[Elem] {
/** True. Every XML node may contain text that the application needs */
def nodeContainsText(label: String) = true
override def nodeContainsText(label: String) = true

/** From NodeFactory. Constructs an instance of scala.xml.Elem -- TODO: deprecate as in Elem */
protected def create(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: Seq[Node]): Elem =
override protected def create(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: Seq[Node]): Elem =
Elem(pre, label, attrs, scope, children.isEmpty, children: _*)

/** From FactoryAdapter. Creates a node. never creates the same node twice, using hash-consing.
TODO: deprecate as in Elem, or forward to create?? */
def createNode(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: List[Node]): Elem =
override def createNode(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: List[Node]): Elem =
Elem(pre, label, attrs, scope, children.isEmpty, children: _*)

/** Creates a text node. */
def createText(text: String) = Text(text)
override def createText(text: String): Text = makeText(text)

/** Creates a processing instruction. */
def createProcInstr(target: String, data: String) = makeProcInstr(target, data)
override def createProcInstr(target: String, data: String): Seq[ProcInstr] = makeProcInstr(target, data)

/** Creates a comment. */
override def createComment(characters: String): Seq[Comment] = makeComment(characters)
}

0 comments on commit 622fadf

Please sign in to comment.