Closed
Description
To reproduce, create an `App.java` file with the contents below, and run it with htmlparser's jar available on the classpath.
Note that the XPath expression only works when parsing the source with the JDK's `DocumentBuilder`. When using htmlparser's `HtmlDocumentBuilder`, it always returns an empty result.
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/**
 * Minimal reproduction program: parses the same markup with several
 * {@link DocumentBuilder} implementations and prints, for each one, how many
 * nodes the XPath expression {@link #QUERY} selects.
 *
 * <p>The builders are exercised in this order: the JDK default builder, then
 * {@link HtmlDocumentBuilder} configured with {@code XmlViolationPolicy.FATAL},
 * {@code ALLOW} and {@code ALTER_INFOSET}. One line is printed per builder.
 *
 * <p>NOTE(review): if the htmlparser-built documents yield 0 matches, one
 * possible cause is that their elements live in a namespace that the
 * unprefixed name test in {@code //h3} does not match - confirm against the
 * htmlparser documentation.
 */
public class App {

    /** Markup handed to every builder: an XML declaration followed by an html/body with three h3 elements. */
    private static final String SOURCE =
            "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>"
                    + "<html>"
                    + "<body>"
                    + "<h3>foo</h3>"
                    + "<h3>bar</h3>"
                    + "<h3>baz</h3>"
                    + "</body>"
                    + "</html>";

    /** XPath expression evaluated against each parsed document. */
    private static final String QUERY = "//h3";

    /**
     * Runs the query once per builder and prints each match count to stdout.
     *
     * @param args ignored
     * @throws Exception if building a parser, parsing, or XPath evaluation fails
     */
    public static void main(String... args) throws Exception {
        // Baseline: the JDK's own DocumentBuilder.
        DocumentBuilder jdkBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        query(jdkBuilder);

        // htmlparser's builder, once per violation policy; each builder is
        // created immediately before it is used.
        XmlViolationPolicy[] policies = {
            XmlViolationPolicy.FATAL,
            XmlViolationPolicy.ALLOW,
            XmlViolationPolicy.ALTER_INFOSET
        };
        for (XmlViolationPolicy policy : policies) {
            query(new HtmlDocumentBuilder(policy));
        }
    }

    /**
     * Parses {@link #SOURCE} with the given builder, evaluates {@link #QUERY}
     * against the resulting document, and prints the number of selected nodes.
     *
     * @param builder the parser used to turn the markup into a DOM document
     * @throws Exception if parsing or XPath compilation/evaluation fails
     */
    private static void query(DocumentBuilder builder) throws Exception {
        InputSource input = new InputSource(new StringReader(SOURCE));
        Document parsed = builder.parse(input);

        // A fresh XPath expression is compiled for every call.
        XPathExpression compiled = XPathFactory.newInstance().newXPath().compile(QUERY);
        NodeList matches = (NodeList) compiled.evaluate(parsed, XPathConstants.NODESET);

        int matchCount = matches.getLength();
        System.out.println(matchCount);
    }
}
Metadata
Metadata
Assignees
Labels
No labels