Skip to content

Commit

Permalink
Added forms(), comments(), textNodes(), dataNodes()
Browse files Browse the repository at this point in the history
  • Loading branch information
jhy committed Feb 22, 2020
1 parent d9dfc4a commit a657ae0
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 9 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ jsoup changelog
selector if using the same evaluator many times.
<https://github.com/jhy/jsoup/issues/1319>

* Improvement: added Elements#forms(), Elements#textNodes(), Elements#dataNodes(), and Elements#comments(), as a
convenient way to get access to these node types directly from an element selection.

* Bugfix: in a <select> tag, a second <optgroup> would not automatically close an earlier open <optgroup>
<https://github.com/jhy/jsoup/issues/1313>

Expand Down
52 changes: 46 additions & 6 deletions src/main/java/org/jsoup/select/Elements.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package org.jsoup.select;

import org.jsoup.internal.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.FormElement;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;

import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -633,11 +636,48 @@ public Elements filter(NodeFilter nodeFilter) {
* no forms.
*/
public List<FormElement> forms() {
    // The earlier hand-rolled instanceof loop is superseded: every node-type accessor in this class
    // (forms, comments, textNodes, dataNodes) goes through the shared nodesOfType helper.
    return nodesOfType(FormElement.class);
}

/**
* Get {@link Comment} nodes that are direct child nodes of the selected elements.
* <p>Only the immediate child nodes of each selected element are inspected; a comment nested deeper is returned
* only if its own parent element is part of this selection.</p>
* @return Comment nodes, or an empty list if none.
*/
public List<Comment> comments() {
return nodesOfType(Comment.class);
}

/**
* Get {@link TextNode} nodes that are direct child nodes of the selected elements.
* <p>Only the immediate child nodes of each selected element are inspected; text held by a nested element is
* returned only if that nested element is itself part of this selection.</p>
* @return TextNode nodes, or an empty list if none.
*/
public List<TextNode> textNodes() {
return nodesOfType(TextNode.class);
}

/**
* Get {@link DataNode} nodes that are direct child nodes of the selected elements. DataNode nodes contain the
* content of tags such as {@code script}, {@code style} etc and are distinct from {@link TextNode}s.
* <p>Only the immediate child nodes of each selected element are inspected.</p>
* @return DataNode nodes, or an empty list if none.
*/
public List<DataNode> dataNodes() {
return nodesOfType(DataNode.class);
}

/**
 * Collect the nodes of the requested type from this selection.
 * <p>A selected element that is itself an instance of {@code tClass} (for example a {@link FormElement}) is
 * collected directly and is not descended into. For every other selected element, its direct child nodes are
 * scanned and those that are instances of {@code tClass} are collected.</p>
 * <p>Each node is returned at most once, compared by identity, so an element that is both selected and a child
 * of another selected element does not appear twice.</p>
 * @param tClass the node type to collect
 * @param <T> the node type
 * @return a new list of the matching nodes, in the order they were first encountered; empty if none match
 */
private <T extends Node> List<T> nodesOfType(Class<T> tClass) {
    List<T> nodes = new ArrayList<>();
    // Identity-based "already collected" set. Fully qualified names are used so that no new imports are needed.
    java.util.Set<Node> seen =
        java.util.Collections.newSetFromMap(new java.util.IdentityHashMap<Node, Boolean>());

    for (Element el : this) {
        // The receiver and argument were previously swapped (el.getClass().isInstance(tClass)), which asked
        // whether the Class object was an Element. That is never true, so selected forms were never collected.
        if (tClass.isInstance(el)) { // Handles FormElements
            if (seen.add(el)) {
                nodes.add(tClass.cast(el));
            }
            continue;
        }

        // T is bounded by Node, so every other selected element simply has its direct children checked.
        for (int i = 0; i < el.childNodeSize(); i++) {
            Node child = el.childNode(i);
            if (tClass.isInstance(child) && seen.add(child)) {
                nodes.add(tClass.cast(child));
            }
        }
    }
    return nodes;
}

}
56 changes: 53 additions & 3 deletions src/test/java/org/jsoup/select/ElementsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@

import org.jsoup.Jsoup;
import org.jsoup.TextUtil;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.FormElement;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.junit.Test;

import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.*;

/**
Tests for Elements.
Expand Down Expand Up @@ -298,6 +299,55 @@ public void tail(Node node, int depth) {
assertEquals("2", forms.get(1).id());
}

@Test public void comments() {
    // comment1 sits before the paragraphs; comment2 and comment3 are each a child of one paragraph
    final String html = "<!-- comment1 --><p><!-- comment2 --><p class=two><!-- comment3 -->";
    Document doc = Jsoup.parse(html);

    // selecting every paragraph yields only the two comments that live directly inside a <p>
    List<Comment> inAllParas = doc.select("p").comments();
    assertEquals(2, inAllParas.size());
    assertEquals(" comment2 ", inAllParas.get(0).getData());
    assertEquals(" comment3 ", inAllParas.get(1).getData());

    // narrowing the selection to the second paragraph narrows the comments too
    List<Comment> inSecondPara = doc.select("p.two").comments();
    assertEquals(1, inSecondPara.size());
    assertEquals(" comment3 ", inSecondPara.get(0).getData());
}

@Test public void textNodes() {
    // "One" and "Five" are outside the paragraphs, and "Three" belongs to the nested <a>;
    // only "Two" and "Four" are direct text children of a <p>
    final String html = "One<p>Two<a>Three</a><p>Four</p>Five";
    Elements paragraphs = Jsoup.parse(html).select("p");

    List<TextNode> directText = paragraphs.textNodes();
    assertEquals(2, directText.size());
    assertEquals("Two", directText.get(0).text());
    assertEquals("Four", directText.get(1).text());
}

@Test public void dataNodes() {
    // the <p> contributes text rather than data, so only the script and style bodies are expected
    Document doc = Jsoup.parse("<p>One</p><script>Two</script><style>Three</style>");
    List<DataNode> found = doc.select("p, script, style").dataNodes();
    assertEquals(2, found.size());
    assertEquals("Two", found.get(0).getWholeData());
    assertEquals("Three", found.get(1).getWholeData());

    // markup inside a script body is kept verbatim as data
    doc = Jsoup.parse("<head><script type=application/json><crux></script><script src=foo>Blah</script>");
    Elements jsonScript = doc.select("script[type=application/json]");
    List<DataNode> jsonData = jsonScript.dataNodes();
    assertEquals(1, jsonData.size());
    DataNode payload = jsonData.get(0);
    assertEquals("<crux>", payload.getWholeData());

    // check if they're live: editing the returned node must show up in the selected element's HTML
    payload.setWholeData("<cromulent>");
    String rendered = jsonScript.outerHtml();
    assertEquals("<script type=\"application/json\"><cromulent></script>", rendered);
}

@Test public void nodesEmpty() {
    // the document has no <form>, so the selection is empty and so is the resulting node list
    Elements noMatches = Jsoup.parse("<p>").select("form");
    List<TextNode> textNodes = noMatches.textNodes();
    assertEquals(0, textNodes.size());
}

@Test public void formElementsDescendButNotAccumulate() {
    // both nested divs are selected, but the form is written inside the inner one only;
    // it must be reported exactly once
    final String html = "<div><div><form id=1>";
    Elements divs = Jsoup.parse(html).select("div");

    List<FormElement> forms = divs.forms();
    assertEquals(1, forms.size());
}

@Test public void classWithHyphen() {
Document doc = Jsoup.parse("<p class='tab-nav'>Check</p>");
Elements els = doc.getElementsByClass("tab-nav");
Expand Down

0 comments on commit a657ae0

Please sign in to comment.