Skip to content

Commit

Permalink
Fixed a performance regression in 1.12
Browse files Browse the repository at this point in the history
Was building useless StringBuilders on every token read!
  • Loading branch information
jhy committed Feb 23, 2020
1 parent a657ae0 commit 9d9e53c
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 11 deletions.
19 changes: 9 additions & 10 deletions src/main/java/org/jsoup/parser/Tokeniser.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,21 +53,20 @@ final class Tokeniser {
}

Token read() {
int pos = reader.pos(); // count how many reads we do in a row without making progress, and bail if stuck in a loop
final CharacterReader r = this.reader;
final int pos = r.pos(); // count how many reads we do in a row without making progress, and bail if stuck in a loop
int consecutiveReads = 0;
while (!isEmitPending) {
state.read(this, reader);
if (reader.pos() <= pos) {
consecutiveReads++;
}
Validate.isTrue(consecutiveReads < 10,
"BUG: Not making progress from state: " + this.state.name() + " with current char=" + reader.current());
state.read(this, r);
if (++consecutiveReads > 10 && r.pos() <= pos)
Validate.wtf("BUG: Not making progress from state: " + this.state.name() + " with current char=" + r.current());
}

// if emit is pending, a non-character token was found: return any chars in buffer, and leave token for next read:
if (charsBuilder.length() > 0) {
String str = charsBuilder.toString();
charsBuilder.delete(0, charsBuilder.length());
final StringBuilder cb = this.charsBuilder;
if (cb.length() != 0) {
String str = cb.toString();
cb.delete(0, cb.length());
charsString = null;
return charPending.data(str);
} else if (charsString != null) {
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/org/jsoup/parser/TreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,26 +58,31 @@ Document parse(Reader input, String baseUri, Parser parser) {
/**
 * Abstract hook; each concrete tree builder supplies the implementation.
 * NOTE(review): presumably parses {@code inputFragment} relative to {@code context} and
 * returns the resulting nodes -- the contract is not visible from this declaration, confirm
 * against the implementations.
 */
abstract List<Node> parseFragment(String inputFragment, Element context, String baseUri, Parser parser);

/**
 * Drives the parse loop: repeatedly reads a token from the tokeniser, hands it to
 * {@link #process(Token)}, resets the token, and stops once the token just handled was
 * the EOF token. The value returned by {@code process} is ignored here.
 */
protected void runParser() {
    // Local copies of the field and the enum constant used inside the loop
    // (presumably to keep the per-token work minimal -- this loop runs once per token).
    final Tokeniser tokeniser = this.tokeniser;
    final Token.TokenType eof = Token.TokenType.EOF;

    while (true) {
        Token token = tokeniser.read();
        process(token);
        token.reset();

        // The EOF token is still passed to process() above before the loop ends.
        // NOTE(review): type is read after reset(); this assumes reset() leaves it untouched -- confirm.
        if (token.type == eof) {
            break;
        }
    }
}

/**
 * Handles a single token. {@code runParser()} calls this for every token it reads
 * (including the final EOF token), and the {@code processStartTag} helpers call it with
 * a start tag token they build or recycle.
 * NOTE(review): the meaning of the boolean result is not visible here ({@code runParser}
 * ignores it) -- confirm against the implementations.
 */
protected abstract boolean process(Token token);

/**
 * Processes a start tag token carrying the given name.
 * The shared {@code start} token is reset and reused unless it is the token currently
 * being processed, in which case a new token is allocated instead.
 *
 * @param name the tag name to set on the token
 * @return whatever {@link #process(Token)} returns for that token
 */
protected boolean processStartTag(String name) {
    final Token.StartTag recycled = this.start;
    if (currentToken != recycled) {
        // the shared token is free: wipe it and reuse it
        return process(recycled.reset().name(name));
    }
    // the shared token is in use as the current token, so it must not be overwritten
    return process(new Token.StartTag().name(name));
}

public boolean processStartTag(String name, Attributes attrs) {
final Token.StartTag start = this.start;
if (currentToken == start) { // don't recycle an in-use token
return process(new Token.StartTag().nameAttr(name, attrs));
}
Expand Down

0 comments on commit 9d9e53c

Please sign in to comment.