Skip to content

Commit

Permalink
Limit reading CharBuffer.array() to CharBuffer.limit() to avoid additional NULL characters at the end of some inputs (#1452)
Browse files Browse the repository at this point in the history
  • Loading branch information
hannibal218bc authored Dec 10, 2020
1 parent c52f831 commit 22405b7
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/helper/DataUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ static Document parseInputStream(InputStream input, String charsetName, String b
try {
CharBuffer defaultDecoded = Charset.forName(defaultCharset).decode(firstBytes);
if (defaultDecoded.hasArray())
doc = parser.parseInput(new CharArrayReader(defaultDecoded.array()), baseUri);
doc = parser.parseInput(new CharArrayReader(defaultDecoded.array(), defaultDecoded.arrayOffset(), defaultDecoded.limit()), baseUri);
else
doc = parser.parseInput(defaultDecoded.toString(), baseUri);
} catch (UncheckedIOException e) {
Expand Down
8 changes: 8 additions & 0 deletions src/test/java/org/jsoup/helper/DataUtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,14 @@ public void supportsUTF8BOM() throws IOException {
assertEquals("OK", doc.head().select("title").text());
}

/**
 * Parses a small UTF-8 encoded document (containing the non-ASCII character 'ü') from an
 * in-memory stream with no charset supplied, and checks that the serialized output holds
 * no U+0000 characters.
 */
@Test
public void noExtraNULLBytes() throws IOException {
    final String markup = "<html><head><meta charset=\"UTF-8\"></head><body><div><u>ü</u>ü</div></body></html>";
    final ByteArrayInputStream stream = new ByteArrayInputStream(markup.getBytes("UTF-8"));

    // A null charset name is passed, as in the original call; the input itself declares UTF-8 via <meta>.
    final Document parsed = Jsoup.parse(stream, null, "");
    final String serialized = parsed.outerHtml();

    assertFalse(serialized.contains("\u0000"));
}

@Test
public void supportsZippedUTF8BOM() throws IOException {
File in = getFile("/bomtests/bom_utf8.html.gz");
Expand Down

0 comments on commit 22405b7

Please sign in to comment.