Skip to content

Commit 519d495

Browse files
authored
Fix #497 wrt TOML too (last one) (#572)
1 parent d729b2c commit 519d495

File tree

3 files changed

+28
-33
lines changed

3 files changed

+28
-33
lines changed

release-notes/VERSION-2.x

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ Active Maintainers:
1616

1717
(not yet released)
1818

19-
#497: (csv, yaml): `UTF8Reader` throws "Need to move partially decoded character;
19+
#497: (csv, toml, yaml): `UTF8Reader` throws "Need to move partially decoded character;
2020
buffer not modifiable" when reading only one Chinese char
21-
NOTE: csv part fixed in 2.19.0, yaml in 2.20.0
21+
NOTE: csv part fixed in 2.19.0, toml and yaml in 2.20.0
2222
(reported by @mrtaolili)
2323

2424
2.20.0-rc1 (04-Aug-2025)

toml/src/main/java/com/fasterxml/jackson/dataformat/toml/UTF8Reader.java

Lines changed: 24 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@ public final class UTF8Reader
2424

2525
private byte[] _inputBuffer;
2626

27+
/**
28+
* Flag set to indicate {@code _inputBuffer} is read-only, and its
29+
* content should not be modified. This is the case when caller
30+
* has passed in a buffer of contents already read, instead of Jackson
31+
* allocating read buffer.
32+
*
33+
* @since 2.20
34+
*/
35+
private final boolean _inputBufferReadOnly;
36+
2737
/**
2838
* Pointer to the next available byte (if any), iff less than
2939
* <code>_inputEnd</code>
@@ -59,12 +69,14 @@ public final class UTF8Reader
5969

6070
// Constructor used by the factory methods: caller gives us the input source and/or the buffer to read from
6171
private UTF8Reader(IOContext ctxt, InputStream in, boolean autoClose,
62-
byte[] buf, int ptr, int end)
72+
byte[] buf, int ptr, int end,
73+
boolean inputBufferReadOnly)
6374
{
6475
super((in == null) ? buf : in);
6576
_ioContext = ctxt;
6677
_inputSource = in;
6778
_inputBuffer = buf;
79+
_inputBufferReadOnly = inputBufferReadOnly;
6880
_inputPtr = ptr;
6981
_inputEnd = end;
7082
_autoClose = autoClose;
@@ -75,14 +87,16 @@ private UTF8Reader(IOContext ctxt, InputStream in, boolean autoClose,
7587
public static UTF8Reader construct(IOContext ctxt, InputStream in, boolean autoClose)
7688
{
7789
final byte[] buf = ctxt.allocReadIOBuffer();
78-
return new UTF8Reader(ctxt, in, autoClose, buf, 0, 0);
90+
// We manage input buffer; read-only -> false
91+
return new UTF8Reader(ctxt, in, autoClose, buf, 0, 0, false);
7992
}
8093

8194
// Factory method used when user passes us input in static pre-filled
8295
// input buffer: no InputStream nor buffer recycling used
8396
public static UTF8Reader construct(byte[] buf, int ptr, int len)
8497
{
85-
return new UTF8Reader(null, null, true, buf, ptr, ptr+len);
98+
// We are passed input buffer; read-only -> true
99+
return new UTF8Reader(null, null, true, buf, ptr, ptr+len, true);
86100
}
87101

88102
/**
@@ -101,15 +115,6 @@ private void freeBuffers()
101115
}
102116
}
103117

104-
/**
105-
* Method that can be used to see if we can actually modify the
106-
* underlying buffer. This is the case if we are managing the buffer,
107-
* but not if it was just given to us.
108-
*/
109-
protected final boolean canModifyBuffer() {
110-
return (_ioContext != null);
111-
}
112-
113118
/*
114119
/**********************************************************************
115120
/* Reader API
@@ -326,27 +331,17 @@ private boolean loadMore(int available) throws IOException
326331
{
327332
_byteCount += (_inputEnd - available);
328333

329-
// Bytes that need to be moved to the beginning of buffer?
330334
if (available > 0) {
335+
// Should we move bytes to the beginning of buffer?
331336
if (_inputPtr > 0) {
332-
if (!canModifyBuffer()) {
333-
// 15-Aug-2022, tatu: Occurs (only) if we have half-decoded UTF-8
334-
// characters; uncovered by:
335-
//
336-
// https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=50036
337-
//
338-
// and need to be reported as IOException
339-
if (_inputSource == null) {
340-
throw new IOException(String.format(
341-
"End-of-input after first %d byte(s) of a UTF-8 character: needed at least one more",
342-
available));
337+
// Can only do so if buffer mutable
338+
if (!_inputBufferReadOnly) {
339+
for (int i = 0; i < available; ++i) {
340+
_inputBuffer[i] = _inputBuffer[_inputPtr+i];
343341
}
342+
_inputPtr = 0;
343+
_inputEnd = available;
344344
}
345-
for (int i = 0; i < available; ++i) {
346-
_inputBuffer[i] = _inputBuffer[_inputPtr+i];
347-
}
348-
_inputPtr = 0;
349-
_inputEnd = available;
350345
}
351346
} else {
352347
// Ok; here we can actually reasonably expect an EOF, so let's do a separate read right away:

toml/src/test/java/com/fasterxml/jackson/dataformat/toml/FuzzTomlReadTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ public void testUTF8Decoding50036() throws Exception
4949
fail("Should not pass");
5050
// NOTE! This is an actual IOException in Jackson 2.x
5151
} catch (IOException e) {
52-
verifyException(e, "End-of-input after first 1 byte");
53-
verifyException(e, "of a UTF-8 character");
52+
verifyException(e, "Unexpected EOF in the middle of a multi-byte");
53+
verifyException(e, "got 1, needed 2");
5454
}
5555
}
5656

0 commit comments

Comments
 (0)