Skip to content

Commit

Permalink
string_decoder: fix number of replacement chars
Browse files Browse the repository at this point in the history
Fixes: #22626

PR-URL: #22709
Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
  • Loading branch information
addaleax committed Sep 24, 2018
1 parent ab6ddc0 commit 06f6ac1
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 6 deletions.
13 changes: 7 additions & 6 deletions src/string_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,17 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
kIncompleteCharactersEnd);
if (Encoding() == UTF8) {
// For UTF-8, we need special treatment to align with the V8 decoder:
// If an incomplete character is found at a chunk boundary, we turn
// that character into a single invalid one.
// If an incomplete character is found at a chunk boundary, we use
// its remainder and pass it to V8 as-is.
for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
if ((data[i] & 0xC0) != 0x80) {
// This byte is not a continuation byte even though it should have
// been one.
// Act as if there was a 1-byte incomplete character, which does
// not make sense but works here because we know it's invalid.
// been one. We stop decoding of the incomplete character at this
// point (but still use the rest of the incomplete bytes from this
// chunk) and assume that the new, unexpected byte starts a new one.
state_[kMissingBytes] = 0;
state_[kBufferedBytes] = 1;
memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
state_[kBufferedBytes] += i;
data += i;
nread -= i;
break;
Expand Down
11 changes: 11 additions & 0 deletions test/parallel/test-string-decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,17 @@ assert.strictEqual(decoder.write(Buffer.alloc(20)), '\0'.repeat(10));
assert.strictEqual(decoder.write(Buffer.alloc(48)), '\0'.repeat(24));
assert.strictEqual(decoder.end(), '');

// Regression tests for https://github.com/nodejs/node/issues/22626
// (not enough replacement chars when having seen more than one byte of an
// incomplete multibyte character).
decoder = new StringDecoder('utf8');
assert.strictEqual(decoder.write(Buffer.from('f69b', 'hex')), '');
assert.strictEqual(decoder.write(Buffer.from('d1', 'hex')), '\ufffd\ufffd');
assert.strictEqual(decoder.end(), '\ufffd');
assert.strictEqual(decoder.write(Buffer.from('f4', 'hex')), '');
assert.strictEqual(decoder.write(Buffer.from('bde5', 'hex')), '\ufffd\ufffd');
assert.strictEqual(decoder.end(), '\ufffd');

common.expectsError(
() => new StringDecoder(1),
{
Expand Down

0 comments on commit 06f6ac1

Please sign in to comment.