@@ -45,8 +45,10 @@ function StringDecoder(encoding) {
45
45
case 'utf16le' :
46
46
this . text = utf16Text ;
47
47
this . end = utf16End ;
48
- // fall through
48
+ nb = 4 ;
49
+ break ;
49
50
case 'utf8' :
51
+ this . fillLast = utf8FillLast ;
50
52
nb = 4 ;
51
53
break ;
52
54
case 'base64' :
@@ -88,7 +90,7 @@ StringDecoder.prototype.end = utf8End;
88
90
// Returns only complete characters in a Buffer
89
91
StringDecoder . prototype . text = utf8Text ;
90
92
91
- // Attempts to complete a partial character using bytes from a Buffer
93
+ // Attempts to complete a partial non-UTF-8 character using bytes from a Buffer
92
94
StringDecoder . prototype . fillLast = function ( buf ) {
93
95
if ( this . lastNeed <= buf . length ) {
94
96
buf . copy ( this . lastChar , this . lastTotal - this . lastNeed , 0 , this . lastNeed ) ;
@@ -112,38 +114,83 @@ function utf8CheckByte(byte) {
112
114
return - 1 ;
113
115
}
114
116
115
- // Checks at most the last 3 bytes of a Buffer for an incomplete UTF-8
116
- // character, returning the total number of bytes needed to complete the partial
117
- // character (if applicable).
117
+ // Checks at most 3 bytes at the end of a Buffer in order to detect an
118
+ // incomplete multi-byte UTF-8 character. The total number of bytes (2, 3, or 4)
119
+ // needed to complete the UTF-8 character (if applicable) is returned.
118
120
// Looks backwards over at most the last three bytes of `buf` (never below
// index `i`) for a byte that utf8CheckByte() classifies with a non-negative
// value, and records on `self.lastNeed` how many more bytes are still missing.
//
//   self - decoder state object; only `lastNeed` is written here.
//   buf  - Buffer whose tail is inspected.
//   i    - lowest index of `buf` that may be inspected.
//
// Returns the value utf8CheckByte() produced for the first such byte found
// (reported as 0 when that value is 2 and the byte sits three from the end),
// or 0 when no such byte is found within range.
function utf8CheckIncomplete(self, buf, i) {
  for (var back = 1; back <= 3; back++) {
    var idx = buf.length - back;
    // Ran past the start of the region the caller asked us to look at.
    if (idx < i)
      return 0;
    var total = utf8CheckByte(buf[idx]);
    if (total >= 0) {
      if (total > 0) {
        if (back === 3 && total === 2) {
          // A value of 2 found three bytes from the end: presumably a
          // two-byte character that is already complete, so nothing is
          // pending and `lastNeed` is left untouched.
          total = 0;
        } else {
          // `back` bytes of the character are already present in `buf`.
          self.lastNeed = total - back;
        }
      }
      return total;
    }
    // Negative result (presumably a continuation byte): keep looking one
    // byte further back.
  }
  return 0;
}
146
152
153
+ // Validates as many continuation bytes for a multi-byte UTF-8 character as
154
+ // needed or are available. If we see a non-continuation byte where we expect
155
+ // one, we "replace" the validated continuation bytes we've seen so far with
156
+ // UTF-8 replacement characters ('\ufffd'), to match v8's UTF-8 decoding
157
+ // behavior. The continuation byte check is included three times in the case
158
+ // where all of the continuation bytes for a character exist in the same buffer.
159
+ // It is also done this way as a slight performance increase instead of using a
160
+ // loop.
161
// Checks up to the first three bytes of `buf` for the UTF-8 continuation-byte
// bit pattern (10xxxxxx), looking only at as many bytes as `self.lastNeed`
// asks for and `buf` actually contains.
//
//   self - decoder state object; `lastNeed` is read, and overwritten when an
//          invalid byte is found.
//   buf  - Buffer holding the bytes that should continue the partial character.
//   p    - number of replacement characters to emit for bytes seen before
//          `buf` (utf8FillLast passes lastTotal - lastNeed).
//
// Returns undefined when every inspected byte is a continuation byte, or when
// `buf` is empty. Otherwise returns a string of '\ufffd' characters: `p` of
// them plus one for each valid continuation byte that preceded the bad byte.
// In that case `self.lastNeed` is set to the index of the bad byte in `buf`
// (0, 1 or 2) - presumably so the caller knows how many bytes of `buf` were
// consumed; verify against the caller.
//
// The check is written out three times rather than as a loop on purpose, as a
// small performance gain.
function utf8CheckExtraBytes(self, buf, p) {
  // An empty chunk says nothing about the pending character. Without this
  // guard `buf[0]` is undefined, `(undefined & 0xC0)` is 0, and the partial
  // character would be wrongly discarded as invalid.
  if (buf.length === 0)
    return undefined;
  if ((buf[0] & 0xC0) !== 0x80) {
    self.lastNeed = 0;
    return '\ufffd'.repeat(p);
  }
  if (self.lastNeed > 1 && buf.length > 1) {
    if ((buf[1] & 0xC0) !== 0x80) {
      self.lastNeed = 1;
      return '\ufffd'.repeat(p + 1);
    }
    if (self.lastNeed > 2 && buf.length > 2) {
      if ((buf[2] & 0xC0) !== 0x80) {
        self.lastNeed = 2;
        return '\ufffd'.repeat(p + 2);
      }
    }
  }
  return undefined;
}
179
+
180
+ // Attempts to complete a multi-byte UTF-8 character using bytes from a Buffer.
181
// Tries to finish the buffered partial UTF-8 character with bytes from `buf`.
// Called with `this` bound to the decoder; reads `lastTotal`, `lastNeed`,
// `lastChar` and `encoding`, and may update `lastNeed` and `lastChar`.
//
// Returns the string produced by utf8CheckExtraBytes() when it reports a
// problem, the decoded character when `buf` supplies all missing bytes, or
// undefined when more bytes are still required.
function utf8FillLast(buf) {
  // Position in `lastChar` where the next incoming byte belongs.
  const offset = this.lastTotal - this.lastNeed;
  const replacement = utf8CheckExtraBytes(this, buf, offset);
  if (replacement !== undefined)
    return replacement;

  const available = buf.length;
  if (available >= this.lastNeed) {
    // Enough bytes arrived: complete the character and decode it.
    buf.copy(this.lastChar, offset, 0, this.lastNeed);
    return this.lastChar.toString(this.encoding, 0, this.lastTotal);
  }

  // Still short: stash what arrived and note how many bytes remain.
  buf.copy(this.lastChar, offset, 0, available);
  this.lastNeed -= available;
  return undefined;
}
193
+
147
194
// Returns all complete UTF-8 characters in a Buffer. If the Buffer ended on a
148
195
// partial character, the character's bytes are buffered until the required
149
196
// number of bytes are available.
0 commit comments