@@ -40,13 +40,24 @@ var KEY = C.KEY = 0x72;
40
40
// Parser Modes
41
41
var OBJECT = C . OBJECT = 0x81 ;
42
42
var ARRAY = C . ARRAY = 0x82 ;
43
+ // Character constants
44
+ var BACK_SLASH = "\\" . charCodeAt ( 0 ) ;
45
+ var FORWARD_SLASH = "\/" . charCodeAt ( 0 ) ;
46
+ var BACKSPACE = "\b" . charCodeAt ( 0 ) ;
47
+ var FORM_FEED = "\f" . charCodeAt ( 0 ) ;
48
+ var NEWLINE = "\n" . charCodeAt ( 0 ) ;
49
+ var CARRIAGE_RETURN = "\r" . charCodeAt ( 0 ) ;
50
+ var TAB = "\t" . charCodeAt ( 0 ) ;
43
51
52
+ var STRING_BUFFER_SIZE = 64 * 1024 ;
44
53
45
54
function Parser ( ) {
46
55
this . tState = START ;
47
56
this . value = undefined ;
48
57
49
58
this . string = undefined ; // string data
59
+ this . stringBuffer = Buffer . alloc ? Buffer . alloc ( STRING_BUFFER_SIZE ) : new Buffer ( STRING_BUFFER_SIZE ) ;
60
+ this . stringBufferOffset = 0 ;
50
61
this . unicode = undefined ; // unicode escapes
51
62
52
63
this . key = undefined ;
@@ -77,6 +88,41 @@ proto.charError = function (buffer, i) {
77
88
this . tState = STOP ;
78
89
this . onError ( new Error ( "Unexpected " + JSON . stringify ( String . fromCharCode ( buffer [ i ] ) ) + " at position " + i + " in state " + Parser . toknam ( this . tState ) ) ) ;
79
90
} ;
91
/**
 * Queues a single byte of the string currently being parsed.
 *
 * When the pending buffer has no free slot left, its whole content is first
 * decoded as UTF-8 onto `this.string` and the write position restarts at 0.
 *
 * @param {number} char - byte value to store in the pending buffer
 */
proto.appendStringChar = function (char) {
  var pending = this.stringBuffer;

  if (this.stringBufferOffset >= STRING_BUFFER_SIZE) {
    // buffer is full: move everything decoded so far into the string
    this.string += pending.toString('utf8');
    this.stringBufferOffset = 0;
  }

  var slot = this.stringBufferOffset;
  this.stringBufferOffset = slot + 1;
  pending[slot] = char;
};
99
/**
 * Queues the bytes buf[start, end) of the string currently being parsed.
 *
 * The bytes are copied into the pending buffer in one piece; if they do not
 * fit behind what is already pending, the pending bytes are first decoded as
 * UTF-8 onto `this.string`.
 *
 * @param {Buffer} buf - source bytes
 * @param {number} [start] - first byte to take (defaults to 0)
 * @param {number} [end] - byte to stop before (defaults to buf.length);
 *   a negative value counts back from the end of buf
 */
proto.appendStringBuf = function (buf, start, end) {
  var from = typeof start === 'number' ? start : 0;
  var to = typeof end === 'number' ? end : buf.length;

  if (to < 0) {
    // a negative end decreases the size; resolve it to an absolute offset
    // here because buf.copy() does not accept a negative sourceEnd
    to += buf.length;
  }
  if (to > buf.length) {
    // buf.copy() never reads past buf.length, so the write offset must not
    // advance by more than that either
    to = buf.length;
  }

  var size = to - from;
  if (size <= 0) {
    return;
  }

  if (this.stringBufferOffset + size > STRING_BUFFER_SIZE) {
    // not enough room left: decode what is pending and start over
    this.string += this.stringBuffer.toString('utf8', 0, this.stringBufferOffset);
    this.stringBufferOffset = 0;
  }

  if (size > STRING_BUFFER_SIZE) {
    // larger than the whole pending buffer (which was flushed just above):
    // decode straight onto the string instead of truncating the copy
    this.string += buf.toString('utf8', from, to);
    return;
  }

  buf.copy(this.stringBuffer, this.stringBufferOffset, from, to);
  this.stringBufferOffset += size;
};
80
126
proto . write = function ( buffer ) {
81
127
if ( typeof buffer === "string" ) buffer = new Buffer ( buffer ) ;
82
128
var n ;
@@ -93,7 +139,10 @@ proto.write = function (buffer) {
93
139
} else if ( n === 0x74 ) { this . tState = TRUE1 ; // t
94
140
} else if ( n === 0x66 ) { this . tState = FALSE1 ; // f
95
141
} else if ( n === 0x6e ) { this . tState = NULL1 ; // n
96
- } else if ( n === 0x22 ) { this . string = "" ; this . tState = STRING1 ; // "
142
+ } else if ( n === 0x22 ) { // "
143
+ this . string = "" ;
144
+ this . stringBufferOffset = 0 ;
145
+ this . tState = STRING1 ;
97
146
} else if ( n === 0x2d ) { this . string = "-" ; this . tState = NUMBER1 ; // -
98
147
} else {
99
148
if ( n >= 0x30 && n < 0x40 ) { // 1-9
@@ -112,7 +161,8 @@ proto.write = function (buffer) {
112
161
for ( var j = 0 ; j < this . bytes_remaining ; j ++ ) {
113
162
this . temp_buffs [ this . bytes_in_sequence ] [ this . bytes_in_sequence - this . bytes_remaining + j ] = buffer [ j ] ;
114
163
}
115
- this . string += this . temp_buffs [ this . bytes_in_sequence ] . toString ( ) ;
164
+
165
+ this . appendStringBuf ( this . temp_buffs [ this . bytes_in_sequence ] ) ;
116
166
this . bytes_in_sequence = this . bytes_remaining = 0 ;
117
167
i = i + j - 1 ;
118
168
} else if ( this . bytes_remaining === 0 && n >= 128 ) { // else if no remainder bytes carried over, parse multi byte (>=128) chars one at a time
@@ -129,38 +179,47 @@ proto.write = function (buffer) {
129
179
this . bytes_remaining = ( i + this . bytes_in_sequence ) - buffer . length ;
130
180
i = buffer . length - 1 ;
131
181
} else {
132
- this . string += buffer . slice ( i , ( i + this . bytes_in_sequence ) ) . toString ( ) ;
182
+ this . appendStringBuf ( buffer , i , i + this . bytes_in_sequence ) ;
133
183
i = i + this . bytes_in_sequence - 1 ;
134
184
}
135
- } else if ( n === 0x22 ) { this . tState = START ; this . onToken ( STRING , this . string ) ; this . offset += Buffer . byteLength ( this . string , 'utf8' ) + 1 ; this . string = undefined ; }
136
- else if ( n === 0x5c ) { this . tState = STRING2 ; }
137
- else if ( n >= 0x20 ) { this . string += String . fromCharCode ( n ) ; }
185
+ } else if ( n === 0x22 ) {
186
+ this . tState = START ;
187
+ this . string += this . stringBuffer . toString ( 'utf8' , 0 , this . stringBufferOffset ) ;
188
+ this . stringBufferOffset = 0 ;
189
+ this . onToken ( STRING , this . string ) ;
190
+ this . offset += Buffer . byteLength ( this . string , 'utf8' ) + 1 ;
191
+ this . string = undefined ;
192
+ }
193
+ else if ( n === 0x5c ) {
194
+ this . tState = STRING2 ;
195
+ }
196
+ else if ( n >= 0x20 ) { this . appendStringChar ( n ) ; }
138
197
else {
139
198
return this . charError ( buffer , i ) ;
140
199
}
141
200
} else if ( this . tState === STRING2 ) { // After backslash
142
201
n = buffer [ i ] ;
143
- if ( n === 0x22 ) { this . string += "\"" ; this . tState = STRING1 ;
144
- } else if ( n === 0x5c ) { this . string += "\\" ; this . tState = STRING1 ;
145
- } else if ( n === 0x2f ) { this . string += "\/" ; this . tState = STRING1 ;
146
- } else if ( n === 0x62 ) { this . string += "\b" ; this . tState = STRING1 ;
147
- } else if ( n === 0x66 ) { this . string += "\f" ; this . tState = STRING1 ;
148
- } else if ( n === 0x6e ) { this . string += "\n" ; this . tState = STRING1 ;
149
- } else if ( n === 0x72 ) { this . string += "\r" ; this . tState = STRING1 ;
150
- } else if ( n === 0x74 ) { this . string += "\t" ; this . tState = STRING1 ;
202
+ if ( n === 0x22 ) { this . appendStringChar ( n ) ; this . tState = STRING1 ;
203
+ } else if ( n === 0x5c ) { this . appendStringChar ( BACK_SLASH ) ; this . tState = STRING1 ;
204
+ } else if ( n === 0x2f ) { this . appendStringChar ( FORWARD_SLASH ) ; this . tState = STRING1 ;
205
+ } else if ( n === 0x62 ) { this . appendStringChar ( BACKSPACE ) ; this . tState = STRING1 ;
206
+ } else if ( n === 0x66 ) { this . appendStringChar ( FORM_FEED ) ; this . tState = STRING1 ;
207
+ } else if ( n === 0x6e ) { this . appendStringChar ( NEWLINE ) ; this . tState = STRING1 ;
208
+ } else if ( n === 0x72 ) { this . appendStringChar ( CARRIAGE_RETURN ) ; this . tState = STRING1 ;
209
+ } else if ( n === 0x74 ) { this . appendStringChar ( TAB ) ; this . tState = STRING1 ;
151
210
} else if ( n === 0x75 ) { this . unicode = "" ; this . tState = STRING3 ;
152
- } else {
153
- return this . charError ( buffer , i ) ;
211
+ } else {
212
+ return this . charError ( buffer , i ) ;
154
213
}
155
214
} else if ( this . tState === STRING3 || this . tState === STRING4 || this . tState === STRING5 || this . tState === STRING6 ) { // unicode hex codes
156
215
n = buffer [ i ] ;
157
216
// 0-9 A-F a-f
158
217
if ( ( n >= 0x30 && n < 0x40 ) || ( n > 0x40 && n <= 0x46 ) || ( n > 0x60 && n <= 0x66 ) ) {
159
218
this . unicode += String . fromCharCode ( n ) ;
160
219
if ( this . tState ++ === STRING6 ) {
161
- this . string += String . fromCharCode ( parseInt ( this . unicode , 16 ) ) ;
220
+ this . appendStringBuf ( Buffer ( String . fromCharCode ( parseInt ( this . unicode , 16 ) ) ) ) ;
162
221
this . unicode = undefined ;
163
- this . tState = STRING1 ;
222
+ this . tState = STRING1 ;
164
223
}
165
224
} else {
166
225
return this . charError ( buffer , i ) ;
@@ -266,14 +325,14 @@ proto.emit = function (value) {
266
325
} ;
267
326
// Hook for consumers of the parser: the default implementation deliberately
// does nothing and is meant to be replaced.
// NOTE(review): presumably invoked with each completed value — confirm against the full file.
proto . onValue = function ( value ) {
268
327
// Override me
269
- } ;
328
+ } ;
270
329
proto . onToken = function ( token , value ) {
271
330
if ( this . state === VALUE ) {
272
331
if ( token === STRING || token === NUMBER || token === TRUE || token === FALSE || token === NULL ) {
273
332
if ( this . value ) {
274
333
this . value [ this . key ] = value ;
275
334
}
276
- this . emit ( value ) ;
335
+ this . emit ( value ) ;
277
336
} else if ( token === LEFT_BRACE ) {
278
337
this . push ( ) ;
279
338
if ( this . value ) {
@@ -322,7 +381,7 @@ proto.onToken = function (token, value) {
322
381
if ( token === COLON ) { this . state = VALUE ; }
323
382
else { return this . parseError ( token , value ) ; }
324
383
} else if ( this . state === COMMA ) {
325
- if ( token === COMMA ) {
384
+ if ( token === COMMA ) {
326
385
if ( this . mode === ARRAY ) { this . key ++ ; this . state = VALUE ; }
327
386
else if ( this . mode === OBJECT ) { this . state = KEY ; }
328
387
0 commit comments