44
55package kotlinx.serialization.json.internal
66
7+ import kotlinx.serialization.json.internal.*
78import kotlinx.serialization.json.internal.CharMappings.CHAR_TO_TOKEN
89import kotlinx.serialization.json.internal.CharMappings.ESCAPE_2_CHAR
9- import kotlin.jvm.JvmField
10+ import kotlin.js.*
11+ import kotlin.jvm.*
1012
1113internal const val lenientHint = " Use 'isLenient = true' in 'Json {}` builder to accept non-compliant JSON."
1214internal const val coerceInputValuesHint = " Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values."
@@ -118,60 +120,47 @@ internal fun charToTokenClass(c: Char) = if (c.code < CTC_MAX) CHAR_TO_TOKEN[c.c
118120
119121internal fun escapeToChar (c : Int ): Char = if (c < ESC2C_MAX ) ESCAPE_2_CHAR [c] else INVALID
120122
121- // Streaming JSON reader
122- internal open class JsonLexer (@JvmField protected var source : CharSequence ) {
123+ /**
124+ * The base class that reads the JSON from the given char sequence source.
125+ * It has two implementations: one over the raw [String] instance, [StringJsonLexer],
126+ * and one over an arbitrary stream of data, [ReaderJsonLexer] (JVM-only).
127+ *
128+ * [AbstractJsonLexer] contains the base implementation of cold or non-performance-sensitive
129+ * methods on top of [CharSequence], but [StringJsonLexer] overrides some
130+ * of them for performance reasons (devirtualization of [CharSequence] and avoidance
131+ * of additional spills).
132+ */
133+ internal abstract class AbstractJsonLexer {
134+
135+ protected abstract val source: CharSequence
123136
124137 @JvmField
125138 protected var currentPosition: Int = 0 // position in source
126139
127140 open fun ensureHaveChars () {}
128141
129- fun expectEof () {
130- val nextToken = consumeNextToken()
131- if (nextToken != TC_EOF )
132- fail(" Expected EOF, but had ${source[currentPosition - 1 ]} instead" )
133- }
142+ // Used as a bounds check in loops: callers treat a result of -1 as end of input
143+ abstract fun definitelyNotEof (position : Int ): Int
134144
135- // should be used inside loops instead of range checks
136- protected open fun definitelyNotEof (position : Int ): Int = if (position < source.length) position else - 1
145+ abstract fun tryConsumeComma (): Boolean
137146
147+ abstract fun canConsumeValue (): Boolean
138148
139- fun tryConsumeComma (): Boolean {
140- val current = skipWhitespaces()
141- if (current >= source.length || current == - 1 ) return false
142- if (source[current] == ' ,' ) {
143- ++ currentPosition
144- return true
145- }
146- return false
147- }
148-
149- fun canConsumeValue (): Boolean {
150- ensureHaveChars()
151- var current = currentPosition
152- while (true ) {
153- current = definitelyNotEof(current)
154- if (current == - 1 ) break // could be inline function but KT-1436
155- val c = source[current]
156- // Inlined skipWhitespaces without field spill and nested loop. Also faster then char2TokenClass
157- if (c == ' ' || c == ' \n ' || c == ' \r ' || c == ' \t ' ) {
158- ++ current
159- continue
160- }
161- currentPosition = current
162- return isValidValueStart(c)
163- }
164- currentPosition = current
165- return false
166- }
149+ abstract fun consumeNextToken (): Byte
167150
168- private fun isValidValueStart (c : Char ): Boolean {
151+ protected fun isValidValueStart (c : Char ): Boolean {
169152 return when (c) {
170153 ' }' , ' ]' , ' :' , ' ,' -> false
171154 else -> true
172155 }
173156 }
174157
158+ fun expectEof () {
159+ val nextToken = consumeNextToken()
160+ if (nextToken != TC_EOF )
161+ fail(" Expected EOF, but had ${source[currentPosition - 1 ]} instead" )
162+ }
163+
175164 /*
176165 * Peeked string for coerced enums.
176165 * If the value was peeked, 'consumeString' will take it without scanning the source.
@@ -188,7 +177,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
188177 return token
189178 }
190179
191- fun consumeNextToken (expected : Char ) {
180+ open fun consumeNextToken (expected : Char ) {
192181 ensureHaveChars()
193182 val source = source
194183 var cpos = currentPosition
@@ -205,15 +194,15 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
205194 unexpectedToken(expected) // EOF
206195 }
207196
208- private fun unexpectedToken (expected : Char ) {
197+ protected fun unexpectedToken (expected : Char ) {
209198 -- currentPosition // To properly handle null
210199 if (expected == STRING && consumeStringLenient() == NULL ) {
211200 fail(" Expected string literal but 'null' literal was found.\n $coerceInputValuesHint " , currentPosition - 4 )
212201 }
213202 fail(charToTokenClass(expected))
214203 }
215204
216- private fun fail (expectedToken : Byte ) {
205+ protected fun fail (expectedToken : Byte ) {
217206 // We know that the token was consumed prior to this call
218207 // Slow path, never called in normal code, can avoid optimizing it
219208 val expected = when (expectedToken) {
@@ -248,26 +237,6 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
248237 return TC_EOF
249238 }
250239
251- fun consumeNextToken (): Byte {
252- ensureHaveChars()
253- val source = source
254- var cpos = currentPosition
255- while (true ) {
256- cpos = definitelyNotEof(cpos)
257- if (cpos == - 1 ) break
258- val ch = source[cpos++ ]
259- return when (val tc = charToTokenClass(ch)) {
260- TC_WHITESPACE -> continue
261- else -> {
262- currentPosition = cpos
263- tc
264- }
265- }
266- }
267- currentPosition = cpos
268- return TC_EOF
269- }
270-
271240 /**
272241 * Tries to consume `null` token from input.
273242 * Returns `true` if the next 4 chars in input are not `null`,
@@ -291,7 +260,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
291260 return false
292261 }
293262
294- private fun skipWhitespaces (): Int {
263+ open fun skipWhitespaces (): Int {
295264 var current = currentPosition
296265 // Skip whitespaces
297266 while (true ) {
@@ -329,33 +298,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
329298 * This method is a copy of consumeString, but it is used for the keys of JSON objects, so there
330299 * is no need to look up the peeked string.
331300 */
332- fun consumeKeyString (): String {
333- /*
334- * For strings we assume that escaped symbols are rather an exception, so firstly
335- * we optimistically scan for closing quote via intrinsified and blazing-fast 'indexOf',
336- * than do our pessimistic check for backslash and fallback to slow-path if necessary.
337- */
338- consumeNextToken(STRING )
339- var current = currentPosition
340- val closingQuote = indexOf(' "' , current)
341- if (closingQuote == - 1 ) {
342- current = definitelyNotEof(current)
343- if (current == - 1 ) fail(TC_STRING )
344- // it's also possible just to resize buffer,
345- // instead of falling back to slow path,
346- // not sure what is better
347- else return consumeString(currentPosition, current)
348- }
349- // Now we _optimistically_ know where the string ends (it might have been an escaped quote)
350- for (i in current until closingQuote) {
351- // Encountered escape sequence, should fallback to "slow" path and symmbolic scanning
352- if (source[i] == STRING_ESC ) {
353- return consumeString(currentPosition, i)
354- }
355- }
356- this .currentPosition = closingQuote + 1
357- return substring(current, closingQuote)
358- }
301+ abstract fun consumeKeyString (): String
359302
360303 fun consumeString (): String {
361304 if (peekedString != null ) {
@@ -365,10 +308,10 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
365308 return consumeKeyString()
366309 }
367310
368- private fun consumeString (startPosition : Int , current : Int ): String {
311+ @JsName(" consumeString2" ) // WA for JS issue
312+ protected fun consumeString (source : CharSequence , startPosition : Int , current : Int ): String {
369313 var currentPosition = current
370314 var lastPosition = startPosition
371- var source = source
372315 var char = source[currentPosition] // Avoid two range checks visible in the profiler
373316 var usedAppend = false
374317 while (char != STRING ) {
@@ -383,7 +326,6 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
383326 currentPosition = definitelyNotEof(currentPosition)
384327 if (currentPosition == - 1 )
385328 fail(" EOF" , currentPosition)
386- source = this .source
387329 lastPosition = currentPosition
388330 }
389331 char = source[currentPosition]
@@ -424,7 +366,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
424366 return result
425367 }
426368
427- // Allows to consume unquoted string
369+ // Allows consuming unquoted string
428370 fun consumeStringLenient (): String {
429371 if (peekedString != null ) {
430372 return takePeeked()
@@ -445,11 +387,13 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
445387 if (current >= source.length) {
446388 usedAppend = true
447389 appendRange(currentPosition, current)
448- current = definitelyNotEof(current)
449- if (current == - 1 ) {
390+ val eof = definitelyNotEof(current)
391+ if (eof == - 1 ) {
450392 // to handle plain lenient strings, such as top-level
451393 currentPosition = current
452394 return decodedString(0 , 0 )
395+ } else {
396+ current = eof
453397 }
454398 }
455399 }
@@ -639,6 +583,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
639583 return result
640584 }
641585
586+ @JsName(" consumeBoolean2" ) // WA for JS issue
642587 private fun consumeBoolean (start : Int ): Boolean {
643588 /*
644589 * In ASCII representation, upper and lower case letters are different
0 commit comments