Skip to content

Commit 365ac9c

Browse files
authored
Performance-friendly JsonLexer (#1635)
* Performance-friendly JsonLexer
1 parent 7807f6d commit 365ac9c

File tree

11 files changed

+228
-109
lines changed

11 files changed

+228
-109
lines changed

formats/json/commonMain/src/kotlinx/serialization/json/Json.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ public sealed class Json(
9595
* @throws [SerializationException] if the given JSON string cannot be deserialized to the value of type [T].
9696
*/
9797
public final override fun <T> decodeFromString(deserializer: DeserializationStrategy<T>, string: String): T {
98-
val lexer = JsonLexer(string)
98+
val lexer = StringJsonLexer(string)
9999
val input = StreamingJsonDecoder(this, WriteMode.OBJ, lexer)
100100
val result = input.decodeSerializableValue(deserializer)
101101
lexer.expectEof()

formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ internal fun InvalidFloatingPointDecoded(value: Number, key: String, output: Str
4545
JsonDecodingException(-1, unexpectedFpErrorMessage(value, key, output))
4646

4747
// Extension on the JSON reader that fails immediately
48-
internal fun JsonLexer.throwInvalidFloatingPointDecoded(result: Number): Nothing {
48+
internal fun AbstractJsonLexer.throwInvalidFloatingPointDecoded(result: Number): Nothing {
4949
fail("Unexpected special floating-point value $result. By default, " +
5050
"non-finite floating point values are prohibited because they do not conform JSON specification. " +
5151
specialFlowingValuesHint

formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import kotlinx.serialization.json.*
1010
@OptIn(ExperimentalSerializationApi::class)
1111
internal class JsonTreeReader(
1212
configuration: JsonConfiguration,
13-
private val lexer: JsonLexer
13+
private val lexer: AbstractJsonLexer
1414
) {
1515
private val isLenient = configuration.isLenient
1616
private var stackDepth = 0

formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,13 @@ import kotlinx.serialization.modules.*
1313
import kotlin.jvm.*
1414

1515
/**
16-
* [JsonDecoder] which reads given JSON from [JsonLexer] field by field.
16+
* [JsonDecoder] which reads given JSON from [AbstractJsonLexer] field by field.
1717
*/
1818
@OptIn(ExperimentalSerializationApi::class, ExperimentalUnsignedTypes::class)
1919
internal open class StreamingJsonDecoder(
2020
final override val json: Json,
2121
private val mode: WriteMode,
22-
@JvmField internal val lexer: JsonLexer
22+
@JvmField internal val lexer: AbstractJsonLexer
2323
) : JsonDecoder, AbstractDecoder() {
2424

2525
override val serializersModule: SerializersModule = json.serializersModule
@@ -249,7 +249,7 @@ internal open class StreamingJsonDecoder(
249249
@OptIn(ExperimentalSerializationApi::class)
250250
@ExperimentalUnsignedTypes
251251
internal class JsonDecoderForUnsignedTypes(
252-
private val lexer: JsonLexer,
252+
private val lexer: AbstractJsonLexer,
253253
json: Json
254254
) : AbstractDecoder() {
255255
override val serializersModule: SerializersModule = json.serializersModule
@@ -261,7 +261,7 @@ internal class JsonDecoderForUnsignedTypes(
261261
override fun decodeShort(): Short = lexer.parseString("UShort") { toUShort().toShort() }
262262
}
263263

264-
private inline fun <T> JsonLexer.parseString(expectedType: String, block: String.() -> T): T {
264+
private inline fun <T> AbstractJsonLexer.parseString(expectedType: String, block: String.() -> T): T {
265265
val input = consumeStringLenient()
266266
try {
267267
return input.block()

formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ private sealed class AbstractJsonTreeDecoder(
163163

164164
@OptIn(ExperimentalUnsignedTypes::class)
165165
override fun decodeTaggedInline(tag: String, inlineDescriptor: SerialDescriptor): Decoder =
166-
if (inlineDescriptor.isUnsignedNumber) JsonDecoderForUnsignedTypes(JsonLexer(getPrimitiveValue(tag).content), json)
166+
if (inlineDescriptor.isUnsignedNumber) JsonDecoderForUnsignedTypes(StringJsonLexer(getPrimitiveValue(tag).content), json)
167167
else super.decodeTaggedInline(tag, inlineDescriptor)
168168
}
169169

formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonLexer.kt renamed to formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt

Lines changed: 41 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44

55
package kotlinx.serialization.json.internal
66

7+
import kotlinx.serialization.json.internal.*
78
import kotlinx.serialization.json.internal.CharMappings.CHAR_TO_TOKEN
89
import kotlinx.serialization.json.internal.CharMappings.ESCAPE_2_CHAR
9-
import kotlin.jvm.JvmField
10+
import kotlin.js.*
11+
import kotlin.jvm.*
1012

1113
internal const val lenientHint = "Use 'isLenient = true' in 'Json {}` builder to accept non-compliant JSON."
1214
internal const val coerceInputValuesHint = "Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values."
@@ -118,60 +120,47 @@ internal fun charToTokenClass(c: Char) = if (c.code < CTC_MAX) CHAR_TO_TOKEN[c.c
118120

119121
internal fun escapeToChar(c: Int): Char = if (c < ESC2C_MAX) ESCAPE_2_CHAR[c] else INVALID
120122

121-
// Streaming JSON reader
122-
internal open class JsonLexer(@JvmField protected var source: CharSequence) {
123+
/**
124+
* The base class that reads the JSON from the given char sequence source.
125+
* It has two implementations: one over the raw [String] instance, [StringJsonLexer],
126+
* and one over an arbitrary stream of data, [ReaderJsonLexer] (JVM-only).
127+
*
128+
* [AbstractJsonLexer] contains base implementation for cold or not performance-sensitive
129+
* methods on top of [CharSequence], but [StringJsonLexer] overrides some
130+
* of them for performance reasons (devirtualization of [CharSequence] and avoidance
131+
* of additional spills).
132+
*/
133+
internal abstract class AbstractJsonLexer {
134+
135+
protected abstract val source: CharSequence
123136

124137
@JvmField
125138
protected var currentPosition: Int = 0 // position in source
126139

127140
open fun ensureHaveChars() {}
128141

129-
fun expectEof() {
130-
val nextToken = consumeNextToken()
131-
if (nextToken != TC_EOF)
132-
fail("Expected EOF, but had ${source[currentPosition - 1]} instead")
133-
}
142+
// Used as bound check in loops
143+
abstract fun definitelyNotEof(position: Int): Int
134144

135-
// should be used inside loops instead of range checks
136-
protected open fun definitelyNotEof(position: Int): Int = if (position < source.length) position else -1
145+
abstract fun tryConsumeComma(): Boolean
137146

147+
abstract fun canConsumeValue(): Boolean
138148

139-
fun tryConsumeComma(): Boolean {
140-
val current = skipWhitespaces()
141-
if (current >= source.length || current == -1) return false
142-
if (source[current] == ',') {
143-
++currentPosition
144-
return true
145-
}
146-
return false
147-
}
148-
149-
fun canConsumeValue(): Boolean {
150-
ensureHaveChars()
151-
var current = currentPosition
152-
while (true) {
153-
current = definitelyNotEof(current)
154-
if (current == -1) break // could be inline function but KT-1436
155-
val c = source[current]
156-
// Inlined skipWhitespaces without field spill and nested loop. Also faster than charToTokenClass
157-
if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
158-
++current
159-
continue
160-
}
161-
currentPosition = current
162-
return isValidValueStart(c)
163-
}
164-
currentPosition = current
165-
return false
166-
}
149+
abstract fun consumeNextToken(): Byte
167150

168-
private fun isValidValueStart(c: Char): Boolean {
151+
protected fun isValidValueStart(c: Char): Boolean {
169152
return when (c) {
170153
'}', ']', ':', ',' -> false
171154
else -> true
172155
}
173156
}
174157

158+
fun expectEof() {
159+
val nextToken = consumeNextToken()
160+
if (nextToken != TC_EOF)
161+
fail("Expected EOF, but had ${source[currentPosition - 1]} instead")
162+
}
163+
175164
/*
176165
* Peeked string for coerced enums.
177166
* If the value was picked, 'consumeString' will take it without scanning the source.
@@ -188,7 +177,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
188177
return token
189178
}
190179

191-
fun consumeNextToken(expected: Char) {
180+
open fun consumeNextToken(expected: Char) {
192181
ensureHaveChars()
193182
val source = source
194183
var cpos = currentPosition
@@ -205,15 +194,15 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
205194
unexpectedToken(expected) // EOF
206195
}
207196

208-
private fun unexpectedToken(expected: Char) {
197+
protected fun unexpectedToken(expected: Char) {
209198
--currentPosition // To properly handle null
210199
if (expected == STRING && consumeStringLenient() == NULL) {
211200
fail("Expected string literal but 'null' literal was found.\n$coerceInputValuesHint", currentPosition - 4)
212201
}
213202
fail(charToTokenClass(expected))
214203
}
215204

216-
private fun fail(expectedToken: Byte) {
205+
protected fun fail(expectedToken: Byte) {
217206
// We know that the token was consumed prior to this call
218207
// Slow path, never called in normal code, can avoid optimizing it
219208
val expected = when (expectedToken) {
@@ -248,26 +237,6 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
248237
return TC_EOF
249238
}
250239

251-
fun consumeNextToken(): Byte {
252-
ensureHaveChars()
253-
val source = source
254-
var cpos = currentPosition
255-
while (true) {
256-
cpos = definitelyNotEof(cpos)
257-
if (cpos == -1) break
258-
val ch = source[cpos++]
259-
return when (val tc = charToTokenClass(ch)) {
260-
TC_WHITESPACE -> continue
261-
else -> {
262-
currentPosition = cpos
263-
tc
264-
}
265-
}
266-
}
267-
currentPosition = cpos
268-
return TC_EOF
269-
}
270-
271240
/**
272241
* Tries to consume `null` token from input.
273242
* Returns `true` if the next 4 chars in input are not `null`,
@@ -291,7 +260,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
291260
return false
292261
}
293262

294-
private fun skipWhitespaces(): Int {
263+
open fun skipWhitespaces(): Int {
295264
var current = currentPosition
296265
// Skip whitespaces
297266
while (true) {
@@ -329,33 +298,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
329298
* This method is a copy of consumeString, but used for key of json objects, so there
330299
* is no need to lookup peeked string.
331300
*/
332-
fun consumeKeyString(): String {
333-
/*
334-
* For strings we assume that escaped symbols are rather an exception, so firstly
335-
* we optimistically scan for closing quote via intrinsified and blazing-fast 'indexOf',
336-
* then do our pessimistic check for backslash and fall back to the slow path if necessary.
337-
*/
338-
consumeNextToken(STRING)
339-
var current = currentPosition
340-
val closingQuote = indexOf('"', current)
341-
if (closingQuote == -1) {
342-
current = definitelyNotEof(current)
343-
if (current == -1) fail(TC_STRING)
344-
// it's also possible just to resize buffer,
345-
// instead of falling back to slow path,
346-
// not sure what is better
347-
else return consumeString(currentPosition, current)
348-
}
349-
// Now we _optimistically_ know where the string ends (it might have been an escaped quote)
350-
for (i in current until closingQuote) {
351-
// Encountered escape sequence, should fall back to "slow" path and symbolic scanning
352-
if (source[i] == STRING_ESC) {
353-
return consumeString(currentPosition, i)
354-
}
355-
}
356-
this.currentPosition = closingQuote + 1
357-
return substring(current, closingQuote)
358-
}
301+
abstract fun consumeKeyString(): String
359302

360303
fun consumeString(): String {
361304
if (peekedString != null) {
@@ -365,10 +308,10 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
365308
return consumeKeyString()
366309
}
367310

368-
private fun consumeString(startPosition: Int, current: Int): String {
311+
@JsName("consumeString2") // WA for JS issue
312+
protected fun consumeString(source: CharSequence, startPosition: Int, current: Int): String {
369313
var currentPosition = current
370314
var lastPosition = startPosition
371-
var source = source
372315
var char = source[currentPosition] // Avoid two range checks visible in the profiler
373316
var usedAppend = false
374317
while (char != STRING) {
@@ -383,7 +326,6 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
383326
currentPosition = definitelyNotEof(currentPosition)
384327
if (currentPosition == -1)
385328
fail("EOF", currentPosition)
386-
source = this.source
387329
lastPosition = currentPosition
388330
}
389331
char = source[currentPosition]
@@ -424,7 +366,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
424366
return result
425367
}
426368

427-
// Allows to consume unquoted string
369+
// Allows consuming unquoted string
428370
fun consumeStringLenient(): String {
429371
if (peekedString != null) {
430372
return takePeeked()
@@ -445,11 +387,13 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
445387
if (current >= source.length) {
446388
usedAppend = true
447389
appendRange(currentPosition, current)
448-
current = definitelyNotEof(current)
449-
if (current == -1) {
390+
val eof = definitelyNotEof(current)
391+
if (eof == -1) {
450392
// to handle plain lenient strings, such as top-level
451393
currentPosition = current
452394
return decodedString(0, 0)
395+
} else {
396+
current = eof
453397
}
454398
}
455399
}
@@ -639,6 +583,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
639583
return result
640584
}
641585

586+
@JsName("consumeBoolean2") // WA for JS issue
642587
private fun consumeBoolean(start: Int): Boolean {
643588
/*
644589
* In ASCII representation, upper and lower case letters are different

0 commit comments

Comments
 (0)