Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite and restructure JSON parser #1343

Merged
merged 12 commits into from
Mar 12, 2021
Prev Previous commit
Next Next commit
Optimize boolean consumption (+40% on boolean stress benchmark)
  • Loading branch information
qwwdfsad committed Feb 17, 2021
commit 36d96524fd9720ea3c442c5b46694f4ea295a018
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,6 @@ private const val CTC_MAX = 0x7e
// mapping from escape chars to real chars
private const val ESC2C_MAX = 0x75

/*
* In ASCII, upper- and lower-case letters differ only
* in the 6th bit (0x20), and we leverage this fact
*/
private const val asciiCaseMask = 1 shl 5

// object instead of @SharedImmutable because there is mutual initialization in [initC2ESC] and [initC2TC]
Expand Down Expand Up @@ -466,53 +462,66 @@ internal class JsonReader(private val source: String) {
}
}

// fun consumeBoolean(allowQuotation: Boolean): Boolean {
// skipWhitespaces()
// var current = currentPosition
//// var hasQuote = false
//// if (allowQuotation && source[current] == STRING) {
//// hasQuote = true
//// ++current
//// }
//
// // TODO handle EOF
// val result = when (source[current++].toInt() or asciiCaseMask) {
// 't'.toInt() -> {
// if (source.length - current < 3) fail("")
// val r = source[current + 0].toInt() or asciiCaseMask == 'r'.toInt()
// val u = source[current + 1].toInt() or asciiCaseMask == 'u'.toInt()
// val e = source[current + 2].toInt() or asciiCaseMask == 'e'.toInt()
// if (!(r and u and e)) fail("")
//
//// for ((i, c) in "rue".withIndex()) {
//// if (c.toInt() != source[current + i].toInt() or asciiCaseMask) {
//// fail("")
//// }
//// }
// currentPosition += 4
// true
// }
// 'f'.toInt() -> {
// if (source.length - current < 4) fail("")
// val a = source[current + 0].toInt() or asciiCaseMask == 'a'.toInt()
// val l = source[current + 1].toInt() or asciiCaseMask == 'l'.toInt()
// val s = source[current + 2].toInt() or asciiCaseMask == 's'.toInt()
// val e = source[current + 3].toInt() or asciiCaseMask == 'e'.toInt()
// if (!(a and l and s and e)) fail("")
//// for ((i, c) in "alse".withIndex()) {
//// if (c.toInt() != source[current + i].toInt() or asciiCaseMask) {
//// fail("")
//// }
//// }
// currentPosition += 5
// false
// }
// else -> TODO()
// }
//
//// if (hasQuote) {
////
//// }
// return result
// }
/**
 * Consumes a boolean literal starting at the position returned by [skipWhitespaces].
 *
 * Delegates to the private positional overload, which matches `true`/`false`
 * case-insensitively and reports malformed input through [fail].
 *
 * @return the value of the consumed literal
 */
fun consumeBoolean(): Boolean = consumeBoolean(skipWhitespaces())

/**
 * Consumes a boolean literal that may optionally be wrapped in quotation marks.
 *
 * Starting at the position returned by [skipWhitespaces], an opening [STRING] character
 * is accepted and stepped over before the literal itself is consumed. If an opening
 * quote was present, a matching closing [STRING] character must follow the literal and
 * is consumed as well.
 *
 * Failures are reported through [fail]: `"EOF"` when the input ends before the literal
 * or before the closing quote, and `"Expected closing quotation mark"` when a quoted
 * literal is not terminated by [STRING].
 *
 * @return the value of the consumed literal
 */
fun consumeBooleanLenient(): Boolean {
    val start = skipWhitespaces()
    if (start == source.length) fail("EOF")

    // Remember whether the literal is quoted so the closing quote can be demanded afterwards.
    val quoted = source[start] == STRING
    val value = consumeBoolean(if (quoted) start + 1 else start)
    if (!quoted) return value

    // consumeBoolean has moved currentPosition just past the literal.
    if (currentPosition == source.length) fail("EOF")
    if (source[currentPosition] != STRING) fail("Expected closing quotation mark")
    ++currentPosition
    return value
}

/**
 * Consumes a boolean literal whose first character is at index [start] of [source].
 *
 * The first character selects the literal (`t` for `true`, `f` for `false`) and the
 * remainder is validated by [consumeBooleanLiteral], which also moves [currentPosition]
 * past the literal on success.
 *
 * @param start index of the first character of the literal
 * @return `true` or `false` according to the literal consumed
 *
 * Reports `"EOF"` through [fail] when [start] equals the input length, and an
 * "Expected valid boolean literal prefix" error when the first character is neither
 * `t` nor `f` in either case.
 */
private fun consumeBoolean(start: Int): Boolean {
    if (start == source.length) fail("EOF")
    /*
     * In ASCII, upper- and lower-case letters differ only in the 6th bit, so OR-ing
     * asciiCaseMask into the character folds both cases onto the lower-case letter.
     * This makes boolean literals case-insensitive without any allocation.
     */
    val suffixStart = start + 1
    return when (source[start].toInt() or asciiCaseMask) {
        't'.toInt() -> {
            consumeBooleanLiteral("rue", suffixStart)
            true
        }
        'f'.toInt() -> {
            consumeBooleanLiteral("alse", suffixStart)
            false
        }
        else -> fail("Expected valid boolean literal prefix, but had ${source[start]}")
    }
}


/**
 * Validates the tail of a boolean literal and advances [currentPosition] past it.
 *
 * The comparison is case-insensitive for ASCII letters: each input character is OR-ed
 * with [asciiCaseMask] before being compared with the (lower-case) expected character.
 *
 * @param literalSuffix lower-case remainder of the literal after its first character
 *   (`"rue"` or `"alse"` at the visible call sites)
 * @param current index in [source] of the first suffix character; callers pass the index
 *   right after the literal's first character, so `current - 1` is a valid index
 *
 * Reports `"Unexpected end of boolean literal"` through [fail] when fewer than
 * `literalSuffix.length` characters remain, and an "Expected valid boolean literal prefix"
 * error quoting the consumed text when a character does not match.
 */
private fun consumeBooleanLiteral(literalSuffix: String, current: Int) {
    if (source.length - current < literalSuffix.length) {
        fail("Unexpected end of boolean literal")
    }

    for (i in literalSuffix.indices) {
        val expected = literalSuffix[i].toInt()
        val actual = source[current + i].toInt() or asciiCaseMask
        if (expected != actual) {
            // Quote the literal from its first character through the offending one.
            // The end index is exclusive and cannot exceed source.length thanks to the
            // length check above.
            fail("Expected valid boolean literal prefix, but had ${source.substring(current - 1, current + i + 1)}")
        }
    }

    currentPosition = current + literalSuffix.length
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -174,14 +174,11 @@ internal open class StreamingJsonDecoder(
* We prohibit non true/false boolean literals at all as it is considered way too error-prone,
* but allow quoted literal in relaxed mode for booleans.
*/
val string = if (configuration.isLenient) {
reader.consumeStringLenient()
return if (configuration.isLenient) {
reader.consumeBooleanLenient()
} else {
// TODO _SHOULD_ be ONLY unquoted
reader.consumeStringLenient()
return reader.consumeBoolean()
qwwdfsad marked this conversation as resolved.
Show resolved Hide resolved
}
string.toBooleanStrictOrNull()?.let { return it }
reader.fail("Failed to parse type 'boolean' for input '$string'")
}

/*
Expand Down