Skip to content

Commit

Permalink
handle nullable bytearrays and add null values in benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
Chuckame committed Jul 5, 2024
1 parent 83544bd commit 228fcd1
Show file tree
Hide file tree
Showing 10 changed files with 138 additions and 156 deletions.
21 changes: 11 additions & 10 deletions benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ Each benchmark is executed with the following configuration:
- Writing to a null output stream
- All with the exact same schema generated by avro4k
- Generating a maximum of use cases:
- unions (including nullable fields)
- nullable fields
- unions
- arrays
- records
- enums
Expand All @@ -24,15 +25,15 @@ Each benchmark is executed with the following configuration:
Computer: Macbook air M2

```
Benchmark Mode Cnt Score Error Units Relative Difference (%)
Avro4kBenchmark.read thrpt 5 20537.185 ± 135.318 ops/s 0.00%
ApacheAvroReflectBenchmark.read thrpt 5 20059.982 ± 241.854 ops/s -2.32%
Avro4kGenericWithApacheAvroBenchmark.read thrpt 5 7591.527 ± 172.173 ops/s -63.03%
Avro4kBenchmark.write thrpt 5 41215.703 ± 1274.692 ops/s 0.00%
ApacheAvroReflectBenchmark.write thrpt 5 37188.260 ± 115.447 ops/s -9.74%
JacksonAvroBenchmark.write thrpt 5 30757.363 ± 1557.034 ops/s -25.39%
Avro4kGenericWithApacheAvroBenchmark.write thrpt 5 21305.149 ± 830.640 ops/s -48.33%
Benchmark Mode Cnt Score Error Units Relative Difference (%)
Avro4kBenchmark.read thrpt 5 21443.935 ± 2215.328 ops/s 0.00%
ApacheAvroReflectBenchmark.read thrpt 5 19803.543 ± 485.869 ops/s -7.64%
Avro4kGenericWithApacheAvroBenchmark.read thrpt 5 8836.787 ± 404.874 ops/s -58.79%
Avro4kBenchmark.write thrpt 5 50565.556 ± 849.344 ops/s 0.00%
ApacheAvroReflectBenchmark.write thrpt 5 46872.768 ± 2406.622 ops/s -7.30%
JacksonAvroBenchmark.write thrpt 5 32349.182 ± 10105.111 ops/s -36.01%
Avro4kGenericWithApacheAvroBenchmark.write thrpt 5 27471.887 ± 315.498 ops/s -45.67%
```

> [!WARNING]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ internal object ManualProfilingWrite {
fun main(vararg args: String) {
Avro4kBenchmark().apply {
initTestData()
for (i in 0 until 100_000) {
for (i in 0 until 1_000_000) {
if (i % 1_000 == 0) println("Iteration $i")
write()
}
Expand All @@ -17,7 +17,7 @@ internal object ManualProfilingRead {
fun main(vararg args: String) {
Avro4kBenchmark().apply {
initTestData()
for (i in 0 until 100_000) {
for (i in 0 until 1_000_000) {
if (i % 1_000 == 0) println("Iteration $i")
read()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,31 +41,31 @@ class CharJacksonDeserializer : StdDeserializer<Char>(Char::class.java) {

@Serializable
internal data class Client(
val id: Long = 0,
val index: Int = 0,
val isActive: Boolean = false,
val id: Long,
val index: Int,
val isActive: Boolean,
@Serializable(with = BigDecimalAsStringSerializer::class)
@JsonFormat(shape = JsonFormat.Shape.STRING)
val balance: BigDecimal? = null,
val picture: ByteArray? = null,
val age: Int = 0,
val eyeColor: EyeColor? = null,
val name: String? = null,
val balance: BigDecimal?,
val picture: ByteArray?,
val age: Int,
val eyeColor: EyeColor?,
val name: String?,
@JsonSerialize(using = CharJacksonSerializer::class)
@JsonDeserialize(using = CharJacksonDeserializer::class)
val gender: Char? = null,
val company: String? = null,
val emails: Array<String> = emptyArray(),
val phones: LongArray = LongArray(0),
val address: String? = null,
val about: String? = null,
val gender: Char?,
val company: String?,
val emails: Array<String>,
val phones: LongArray,
val address: String?,
val about: String?,
@Serializable(with = LocalDateSerializer::class)
val registered: LocalDate? = null,
val latitude: Double = 0.0,
val longitude: Float = 0.0f,
val tags: List<String?> = emptyList(),
val partner: Partner,
val map: Map<String, String> = emptyMap(),
val registered: LocalDate?,
val latitude: Double,
val longitude: Float,
val tags: List<String?>,
val partner: Partner?,
val map: Map<String, String>,
)

@Serializable
Expand All @@ -80,18 +80,18 @@ sealed interface Partner

@Serializable
internal class GoodPartner(
val id: Long = 0,
val name: String? = null,
val id: Long,
val name: String,
@Serializable(with = InstantSerializer::class)
val since: Instant? = null
val since: Instant
) : Partner

@Serializable
internal class BadPartner(
val id: Long = 0,
val name: String? = null,
val id: Long,
val name: String,
@Serializable(with = InstantSerializer::class)
val since: Instant? = null
val since: Instant
) : Partner

@Serializable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,55 +6,50 @@ import kotlin.math.absoluteValue

internal object ClientsGenerator {
fun generate(size: Int) = Clients(
clients = buildList {
repeat(size) {
add(newClient())
buildList {
repeat(size) { index ->
add(Client(
partner = when (RandomUtils.nextInt(4)) {
0 -> GoodPartner(RandomUtils.nextLong(), RandomUtils.randomAlphabetic(30), Instant.ofEpochMilli(RandomUtils.nextLong()))
1 -> BadPartner(RandomUtils.nextLong(), RandomUtils.randomAlphabetic(30), Instant.ofEpochMilli(RandomUtils.nextLong().absoluteValue))
2 -> if (RandomUtils.nextBoolean()) Stranger.KNOWN_STRANGER else Stranger.UNKNOWN_STRANGER
3 -> null
else -> throw IllegalStateException("Unexpected value")
},
id = RandomUtils.nextLong().absoluteValue,
index = RandomUtils.nextInt(0, Int.MAX_VALUE),
isActive = RandomUtils.nextBoolean(),
balance = if (index % 2 == 0) null else RandomUtils.randomBigDecimal(),
picture = if (index % 3 == 0) null else RandomUtils.randomBytes(4048),
age = RandomUtils.nextInt(0, 100),
eyeColor = if (index % 2 == 0) null else EyeColor.entries[RandomUtils.nextInt(3)],
name = if (index % 2 == 0) null else RandomUtils.randomAlphanumeric(20),
gender = if (index % 2 == 0) null else if (RandomUtils.nextBoolean()) 'M' else 'F',
company = if (index % 2 == 0) null else RandomUtils.randomAlphanumeric(20),
emails = RandomUtils.stringArray(RandomUtils.nextInt(5, 10), 10),
phones = RandomUtils.longArray(RandomUtils.nextInt(5, 10)),
address = if (index % 2 == 0) null else RandomUtils.randomAlphanumeric(20),
about = RandomUtils.randomAlphanumeric(20),
registered = if (index % 3 == 0) null else
LocalDate.of(
1900 + RandomUtils.nextInt(110),
1 + RandomUtils.nextInt(12),
1 + RandomUtils.nextInt(28)
),
latitude = RandomUtils.nextDouble(0.0, 90.0),
longitude = RandomUtils.nextFloat(0.0f, 180.0f),
tags = buildList {
repeat(RandomUtils.nextInt(5, 25)) {
add(if (it % 2 == 0) null else RandomUtils.randomAlphanumeric(10))
}
},
map = buildMap {
repeat(10) {
put(RandomUtils.randomAlphanumeric(10), RandomUtils.randomAlphanumeric(10))
}
}
))
}
}
)

private fun newClient(): Client {
val u = Client(
partner = when (RandomUtils.nextInt(3)) {
0 -> GoodPartner(RandomUtils.nextLong(), RandomUtils.randomAlphabetic(30), Instant.ofEpochMilli(RandomUtils.nextLong()))
1 -> BadPartner(RandomUtils.nextLong(), RandomUtils.randomAlphabetic(30), Instant.ofEpochMilli(RandomUtils.nextLong().absoluteValue))
2 -> if (RandomUtils.nextBoolean()) Stranger.KNOWN_STRANGER else Stranger.UNKNOWN_STRANGER
else -> throw IllegalStateException("Unexpected value")
},
id = RandomUtils.nextLong().absoluteValue,
index = RandomUtils.nextInt(0, Int.MAX_VALUE),
isActive = RandomUtils.nextBoolean(),
balance = RandomUtils.randomBigDecimal(),
picture = RandomUtils.randomBytes(4048),
age = RandomUtils.nextInt(0, 100),
eyeColor = EyeColor.entries[RandomUtils.nextInt(3)],
name = RandomUtils.randomAlphanumeric(20),
gender = if (RandomUtils.nextBoolean()) 'M' else 'F',
company = RandomUtils.randomAlphanumeric(20),
emails = RandomUtils.stringArray(RandomUtils.nextInt(5, 10), 10),
phones = RandomUtils.longArray(RandomUtils.nextInt(5, 10)),
address = RandomUtils.randomAlphanumeric(20),
about = RandomUtils.randomAlphanumeric(20),
registered =
LocalDate.of(
1900 + RandomUtils.nextInt(110),
1 + RandomUtils.nextInt(12),
1 + RandomUtils.nextInt(28)
),
latitude = RandomUtils.nextDouble(0.0, 90.0),
longitude = RandomUtils.nextFloat(0.0f, 180.0f),
tags = buildList {
repeat(RandomUtils.nextInt(5, 25)) {
add(RandomUtils.randomAlphanumeric(10))
}
},
map = buildMap {
repeat(10) {
put(RandomUtils.randomAlphanumeric(10), RandomUtils.randomAlphanumeric(10))
}
}
)
return u
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import com.github.avrokotlin.avro4k.decodeResolvingInt
import com.github.avrokotlin.avro4k.decodeResolvingLong
import com.github.avrokotlin.avro4k.internal.UnexpectedDecodeSchemaError
import com.github.avrokotlin.avro4k.internal.decoder.AbstractPolymorphicDecoder
import com.github.avrokotlin.avro4k.internal.decoder.direct.AbstractAvroDirectDecoder.SizeGetter
import com.github.avrokotlin.avro4k.internal.getElementIndexNullable
import com.github.avrokotlin.avro4k.internal.isFullNameOrAliasMatch
import com.github.avrokotlin.avro4k.internal.nonNullSerialName
Expand Down Expand Up @@ -46,7 +45,7 @@ internal abstract class AbstractAvroDirectDecoder(
protected val binaryDecoder: org.apache.avro.io.Decoder,
) : AbstractInterceptingDecoder(), UnionDecoder {
abstract override var currentWriterSchema: Schema
private var currentlyReadingCollection: Boolean = false
private var previousCollectionSize = -1

override val serializersModule: SerializersModule
get() = avro.serializersModule
Expand All @@ -64,60 +63,21 @@ internal abstract class AbstractAvroDirectDecoder(
return decodeBytes() as T
}

if (deserializer is AbstractCollectionSerializer<*, T, *> && currentWriterSchema.isCollectionOrMap()) {
var result: T = deserializer.merge(this, null)
with(result.collectionSizeGetter()) {
if (result.collectionSize() > 0) {
currentlyReadingCollection = true
var prevSize = result.collectionSize()
while (true) {
result = deserializer.merge(this@AbstractAvroDirectDecoder, result)
val newSize = result.collectionSize()
if (prevSize == newSize) {
break
}
prevSize = newSize
}
currentlyReadingCollection = false
}
}
return result
if (deserializer is AbstractCollectionSerializer<*, T, *>) {
return decodeCollectionLike(deserializer)
}

return super<AbstractInterceptingDecoder>.decodeSerializableValue(deserializer)
}

private fun Schema.isCollectionOrMap(): Boolean {
if (isUnion) {
this.types.forEach {
if (it.isCollectionOrMap()) {
return true
}
}
return false
}
return this.type == Schema.Type.ARRAY || this.type == Schema.Type.MAP
}

fun interface SizeGetter<T> {
fun T.collectionSize(): Int
}

private fun <T> T.collectionSizeGetter(): SizeGetter<T> {
return when (this) {
is Collection<*> -> SizeGetter { size }
is Map<*, *> -> SizeGetter { size }
is Array<*> -> SizeGetter { size }
is BooleanArray -> SizeGetter { size }
is ByteArray -> SizeGetter { size }
is ShortArray -> SizeGetter { size }
is IntArray -> SizeGetter { size }
is LongArray -> SizeGetter { size }
is FloatArray -> SizeGetter { size }
is DoubleArray -> SizeGetter { size }
is CharArray -> SizeGetter { size }
else -> throw SerializationException("Unsupported collection type: ${this?.let { it::class }}")
}
@OptIn(InternalSerializationApi::class)
private fun <T> decodeCollectionLike(deserializer: AbstractCollectionSerializer<*, T, *>): T {
var result: T? = null
do {
result = deserializer.merge(this, result)
} while (previousCollectionSize > 0)
previousCollectionSize = -1
return result!!
}

override fun beginStructure(descriptor: SerialDescriptor): CompositeDecoder {
Expand All @@ -132,7 +92,7 @@ internal abstract class AbstractAvroDirectDecoder(
}) {
when (it.type) {
Schema.Type.ARRAY -> {
AnyValueDecoder { ArrayBlockDirectDecoder(it, decodeFirstBlock = !currentlyReadingCollection, avro, binaryDecoder) }
AnyValueDecoder { ArrayBlockDirectDecoder(it, decodeFirstBlock = previousCollectionSize == -1, { previousCollectionSize = it }, avro, binaryDecoder) }
}

Schema.Type.BYTES -> {
Expand All @@ -152,7 +112,7 @@ internal abstract class AbstractAvroDirectDecoder(
}) {
when (it.type) {
Schema.Type.MAP -> {
AnyValueDecoder { MapBlockDirectDecoder(it, decodeFirstBlock = !currentlyReadingCollection, avro, binaryDecoder) }
AnyValueDecoder { MapBlockDirectDecoder(it, decodeFirstBlock = previousCollectionSize == -1, { previousCollectionSize = it }, avro, binaryDecoder) }
}

else -> null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ internal class BytesDirectDecoder(
internal class ArrayBlockDirectDecoder(
private val arraySchema: Schema,
private val decodeFirstBlock: Boolean,
private val onCollectionSizeDecoded: (Int) -> Unit,
avro: Avro,
binaryDecoder: org.apache.avro.io.Decoder,
) : AbstractAvroDirectDecoder(avro, binaryDecoder) {
Expand All @@ -44,7 +45,7 @@ internal class ArrayBlockDirectDecoder(
binaryDecoder.readArrayStart().toInt()
} else {
binaryDecoder.arrayNext().toInt()
}
}.also { onCollectionSizeDecoded(it) }
}

override fun beginElement(
Expand All @@ -65,6 +66,7 @@ internal class ArrayBlockDirectDecoder(
internal class MapBlockDirectDecoder(
private val mapSchema: Schema,
private val decodeFirstBlock: Boolean,
private val onCollectionSizeDecoded: (Int) -> Unit,
avro: Avro,
binaryDecoder: org.apache.avro.io.Decoder,
) : AbstractAvroDirectDecoder(avro, binaryDecoder) {
Expand All @@ -75,7 +77,7 @@ internal class MapBlockDirectDecoder(
binaryDecoder.readMapStart().toInt()
} else {
binaryDecoder.mapNext().toInt()
}
}.also { onCollectionSizeDecoded(it) }
}

override fun beginElement(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ internal abstract class AbstractAvroGenericDecoder : AbstractDecoder(), AvroDeco
get() = avro.serializersModule

override fun <T> decodeSerializableValue(deserializer: DeserializationStrategy<T>): T {
if (deserializer.descriptor == ByteArraySerializer().descriptor) {
if (deserializer == ByteArraySerializer()) {
// fast-path for ByteArray fields, to avoid slow-path with ArrayGenericDecoder
@Suppress("UNCHECKED_CAST")
return decodeBytes() as T
Expand Down
Loading

0 comments on commit 228fcd1

Please sign in to comment.