Skip to content

Commit

Permalink
Merge pull request #225 from Chuckame/improve-benchmark
Browse files Browse the repository at this point in the history
Improve benchmark
  • Loading branch information
Chuckame authored Jun 25, 2024
2 parents 25f3f38 + 948643e commit 23814d2
Show file tree
Hide file tree
Showing 33 changed files with 1,128 additions and 477 deletions.
54 changes: 29 additions & 25 deletions benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,43 @@ This project contains a benchmark that compares the serialization / deserializat

- [Avro4k](https://github.com/avro-kotlin/avro4k/)
- [Jackson Avro](https://github.com/FasterXML/jackson-dataformats-binary/tree/master/avro)
- Coming soon: [Avro](https://avro.apache.org/)
- [Avro (using ReflectData)](https://avro.apache.org/)

Each benchmark is executed with the following configuration:
- Reading from a prepared byte array
- Writing to a null output stream
- All with the exact same schema generated by avro4k
- Covering as many use cases as possible:
- unions (including nullable fields)
- arrays
- records
- enums
- primitives (string, int, float, double, boolean)
- logical types (date, timestamp-millis, char, uuid)
- UUID is not benchmarked because Jackson uses a different representation (fixed) than Avro4k and Apache Avro (string)

## Results

<details>
<summary>Macbook air M2 - without direct encoding</summary>
Computer: Macbook air M2

```
Benchmark Mode Cnt Score Error Units
Avro4kClientsBenchmark.read thrpt 2 439983.130 ops/s
Avro4kClientsBenchmark.write thrpt 2 474453.236 ops/s
JacksonAvroClientsBenchmark.read thrpt 2 577757.798 ops/s
JacksonAvroClientsBenchmark.write thrpt 2 649982.820 ops/s
Benchmark Mode Cnt Score Error Units Relative Difference (%)
Avro4kBenchmark.read thrpt 5 20537.185 ± 135.318 ops/s 0.00%
ApacheAvroReflectBenchmark.read thrpt 5 20059.982 ± 241.854 ops/s -2.32%
Avro4kGenericWithApacheAvroBenchmark.read thrpt 5 7591.527 ± 172.173 ops/s -63.03%
Avro4kBenchmark.write thrpt 5 41215.703 ± 1274.692 ops/s 0.00%
ApacheAvroReflectBenchmark.write thrpt 5 37188.260 ± 115.447 ops/s -9.74%
JacksonAvroBenchmark.write thrpt 5 30757.363 ± 1557.034 ops/s -25.39%
Avro4kGenericWithApacheAvroBenchmark.write thrpt 5 21305.149 ± 830.640 ops/s -48.33%
```

For the moment, Jackson Avro is faster than Avro4k because Avro4k is still not doing direct encoding so there is an intermediate generic data step.
> [!WARNING]
> JacksonAvroBenchmark.read is failing because of a bug in the library when combining kotlin and avro format.
</details>

<br>

<details>
<summary>Macbook air M2 - with direct encoding but without direct decoding</summary>

```
Benchmark Mode Cnt Score Error Units
Avro4kClientsBenchmark.read thrpt 2 471489.689 ops/s
Avro4kClientsBenchmark.write thrpt 2 686791.337 ops/s
JacksonAvroClientsBenchmark.read thrpt 2 513425.052 ops/s
JacksonAvroClientsBenchmark.write thrpt 2 627412.940 ops/s
```

</details>
> [!NOTE]
> To add the relative difference column, ask ChatGPT: "can you add another column in this benchmark that indicates the relative difference in percent regarding
> Avro4kDirectBenchmark:"
## Run the benchmark locally

Expand Down
19 changes: 16 additions & 3 deletions benchmark/api/benchmark.api
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
public abstract interface class com/github/avrokotlin/benchmark/Partner {
public static final field Companion Lcom/github/avrokotlin/benchmark/Partner$Companion;
public final class com/github/avrokotlin/benchmark/internal/CharJacksonDeserializer : com/fasterxml/jackson/databind/deser/std/StdDeserializer {
public fun <init> ()V
public fun deserialize (Lcom/fasterxml/jackson/core/JsonParser;Lcom/fasterxml/jackson/databind/DeserializationContext;)Ljava/lang/Character;
public synthetic fun deserialize (Lcom/fasterxml/jackson/core/JsonParser;Lcom/fasterxml/jackson/databind/DeserializationContext;)Ljava/lang/Object;
}

public final class com/github/avrokotlin/benchmark/Partner$Companion {
public final class com/github/avrokotlin/benchmark/internal/CharJacksonSerializer : com/fasterxml/jackson/databind/ser/std/StdSerializer {
public fun <init> ()V
public fun acceptJsonFormatVisitor (Lcom/fasterxml/jackson/databind/jsonFormatVisitors/JsonFormatVisitorWrapper;Lcom/fasterxml/jackson/databind/JavaType;)V
public fun serialize (Ljava/lang/Character;Lcom/fasterxml/jackson/core/JsonGenerator;Lcom/fasterxml/jackson/databind/SerializerProvider;)V
public synthetic fun serialize (Ljava/lang/Object;Lcom/fasterxml/jackson/core/JsonGenerator;Lcom/fasterxml/jackson/databind/SerializerProvider;)V
}

public abstract interface class com/github/avrokotlin/benchmark/internal/Partner {
public static final field Companion Lcom/github/avrokotlin/benchmark/internal/Partner$Companion;
}

public final class com/github/avrokotlin/benchmark/internal/Partner$Companion {
public final fun serializer ()Lkotlinx/serialization/KSerializer;
}

7 changes: 6 additions & 1 deletion benchmark/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@ plugins {
id("org.jetbrains.kotlinx.benchmark") version "0.4.11"
kotlin("plugin.allopen") version libs.versions.kotlin
kotlin("plugin.serialization") version libs.versions.kotlin
kotlin("plugin.noarg") version libs.versions.kotlin
}

allOpen {
annotation("org.openjdk.jmh.annotations.State")
}

noArg {
annotation("kotlinx.serialization.Serializable")
}

benchmark {
configurations {
named("main") {
Expand All @@ -30,7 +35,7 @@ dependencies {
implementation("org.apache.commons:commons-lang3:3.14.0")
implementation("org.jetbrains.kotlinx:kotlinx-benchmark-runtime:0.4.11")

val jacksonVersion = "2.17.0"
val jacksonVersion = "2.17.1"
implementation("com.fasterxml.jackson.module:jackson-module-kotlin:$jacksonVersion")
implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:$jacksonVersion")
implementation("com.fasterxml.jackson.dataformat:jackson-dataformat-avro:$jacksonVersion")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package com.github.avrokotlin.benchmark

import com.github.avrokotlin.avro4k.Avro
import com.github.avrokotlin.avro4k.encodeToByteArray
import com.github.avrokotlin.benchmark.internal.Clients
import kotlinx.benchmark.Benchmark
import org.apache.avro.Conversions
import org.apache.avro.data.TimeConversions
import org.apache.avro.io.DatumReader
import org.apache.avro.io.DatumWriter
import org.apache.avro.io.DecoderFactory
import org.apache.avro.io.Encoder
import org.apache.avro.io.EncoderFactory
import org.apache.avro.reflect.ReflectData
import java.io.ByteArrayInputStream
import java.io.OutputStream

/**
 * Benchmarks Apache Avro's reflection-based datum reader and writer (`ReflectData`).
 *
 * `schema` and `clients` are not declared in this class; presumably they are provided by
 * [SerializationBenchmark] — verify against that class.
 */
internal class ApacheAvroReflectBenchmark : SerializationBenchmark() {
    lateinit var writer: DatumWriter<Clients>
    lateinit var encoder: Encoder
    lateinit var reader: DatumReader<Clients>

    /** Avro binary payload decoded by [read]; produced with Avro4k in [prepareBinaryData]. */
    lateinit var data: ByteArray

    // Set to true by write() and to false by read(); nothing else in this class reads it.
    // NOTE(review): its purpose is not visible from here — confirm before removing.
    var writeMode = false

    /**
     * Registers the logical type conversions on the shared `ReflectData` singleton, then creates
     * the datum writer, the datum reader and an encoder that writes to a null output stream.
     */
    // createDatumWriter/createDatumReader are not typed with Clients, hence the unchecked casts.
    @Suppress("UNCHECKED_CAST")
    override fun setup() {
        val reflectData = ReflectData.get()
        reflectData.addLogicalTypeConversion(Conversions.UUIDConversion())
        reflectData.addLogicalTypeConversion(Conversions.DecimalConversion())
        reflectData.addLogicalTypeConversion(TimeConversions.DateConversion())
        reflectData.addLogicalTypeConversion(TimeConversions.TimestampMillisConversion())

        writer = reflectData.createDatumWriter(schema) as DatumWriter<Clients>
        encoder = EncoderFactory.get().directBinaryEncoder(OutputStream.nullOutputStream(), null)

        reader = reflectData.createDatumReader(schema) as DatumReader<Clients>
    }

    /** Encodes `clients` with Avro4k so that [read] has a byte array to decode. */
    override fun prepareBinaryData() {
        data = Avro.encodeToByteArray(schema, clients)
    }

    /**
     * Decodes [data] through the reflect reader.
     *
     * @return the decoded object; it is returned rather than discarded so the benchmark harness
     * consumes it and the JIT cannot treat the decoding as dead code.
     */
    @Benchmark
    fun read(): Clients {
        if (writeMode) writeMode = false
        val decoder = DecoderFactory.get().directBinaryDecoder(ByteArrayInputStream(data), null)
        return reader.read(null, decoder)
    }

    /** Encodes `clients` through the reflect writer into the null-output-stream [encoder]. */
    @Benchmark
    fun write() {
        if (!writeMode) writeMode = true
        writer.write(clients, encoder)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package com.github.avrokotlin.benchmark

import com.github.avrokotlin.avro4k.Avro
import com.github.avrokotlin.avro4k.decodeFromByteArray
import com.github.avrokotlin.avro4k.encodeToByteArray
import com.github.avrokotlin.avro4k.encodeToStream
import com.github.avrokotlin.benchmark.internal.Clients
import kotlinx.benchmark.Benchmark
import kotlinx.serialization.ExperimentalSerializationApi
import java.io.OutputStream

/**
 * Benchmarks Avro4k's own binary encoding and decoding.
 *
 * `schema` and `clients` are not declared in this class; presumably they are provided by
 * [SerializationBenchmark] — verify against that class.
 */
internal class Avro4kBenchmark : SerializationBenchmark() {
    /** Avro binary payload decoded by [read]; produced in [prepareBinaryData]. */
    lateinit var data: ByteArray

    // Set to true by write() and to false by read(); nothing else in this class reads it.
    // NOTE(review): its purpose is not visible from here — confirm before removing.
    var writeMode = false

    /** Nothing to prepare: this benchmark only uses the default [Avro] instance. */
    override fun setup() {
    }

    /** Encodes `clients` once so that [read] has a byte array to decode. */
    override fun prepareBinaryData() {
        data = Avro.encodeToByteArray(schema, clients)
    }

    /**
     * Decodes [data] into [Clients].
     *
     * @return the decoded object; it is returned rather than discarded so the benchmark harness
     * consumes it and the JIT cannot treat the decoding as dead code.
     */
    @Benchmark
    fun read(): Clients {
        if (writeMode) writeMode = false
        return Avro.decodeFromByteArray<Clients>(schema, data)
    }

    /** Encodes `clients` to a null output stream, so no output buffer is retained. */
    @OptIn(ExperimentalSerializationApi::class)
    @Benchmark
    fun write() {
        if (!writeMode) writeMode = true
        Avro.encodeToStream(schema, clients, OutputStream.nullOutputStream())
    }
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package com.github.avrokotlin.benchmark

import com.github.avrokotlin.avro4k.Avro
import com.github.avrokotlin.avro4k.decodeFromGenericData
import com.github.avrokotlin.avro4k.encodeToByteArray
import com.github.avrokotlin.avro4k.encodeToGenericData
import com.github.avrokotlin.benchmark.internal.Clients
import kotlinx.benchmark.Benchmark
import kotlinx.serialization.ExperimentalSerializationApi
import org.apache.avro.Conversions
import org.apache.avro.generic.GenericData
import org.apache.avro.io.DatumReader
import org.apache.avro.io.DatumWriter
import org.apache.avro.io.DecoderFactory
import org.apache.avro.io.Encoder
import org.apache.avro.io.EncoderFactory
import java.io.ByteArrayInputStream
import java.io.OutputStream

/**
 * Benchmarks the two-step path: Avro4k converts to/from generic data, and Apache Avro's
 * `GenericData` reader/writer handles the binary format.
 *
 * `schema` and `clients` are not declared in this class; presumably they are provided by
 * [SerializationBenchmark] — verify against that class.
 */
@OptIn(ExperimentalSerializationApi::class)
internal class Avro4kGenericWithApacheAvroBenchmark : SerializationBenchmark() {
    lateinit var writer: DatumWriter<Any?>
    lateinit var encoder: Encoder
    lateinit var reader: DatumReader<Any?>

    /** Avro binary payload decoded by [read]; produced in [prepareBinaryData]. */
    lateinit var data: ByteArray

    // Set to true by write() and to false by read(); nothing else in this class reads it.
    // NOTE(review): its purpose is not visible from here — confirm before removing.
    var writeMode = false

    // Built once instead of on every read() call, so the measurement covers decoding only
    // and not the construction of a fresh Avro configuration per invocation.
    private val validatingAvro = Avro { validateSerialization = true }

    /**
     * Registers the decimal conversion on the shared `GenericData` singleton, then creates the
     * datum writer, the datum reader and an encoder that writes to a null output stream.
     */
    // createDatumWriter/createDatumReader are not typed with Any?, hence the unchecked casts.
    @Suppress("UNCHECKED_CAST")
    override fun setup() {
        val genericData = GenericData.get()
        genericData.addLogicalTypeConversion(Conversions.DecimalConversion())
        // Date and timestamp-millis conversions are intentionally left unregistered here:
        // genericData.addLogicalTypeConversion(TimeConversions.DateConversion())
        // genericData.addLogicalTypeConversion(TimeConversions.TimestampMillisConversion())

        writer = genericData.createDatumWriter(schema) as DatumWriter<Any?>
        encoder = EncoderFactory.get().directBinaryEncoder(OutputStream.nullOutputStream(), null)

        reader = genericData.createDatumReader(schema) as DatumReader<Any?>
    }

    /** Encodes `clients` with Avro4k so that [read] has a byte array to decode. */
    override fun prepareBinaryData() {
        data = Avro.encodeToByteArray(schema, clients)
    }

    /**
     * Reads [data] into generic data with Apache Avro, then maps it to [Clients] with Avro4k.
     *
     * @return the decoded object; it is returned rather than discarded so the benchmark harness
     * consumes it and the JIT cannot treat the decoding as dead code.
     */
    @Benchmark
    fun read(): Clients {
        if (writeMode) writeMode = false
        val decoder = DecoderFactory.get().directBinaryDecoder(ByteArrayInputStream(data), null)
        val genericData = reader.read(null, decoder)
        return validatingAvro.decodeFromGenericData<Clients>(schema, genericData)
    }

    /** Converts `clients` to generic data with Avro4k, then writes it with Apache Avro. */
    @Benchmark
    fun write() {
        if (!writeMode) writeMode = true
        val genericData = Avro.encodeToGenericData(schema, clients)
        writer.write(genericData, encoder)
    }
}
Loading

0 comments on commit 23814d2

Please sign in to comment.