Skip to content

Commit

Permalink
feat: revamp the schema generation
Browse files Browse the repository at this point in the history
  • Loading branch information
Chuckame committed Apr 15, 2024
1 parent 8aa48f3 commit c9034e4
Show file tree
Hide file tree
Showing 41 changed files with 1,252 additions and 710 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
- name: Checkout the repo
uses: actions/checkout@v4
- name: Setup Gradle
uses: gradle/gradle-build-action@v3
uses: gradle/actions/setup-gradle@v3

- name: Run tests
run: ./gradlew check
Expand All @@ -38,7 +38,7 @@ jobs:
- name: Checkout the repo
uses: actions/checkout@v4
- name: Setup Gradle
uses: gradle/gradle-build-action@v3
uses: gradle/actions/setup-gradle@v3

- name: deploy to sonatype snapshots
run: ./gradlew publish
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- name: Checkout the repo
uses: actions/checkout@v4
- name: Setup Gradle
uses: gradle/gradle-build-action@v3
uses: gradle/actions/setup-gradle@v3

- name: Run tests
run: ./gradlew check
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
- name: Checkout the repo
uses: actions/checkout@v4
- name: Setup Gradle
uses: gradle/gradle-build-action@v3
uses: gradle/actions/setup-gradle@v3

- name: publish release
run: ./gradlew publishToSonatype closeAndReleaseSonatypeStagingRepository
Expand Down
7 changes: 4 additions & 3 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ dependencies {
api(libs.kotlinx.serialization.core)
implementation(libs.kotlinx.serialization.json)
implementation(libs.xerial.snappy)
implementation(kotlin("reflect"))
testImplementation(libs.kotest.junit5)
testImplementation(libs.kotest.core)
testImplementation(libs.kotest.json)
Expand All @@ -44,9 +45,9 @@ dependencies {

tasks.withType<KotlinCompile>().configureEach {
kotlinOptions.jvmTarget = "1.8"
kotlinOptions.apiVersion = "1.6"
kotlinOptions.languageVersion = "1.6"
kotlinOptions.freeCompilerArgs += "-opt-in=kotlin.RequiresOptIn"
kotlinOptions.apiVersion = "1.8"
kotlinOptions.languageVersion = "1.8"
kotlinOptions.freeCompilerArgs += listOf("-opt-in=kotlinx.serialization.ExperimentalSerializationApi", "-Xcontext-receivers")
}
java {
sourceCompatibility = JavaVersion.VERSION_1_8
Expand Down
32 changes: 20 additions & 12 deletions src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ import com.github.avrokotlin.avro4k.io.AvroEncodeFormat
import com.github.avrokotlin.avro4k.io.AvroFormat
import com.github.avrokotlin.avro4k.io.AvroInputStream
import com.github.avrokotlin.avro4k.io.AvroOutputStream
import com.github.avrokotlin.avro4k.schema.schemaFor
import com.github.avrokotlin.avro4k.schema.AvroSerialDescriptorValueVisitor
import com.github.avrokotlin.avro4k.serializer.BigDecimalAsStringSerializer
import com.github.avrokotlin.avro4k.serializer.BigIntegerSerializer
import com.github.avrokotlin.avro4k.serializer.URLSerializer
import com.github.avrokotlin.avro4k.serializer.UUIDSerializer
import kotlinx.serialization.BinaryFormat
import kotlinx.serialization.DeserializationStrategy
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.SerialFormat
import kotlinx.serialization.SerializationStrategy
import kotlinx.serialization.descriptors.SerialDescriptor
Expand All @@ -31,6 +33,7 @@ import java.nio.ByteBuffer
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.Paths
import java.util.concurrent.ConcurrentHashMap

open class AvroInputStreamBuilder<T>(
private val converter: (Any) -> T,
Expand Down Expand Up @@ -60,10 +63,12 @@ open class AvroInputStreamBuilder<T>(
val wschema = writerSchema ?: error("Writer schema needs to be supplied for Json format")
AvroDecodeFormat.Json(wschema, readerSchema ?: wschema)
}

is AvroFormat.BinaryFormat -> {
val wschema = writerSchema ?: error("Writer schema needs to be supplied for Binary format")
AvroDecodeFormat.Binary(wschema, readerSchema ?: wschema)
}

is AvroFormat.DataFormat -> AvroDecodeFormat.Data(writerSchema, readerSchema)
}
}
Expand Down Expand Up @@ -152,11 +157,12 @@ class AvroOutputStreamBuilder<T>(
}
}

@OptIn(ExperimentalSerializationApi::class)
class Avro internal constructor(
internal val configuration: AvroInternalConfiguration,
override val serializersModule: SerializersModule,
) : SerialFormat, BinaryFormat {
internal val schemaCache: MutableMap<SerialDescriptor, Schema> = ConcurrentHashMap()

constructor(
serializersModule: SerializersModule = defaultModule,
configuration: AvroConfiguration = AvroConfiguration(),
Expand All @@ -167,7 +173,10 @@ class Avro internal constructor(
companion object {
val defaultModule =
SerializersModule {
contextual(UUIDSerializer())
contextual(UUIDSerializer)
contextual(BigDecimalAsStringSerializer)
contextual(BigIntegerSerializer)
contextual(URLSerializer)
}
val default = Avro(defaultModule)

Expand Down Expand Up @@ -303,14 +312,13 @@ class Avro internal constructor(
)
}

fun schema(descriptor: SerialDescriptor): Schema =
schemaFor(
serializersModule,
descriptor,
descriptor.annotations,
configuration,
mutableMapOf()
).schema()
fun schema(descriptor: SerialDescriptor): Schema {
return schemaCache.getOrPut(descriptor) {
lateinit var output: Schema
AvroSerialDescriptorValueVisitor(this) { output = it }.visitValue(descriptor)
return output
}
}

fun <T> schema(serializer: SerializationStrategy<T>): Schema {
return schema(serializer.descriptor)
Expand Down
122 changes: 68 additions & 54 deletions src/main/kotlin/com/github/avrokotlin/avro4k/annotations.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,43 @@

package com.github.avrokotlin.avro4k

import com.github.avrokotlin.avro4k.schema.ClassElement
import com.github.avrokotlin.avro4k.serializer.BigDecimalSerializer
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.SerialInfo
import kotlinx.serialization.descriptors.PrimitiveKind
import org.apache.avro.LogicalTypes
import org.apache.avro.LogicalType
import org.apache.avro.Schema
import org.apache.avro.SchemaBuilder
import org.intellij.lang.annotations.Language
import kotlin.reflect.KClass

/**
* When annotated on a property, overrides the namespace for the nested record.
* When annotated on a property, deeply overrides the namespace for all the nested named types (records, enums and fixed).
*
* Works with standard classes and inline classes.
*/
@SerialInfo
@Target(AnnotationTarget.PROPERTY)
annotation class AvroNamespaceOverride(val value: String)
annotation class AvroNamespaceOverride(
val value: String,
)

/**
* Adds a property to the Avro schema or field.
*
* Ignored in inline classes.
*/
@SerialInfo
@Repeatable
@Target(AnnotationTarget.PROPERTY, AnnotationTarget.CLASS)
annotation class AvroProp(val key: String, val value: String)

/**
* Adds a json property to the Avro schema or field.
*
* Ignored in inline classes.
*/
@SerialInfo
@Repeatable
@Target(AnnotationTarget.PROPERTY, AnnotationTarget.CLASS)
annotation class AvroJsonProp(
val key: String,
Expand All @@ -31,67 +47,33 @@ annotation class AvroJsonProp(

/**
* To be used with [BigDecimalSerializer] to specify the scale, precision, type and rounding mode of the decimal value.
*
* Can be used with [AvroFixed] to serialize value as a fixed type.
*/
@SerialInfo
@Target(AnnotationTarget.PROPERTY)
annotation class AvroDecimalLogicalType(
annotation class AvroDecimal(
val scale: Int = 2,
val precision: Int = 8,
val schema: LogicalDecimalTypeEnum = LogicalDecimalTypeEnum.BYTES,
)

enum class LogicalDecimalTypeEnum {
BYTES,
STRING,

/**
* Fixed requires the field annotated with [AvroFixed]
*/
FIXED,
}

@SerialInfo
@Target(AnnotationTarget.PROPERTY)
annotation class AvroUuidLogicalType

@SerialInfo
@Target(AnnotationTarget.PROPERTY)
annotation class AvroTimeLogicalType(val type: LogicalTimeTypeEnum)

enum class LogicalTimeTypeEnum(val kind: PrimitiveKind, val schemaFor: () -> Schema) {
DATE(PrimitiveKind.INT, { LogicalTypes.date().addToSchema(SchemaBuilder.builder().intType()) }),
TIME_MILLIS(
PrimitiveKind.INT,
{ LogicalTypes.timeMillis().addToSchema(SchemaBuilder.builder().intType()) }
),
TIME_MICROS(
PrimitiveKind.LONG,
{ LogicalTypes.timeMicros().addToSchema(SchemaBuilder.builder().longType()) }
),
TIMESTAMP_MILLIS(
PrimitiveKind.LONG,
{ LogicalTypes.timestampMillis().addToSchema(SchemaBuilder.builder().longType()) }
),
TIMESTAMP_MICROS(
PrimitiveKind.LONG,
{ LogicalTypes.timestampMicros().addToSchema(SchemaBuilder.builder().longType()) }
),
LOCAL_TIMESTAMP_MILLIS(
PrimitiveKind.LONG,
{ LogicalTypes.localTimestampMillis().addToSchema(SchemaBuilder.builder().longType()) }
),
LOCAL_TIMESTAMP_MICROS(
PrimitiveKind.LONG,
{ LogicalTypes.localTimestampMicros().addToSchema(SchemaBuilder.builder().longType()) }
),
}

/**
* Adds documentation to:
* - a record's field
* - a record
* - an enum
*
* Ignored in inline classes.
*/
@SerialInfo
@Target(AnnotationTarget.PROPERTY, AnnotationTarget.CLASS)
annotation class AvroDoc(val value: String)

/**
* Adds aliases to a field of a record. It helps to allow having different names for the same field for better compatibility when changing a schema.
*
* Ignored in inline classes.
*
* @param value The aliases for the annotated property. Note that the given aliases won't be changed by the configured [AvroConfiguration.fieldNamingStrategy].
*/
@SerialInfo
Expand All @@ -106,12 +88,44 @@ annotation class AvroAlias(vararg val value: String)
@Target(AnnotationTarget.PROPERTY)
annotation class AvroFixed(val size: Int)

/**
* Sets the default avro value for a record's field.
*
* Ignored in inline classes.
*/
@SerialInfo
@Target(AnnotationTarget.PROPERTY)
annotation class AvroDefault(
@Language("JSON") val value: String,
)

/**
* This annotation indicates that the annotated enum class should be serialized as an Avro enum with the given default value.
*
* It must be annotated on an enum class. Otherwise, it will be ignored.
*/
@SerialInfo
@Target(AnnotationTarget.CLASS)
annotation class AvroEnumDefault(val value: String)
annotation class AvroEnumDefault(val value: String)

/**
* Allows to specify the schema of a property.
*/
@SerialInfo
@Target(AnnotationTarget.PROPERTY)
annotation class AvroSchema(val value: KClass<out AvroSchemaSupplier>)

interface AvroSchemaSupplier {
fun getSchema(classElement: ClassElement): Schema
}

/**
* Allows to specify the logical type applied on the generated schema of a property.
*/
@SerialInfo
@Target(AnnotationTarget.PROPERTY)
annotation class AvroLogicalType(val value: KClass<out AvroLogicalTypeSupplier>)

interface AvroLogicalTypeSupplier {
fun getLogicalType(classElement: ClassElement): LogicalType
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,17 @@ package com.github.avrokotlin.avro4k.decoder

import kotlinx.serialization.SerializationException
import org.apache.avro.generic.GenericData
import org.apache.avro.generic.GenericEnumSymbol
import org.apache.avro.util.Utf8
import java.nio.ByteBuffer

interface FromAvroValue<T, R> {
fun fromValue(value: T): R
}

object StringFromAvroValue : FromAvroValue<Any?, String> {
override fun fromValue(value: Any?): String {
object StringFromAvroValue {
fun fromValue(value: Any?): String {
return when (value) {
is String -> value
is Utf8 -> value.toString()
is CharSequence -> value.toString()
is GenericData.Fixed -> String(value.bytes())
is ByteArray -> String(value)
is CharSequence -> value.toString()
is ByteBuffer -> String(value.array())
null -> throw SerializationException("Cannot decode <null> as a string")
else -> throw SerializationException("Unsupported type for String [is ${value::class.qualifiedName}]")
}
}
}

object EnumFromAvroValue : FromAvroValue<Any, String> {
override fun fromValue(value: Any): String {
return when (value) {
is GenericEnumSymbol<*> -> value.toString()
is String -> value
else -> value.toString()
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class ListDecoder(
override fun fieldSchema(): Schema = schema.elementType

override fun decodeEnum(enumDescriptor: SerialDescriptor): Int {
val symbol = EnumFromAvroValue.fromValue(array[index]!!)
val symbol = array[index]!!.toString()
return (0 until enumDescriptor.elementsCount).find { enumDescriptor.getElementName(it) == symbol } ?: -1
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class RecordDecoder(
}

override fun decodeEnum(enumDescriptor: SerialDescriptor): Int {
val symbol = EnumFromAvroValue.fromValue(fieldValue()!!)
val symbol = fieldValue()!!.toString()
val enumValueByEnumName =
(0 until enumDescriptor.elementsCount).associateBy { enumDescriptor.getElementName(it) }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import com.github.avrokotlin.avro4k.AvroInternalConfiguration
import com.github.avrokotlin.avro4k.ListRecord
import com.github.avrokotlin.avro4k.Record
import com.github.avrokotlin.avro4k.schema.extractNonNull
import com.github.avrokotlin.avro4k.schema.unwrapValueClass
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.SerializationException
import kotlinx.serialization.descriptors.PolymorphicKind
Expand Down Expand Up @@ -38,6 +37,10 @@ interface StructureEncoder : FieldEncoder {
}
}

@ExperimentalSerializationApi
internal val SerialDescriptor.unwrapValueClass: SerialDescriptor
get() = if (isInline) getElementDescriptor(0) else this

@ExperimentalSerializationApi
class RecordEncoder(
private val schema: Schema,
Expand Down
Loading

0 comments on commit c9034e4

Please sign in to comment.