diff --git a/README.md b/README.md index fc18025e..7cebe6db 100644 --- a/README.md +++ b/README.md @@ -415,10 +415,33 @@ Would result in the following schema: } ``` +### Nullable fields, optional fields and compatibility + +#### TL;DR +To make your nullable fields optional (i.e. put `default: null` on every nullable field that has no other explicit default) and be able to remove nullable fields without breaking compatibility checks, +set `defaultNullForNullableFields` to `true` in the configuration. Example: +```kotlin +Avro(AvroConfiguration(defaultNullForNullableFields = true)) +``` + +#### Longer story + +With Avro, you can have nullable fields and optional fields, both of which are taken into account for compatibility checking when using the schema registry. + +But if you want to remove a nullable field that is not optional, depending on the compatibility mode, the change may not be compatible because of the missing default value. + +- What is an optional field? +> An optional field is a field that has a *default* value, like an int with a default of `-1`. + +- What is a nullable field? +> A nullable field is a field that contains a `null` type in its type union, but **it's not an optional field unless you set its `default` value to `null`**. + +So, to mark a nullable field as optional and facilitate Avro contract evolution regarding compatibility checks, set its `default` to `null`. + ## Types -Avro4s supports the Avro logical types out of the box as well as other common JDK types. +Avro4k supports the Avro logical types out of the box as well as other common JDK types. Avro has no understanding of Kotlin types, or anything outside of it's built in set of supported types, so all values must be converted to something that is compatible with Avro. 
diff --git a/src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt b/src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt index 6e984525..aad93b3b 100644 --- a/src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt +++ b/src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt @@ -4,10 +4,18 @@ package com.github.avrokotlin.avro4k import com.github.avrokotlin.avro4k.decoder.RootRecordDecoder import com.github.avrokotlin.avro4k.encoder.RootRecordEncoder -import com.github.avrokotlin.avro4k.io.* +import com.github.avrokotlin.avro4k.io.AvroDecodeFormat +import com.github.avrokotlin.avro4k.io.AvroEncodeFormat +import com.github.avrokotlin.avro4k.io.AvroFormat +import com.github.avrokotlin.avro4k.io.AvroInputStream +import com.github.avrokotlin.avro4k.io.AvroOutputStream import com.github.avrokotlin.avro4k.schema.schemaFor import com.github.avrokotlin.avro4k.serializer.UUIDSerializer -import kotlinx.serialization.* +import kotlinx.serialization.BinaryFormat +import kotlinx.serialization.DeserializationStrategy +import kotlinx.serialization.ExperimentalSerializationApi +import kotlinx.serialization.SerialFormat +import kotlinx.serialization.SerializationStrategy import kotlinx.serialization.descriptors.SerialDescriptor import kotlinx.serialization.modules.SerializersModule import kotlinx.serialization.modules.contextual @@ -259,7 +267,7 @@ class Avro( serializersModule, descriptor, descriptor.annotations, - configuration.namingStrategy, + configuration, mutableMapOf() ).schema() diff --git a/src/main/kotlin/com/github/avrokotlin/avro4k/AvroConfiguration.kt b/src/main/kotlin/com/github/avrokotlin/avro4k/AvroConfiguration.kt index b441f5d9..91a082ac 100644 --- a/src/main/kotlin/com/github/avrokotlin/avro4k/AvroConfiguration.kt +++ b/src/main/kotlin/com/github/avrokotlin/avro4k/AvroConfiguration.kt @@ -3,4 +3,7 @@ package com.github.avrokotlin.avro4k import com.github.avrokotlin.avro4k.schema.DefaultNamingStrategy import com.github.avrokotlin.avro4k.schema.NamingStrategy -data 
class AvroConfiguration(val namingStrategy: NamingStrategy = DefaultNamingStrategy) +data class AvroConfiguration( + val namingStrategy: NamingStrategy = DefaultNamingStrategy, + val defaultNullForNullableFields: Boolean = false, +) diff --git a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/ClassSchemaFor.kt b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/ClassSchemaFor.kt index 84b15e03..e68a4d98 100644 --- a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/ClassSchemaFor.kt +++ b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/ClassSchemaFor.kt @@ -2,6 +2,7 @@ package com.github.avrokotlin.avro4k.schema import com.github.avrokotlin.avro4k.AnnotationExtractor import com.github.avrokotlin.avro4k.Avro +import com.github.avrokotlin.avro4k.AvroConfiguration import com.github.avrokotlin.avro4k.AvroProp import com.github.avrokotlin.avro4k.RecordNaming import kotlinx.serialization.ExperimentalSerializationApi @@ -22,7 +23,7 @@ import org.apache.avro.SchemaBuilder @ExperimentalSerializationApi class ClassSchemaFor( private val descriptor: SerialDescriptor, - private val namingStrategy: NamingStrategy, + private val configuration: AvroConfiguration, private val serializersModule: SerializersModule, private val resolvedSchemas: MutableMap ) : SchemaFor { @@ -74,12 +75,12 @@ class ClassSchemaFor( val fieldDescriptor = descriptor.getElementDescriptor(index) val annos = AnnotationExtractor(descriptor.getElementAnnotations( index)) - val fieldNaming = RecordNaming(descriptor, index, namingStrategy) + val fieldNaming = RecordNaming(descriptor, index, configuration.namingStrategy) val schema = schemaFor( serializersModule, fieldDescriptor, descriptor.getElementAnnotations(index), - namingStrategy, + configuration, resolvedSchemas ).schema() @@ -89,7 +90,7 @@ class ClassSchemaFor( val (size, name) = when (val a = annos.fixed()) { null -> { val fieldAnnos = AnnotationExtractor(fieldDescriptor.annotations) - val n = RecordNaming(fieldDescriptor, namingStrategy) + 
val n = RecordNaming(fieldDescriptor, configuration.namingStrategy) when (val b = fieldAnnos.fixed()) { null -> 0 to n.name else -> b to n.name @@ -115,18 +116,20 @@ class ClassSchemaFor( else -> schemaOrFixed.overrideNamespace(ns) } - val default: Any? = annos.default()?.let { + val default: Any? = annos.default()?.let { annotationDefaultValue -> when { - it == Avro.NULL -> Schema.Field.NULL_DEFAULT_VALUE + annotationDefaultValue == Avro.NULL -> Schema.Field.NULL_DEFAULT_VALUE schemaWithResolvedNamespace.extractNonNull().type in listOf( Schema.Type.FIXED, Schema.Type.BYTES, Schema.Type.STRING, Schema.Type.ENUM - ) -> it - else -> json.parseToJsonElement(it).convertToAvroDefault() + ) -> annotationDefaultValue + else -> json.parseToJsonElement(annotationDefaultValue).convertToAvroDefault() } - } + } ?: if (configuration.defaultNullForNullableFields && fieldDescriptor.isNullable) { + Schema.Field.NULL_DEFAULT_VALUE + } else null val field = Schema.Field(fieldNaming.name, schemaWithResolvedNamespace, annos.doc(), default) val props = this.descriptor.getElementAnnotations(index).filterIsInstance() diff --git a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/SchemaFor.kt b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/SchemaFor.kt index ddb8d336..bd8916f6 100644 --- a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/SchemaFor.kt +++ b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/SchemaFor.kt @@ -2,6 +2,7 @@ package com.github.avrokotlin.avro4k.schema import com.github.avrokotlin.avro4k.AnnotationExtractor import com.github.avrokotlin.avro4k.Avro +import com.github.avrokotlin.avro4k.AvroConfiguration import com.github.avrokotlin.avro4k.RecordNaming import kotlinx.serialization.ExperimentalSerializationApi import kotlinx.serialization.InternalSerializationApi @@ -64,7 +65,7 @@ class EnumSchemaFor( @ExperimentalSerializationApi class PairSchemaFor(private val descriptor: SerialDescriptor, - private val namingStrategy: NamingStrategy, + private val 
configuration: AvroConfiguration, private val serializersModule: SerializersModule, private val resolvedSchemas: MutableMap ) : SchemaFor { @@ -74,14 +75,14 @@ class PairSchemaFor(private val descriptor: SerialDescriptor, serializersModule, descriptor.getElementDescriptor(0), descriptor.getElementAnnotations(0), - namingStrategy, + configuration, resolvedSchemas ) val b = schemaFor( serializersModule, descriptor.getElementDescriptor(1), descriptor.getElementAnnotations(1), - namingStrategy, + configuration, resolvedSchemas ) return SchemaBuilder.unionOf() @@ -91,10 +92,11 @@ class PairSchemaFor(private val descriptor: SerialDescriptor, .endUnion() } } + @ExperimentalSerializationApi class ListSchemaFor(private val descriptor: SerialDescriptor, private val serializersModule: SerializersModule, - private val namingStrategy: NamingStrategy, + private val configuration: AvroConfiguration, private val resolvedSchemas: MutableMap ) : SchemaFor { @@ -107,7 +109,7 @@ class ListSchemaFor(private val descriptor: SerialDescriptor, val elementSchema = schemaFor(serializersModule, elementType, descriptor.getElementAnnotations(0), - namingStrategy, + configuration, resolvedSchemas ).schema() return Schema.createArray(elementSchema) @@ -115,10 +117,11 @@ class ListSchemaFor(private val descriptor: SerialDescriptor, } } } + @ExperimentalSerializationApi class MapSchemaFor(private val descriptor: SerialDescriptor, private val serializersModule: SerializersModule, - private val namingStrategy: NamingStrategy, + private val configuration: AvroConfiguration, private val resolvedSchemas: MutableMap ) : SchemaFor { @@ -131,25 +134,31 @@ class MapSchemaFor(private val descriptor: SerialDescriptor, serializersModule, valueType, descriptor.getElementAnnotations(1), - namingStrategy, + configuration, resolvedSchemas ).schema() return Schema.createMap(valueSchema) } + else -> throw RuntimeException("Avro only supports STRING as the key type in a MAP") } } } + @ExperimentalSerializationApi 
-class NullableSchemaFor(private val schemaFor: SchemaFor, private val annotations : List) : SchemaFor { +class NullableSchemaFor( + private val schemaFor: SchemaFor, + private val annotations: List, +) : SchemaFor { - private val nullFirst by lazy{ + private val nullFirst by lazy { //The default value can only be of the first type in the union definition. //Therefore we have to check the default value in order to decide the order of types within the union. //If no default is set, or if the default value is of type "null", nulls will be first. val default = AnnotationExtractor(annotations).default() default == null || default == Avro.NULL } + override fun schema(): Schema { val elementSchema = schemaFor.schema() val nullSchema = SchemaBuilder.builder().nullType() @@ -162,7 +171,7 @@ class NullableSchemaFor(private val schemaFor: SchemaFor, private val annotation fun schemaFor(serializersModule: SerializersModule, descriptor: SerialDescriptor, annos: List, - namingStrategy: NamingStrategy, + configuration: AvroConfiguration, resolvedSchemas: MutableMap ): SchemaFor { @@ -173,7 +182,7 @@ fun schemaFor(serializersModule: SerializersModule, } else descriptor val schemaFor: SchemaFor = when (underlying) { - is AvroDescriptor -> SchemaFor.const(underlying.schema(annos, serializersModule, namingStrategy)) + is AvroDescriptor -> SchemaFor.const(underlying.schema(annos, serializersModule, configuration.namingStrategy)) else -> when (descriptor.unwrapValueClass.kind) { PrimitiveKind.STRING -> SchemaFor.StringSchemaFor PrimitiveKind.LONG -> SchemaFor.LongSchemaFor @@ -193,16 +202,18 @@ fun schemaFor(serializersModule: SerializersModule, "Contextual or default serializer not found for $descriptor " }, annos, - namingStrategy, + configuration, resolvedSchemas ) + StructureKind.CLASS, StructureKind.OBJECT -> when (descriptor.serialName) { - "kotlin.Pair" -> PairSchemaFor(descriptor, namingStrategy, serializersModule, resolvedSchemas) - else -> ClassSchemaFor(descriptor, 
namingStrategy, serializersModule, resolvedSchemas) + "kotlin.Pair" -> PairSchemaFor(descriptor, configuration, serializersModule, resolvedSchemas) + else -> ClassSchemaFor(descriptor, configuration, serializersModule, resolvedSchemas) } - StructureKind.LIST -> ListSchemaFor(descriptor, serializersModule, namingStrategy, resolvedSchemas) - StructureKind.MAP -> MapSchemaFor(descriptor, serializersModule, namingStrategy, resolvedSchemas) - is PolymorphicKind -> UnionSchemaFor(descriptor, namingStrategy, serializersModule, resolvedSchemas) + + StructureKind.LIST -> ListSchemaFor(descriptor, serializersModule, configuration, resolvedSchemas) + StructureKind.MAP -> MapSchemaFor(descriptor, serializersModule, configuration, resolvedSchemas) + is PolymorphicKind -> UnionSchemaFor(descriptor, configuration, serializersModule, resolvedSchemas) else -> throw SerializationException("Unsupported type ${descriptor.serialName} of ${descriptor.kind}") } } diff --git a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/UnionSchemaFor.kt b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/UnionSchemaFor.kt index 42591ce2..ae51eb36 100644 --- a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/UnionSchemaFor.kt +++ b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/UnionSchemaFor.kt @@ -1,5 +1,6 @@ package com.github.avrokotlin.avro4k.schema +import com.github.avrokotlin.avro4k.AvroConfiguration import com.github.avrokotlin.avro4k.RecordNaming import com.github.avrokotlin.avro4k.possibleSerializationSubclasses import kotlinx.serialization.ExperimentalSerializationApi @@ -10,7 +11,7 @@ import org.apache.avro.Schema @ExperimentalSerializationApi class UnionSchemaFor( private val descriptor: SerialDescriptor, - private val namingStrategy: NamingStrategy, + private val configuration: AvroConfiguration, private val serializersModule: SerializersModule, private val resolvedSchemas: MutableMap ) : SchemaFor { @@ -19,7 +20,7 @@ class UnionSchemaFor( 
descriptor.possibleSerializationSubclasses(serializersModule).sortedBy { it.serialName } return Schema.createUnion( leafSerialDescriptors.map { - ClassSchemaFor(it, namingStrategy, serializersModule, resolvedSchemas).schema() + ClassSchemaFor(it, configuration, serializersModule, resolvedSchemas).schema() } ) } diff --git a/src/test/kotlin/com/github/avrokotlin/avro4k/schema/NullableWithDefaultsSchemaTest.kt b/src/test/kotlin/com/github/avrokotlin/avro4k/schema/NullableWithDefaultsSchemaTest.kt new file mode 100644 index 00000000..b3f4ebaf --- /dev/null +++ b/src/test/kotlin/com/github/avrokotlin/avro4k/schema/NullableWithDefaultsSchemaTest.kt @@ -0,0 +1,33 @@ +package com.github.avrokotlin.avro4k.schema + +import com.github.avrokotlin.avro4k.Avro +import com.github.avrokotlin.avro4k.AvroConfiguration +import io.kotest.core.spec.style.FunSpec +import io.kotest.matchers.shouldBe +import kotlinx.serialization.Serializable + +class NullableWithDefaultsSchemaTest : FunSpec({ + + test("generate null as Union[T, Null]") { + + val expected = org.apache.avro.Schema.Parser().parse(javaClass.getResourceAsStream("/nullables-with-defaults.json")) + val schema = Avro(AvroConfiguration(defaultNullForNullableFields = true)).schema(Test.serializer()) + schema.toString(true) shouldBe expected.toString(true) + } + +// test("move default option values to first schema as per avro spec") { +// val schema = AvroSchema[OptionWithDefault] +// val expected = new org . apache . avro . Schema . Parser ().parse(getClass.getResourceAsStream("/option_default_value.json")) +// schema.toString(true) shouldBe expected.toString(true) +// } +// +// test("if a field has a default value of null then define the field to be nullable") { +// val schema = AvroSchema[FieldWithNull] +// val expected = new org . apache . avro . Schema . 
Parser ().parse(getClass.getResourceAsStream("/option_from_null_default.json")) +// schema.toString(true) shouldBe expected.toString(true) +// } + +}) { + @Serializable + data class Test(val nullableString: String?, val nullableBoolean: Boolean?) +} diff --git a/src/test/resources/nullables-with-defaults.json b/src/test/resources/nullables-with-defaults.json new file mode 100644 index 00000000..c8499d37 --- /dev/null +++ b/src/test/resources/nullables-with-defaults.json @@ -0,0 +1,23 @@ +{ + "type": "record", + "name": "Test", + "namespace": "com.github.avrokotlin.avro4k.schema.NullableWithDefaultsSchemaTest", + "fields": [ + { + "name": "nullableString", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "nullableBoolean", + "type": [ + "null", + "boolean" + ], + "default": null + } + ] +}