
Commit 1f2f11d

[#65] confluent rest connector (#87)
New features and improvements:
- Move encoding responsibilities to Connector instead of AvroSchemaManager.
- Add "confluent" connector as a facade to Confluent Schema Registry.
1 parent 8d11a8a commit 1f2f11d

File tree

24 files changed, +1530 -74 lines changed


README.md

Lines changed: 108 additions & 0 deletions
@@ -20,6 +20,7 @@ Table of contents
- [HBase](#hbase)
- [PostgreSql](#postgresql)
- [REST](#rest)
- [Confluent](#confluent)
---

Overview
@@ -130,6 +131,26 @@ libraryDependencies += "it.agilelab" %% "darwin-mock-connector" % "1.1.0-SNAPSHO
</dependency>
```

### Confluent Schema Registry Connector

Darwin can be used as a *facade* over the Confluent Schema Registry.

#### sbt

```scala
libraryDependencies += "it.agilelab" %% "darwin-confluent-connector" % "1.1.0-SNAPSHOT"
```

#### maven

```xml
<dependency>
    <groupId>it.agilelab</groupId>
    <artifactId>darwin-confluent-connector_2.11</artifactId>
    <version>1.1.0-SNAPSHOT</version>
</dependency>
```

Background
-------------
In systems where objects encoded using Avro are stored, a problem arises when there is an evolution of the structure
@@ -402,6 +423,93 @@ darwin-rest {
}
```

## Confluent

Darwin can be used as a *facade* over the Confluent Schema Registry.

Connecting to the Confluent Schema Registry lets applications that already use Darwin keep functioning correctly
when running on the Confluent platform.

The connector can be used even if the only Confluent component in use is the Schema Registry.

When the confluent connector is used, the Avro single object encoding is performed using the *Confluent* flavour.

### Confluent Single object encoding

The Schema Registry assigns globally unique IDs to schemas; each Avro message is encoded as follows:

```
0x00                | 1 byte magic number representing Confluent-encoded Avro
0xXX 0xXX 0xXX 0xXX | 4 byte schema identifier interpreted as an integer
...                 | Avro-encoded payload without schema (raw Avro bytes not prepended with the JSON schema)
```
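
For illustration only, here is a minimal sketch of how such a frame could be produced and parsed with a plain `ByteBuffer`; `ConfluentFraming`, `wrap` and `unwrap` are hypothetical helper names (not part of Darwin or the Confluent client), and the 4-byte schema id is assumed to be big-endian:

```scala
import java.nio.ByteBuffer

// Hypothetical helpers illustrating the Confluent framing described above.
object ConfluentFraming {
  val MagicByte: Byte = 0x00

  // Prepend the 1-byte magic number and the 4-byte schema id (assumed big-endian) to the raw Avro bytes.
  def wrap(schemaId: Int, avroPayload: Array[Byte]): Array[Byte] = {
    val buffer = ByteBuffer.allocate(1 + 4 + avroPayload.length)
    buffer.put(MagicByte).putInt(schemaId).put(avroPayload)
    buffer.array()
  }

  // Split a framed message back into the schema id and the raw Avro payload.
  def unwrap(message: Array[Byte]): (Int, Array[Byte]) = {
    val buffer = ByteBuffer.wrap(message)
    require(buffer.get() == MagicByte, "not a Confluent-encoded Avro message")
    val schemaId = buffer.getInt()
    val payload  = new Array[Byte](buffer.remaining())
    buffer.get(payload)
    (schemaId, payload)
  }
}
```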

### Subject

The Confluent Schema Registry supports attaching schemas to a `subject`; the subject is the granularity at which schema
compatibility is enforced. Schemas can be registered with three subject strategies:

* topic: the subject is the name of the topic (a topic contains a single Avro data type)
* record: the subject is the fully qualified name of the record (multiple topics can contain the same Avro data type)
* topic-record: the subject is derived from the topic and the record's fully qualified name (a topic can contain
  multiple data types, and compatibility on the same Avro data type is enforced per topic instead of globally)

In order to support this scheme, Avro schemas registered via Darwin should carry a custom extension property
(`x-darwin-subject`), like in this example:
```json
{
  "type" : "record",
  "name" : "record",
  "fields" : [ {
    "name" : "stringField",
    "type" : "string"
  }, {
    "name" : "stringField2",
    "type" : [ "string", "null" ],
    "default" : "default-for-nullable"
  } ],
  "x-darwin-subject" : "subject-string"
}
```
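
Since `x-darwin-subject` is an ordinary top-level attribute of the schema, it can be read back with Avro's standard API. A small sketch using the example above (the `schemaJson` value is simply the same JSON repeated as a string):

```scala
import org.apache.avro.Schema

// Parse the example schema and read the custom extension property back.
val schemaJson =
  """{
    |  "type": "record",
    |  "name": "record",
    |  "fields": [
    |    { "name": "stringField", "type": "string" },
    |    { "name": "stringField2", "type": ["string", "null"], "default": "default-for-nullable" }
    |  ],
    |  "x-darwin-subject": "subject-string"
    |}""".stripMargin

val schema: Schema = new Schema.Parser().parse(schemaJson)

// getProp returns the value of a non-reserved top-level attribute, or null if it is absent.
val subject: Option[String] = Option(schema.getProp("x-darwin-subject")) // Some("subject-string")
```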

## Configuration

```hocon
darwin {
  type = "lazy"
  connector = "confluent"

  endpoints: ["http://schema-registry-00:7777", "http://schema-registry-01:7777"]
  max-cached-schemas: 1000
  kafka.schemaregistry.standard-property-1: 1
  kafka.schemaregistry.standard-property-2: "default"
}
```

The confluent connector is enabled by declaring `confluent` as the connector.

The `endpoints` setting is a list of URLs pointing to the Confluent Schema Registry instances.

The `max-cached-schemas` setting configures how many schemas are cached internally by the confluent schema registry connector.

All other properties are injected into the Confluent Schema Registry client configuration.

For example, if the Confluent Schema Registry declares a property `kafka.schemaregistry.auth`, it can simply be added
to the Darwin configuration like this:
```hocon
darwin {
  type = "lazy"
  connector = "confluent"

  endpoints: ["http://schema-registry-00:7777", "http://schema-registry-01:7777"]
  max-cached-schemas: 1000
  kafka.schemaregistry.auth: "true"
}
```
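
For completeness, a sketch of wiring this configuration into an application. `ConfigFactory.load()` is the standard Typesafe Config entry point; `AvroSchemaManagerFactory.initialize` is assumed here to be Darwin's usual bootstrap call and should be checked against the Getting Started section:

```scala
import com.typesafe.config.ConfigFactory
import it.agilelab.darwin.manager.{ AvroSchemaManager, AvroSchemaManagerFactory }

// Load the "darwin" block from application.conf. Keys that Darwin itself does not know
// (e.g. kafka.schemaregistry.auth) stay in the Config and are forwarded to the
// Confluent Schema Registry client by the confluent connector.
val darwinConfig = ConfigFactory.load().getConfig("darwin")

// Assumed entry point: Darwin's factory builds the manager (and the confluent connector
// with it) from the configuration above.
val manager: AvroSchemaManager = AvroSchemaManagerFactory.initialize(darwinConfig)
```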
## Mock

MockConnector can be conveniently used during tests or if all the schemas (past and current) are known when launching

build.sbt

Lines changed: 14 additions & 2 deletions
@@ -23,7 +23,8 @@ lazy val root = Project("darwin", file("."))
    mockConnector,
    mockApplication,
    restConnector,
    mongoConnector,
    confluentConnector
  )

lazy val core = Project("darwin-core", file("core"))
@@ -65,7 +66,6 @@ lazy val hbaseConnector2 = Project("darwin-hbase2-connector", file("hbase2"))
  .settings(Settings.hbase2TestSettings)
  .enablePlugins(JavaAppPackaging)

lazy val postgresConnector = Project("darwin-postgres-connector", file("postgres"))
  .settings(Settings.commonSettings: _*)
  .dependsOn(coreCommon)
@@ -85,6 +85,18 @@ lazy val restConnector = Project("darwin-rest-connector", file("rest"))
  .settings(crossScalaVersions := Seq(Versions.scala, Versions.scala_211, Versions.scala_213))
  .enablePlugins(JavaAppPackaging)

lazy val confluentConnector = Project("darwin-confluent-connector", file("confluent"))
  .settings(Settings.commonSettings: _*)
  .dependsOn(coreCommon)
  .settings(pgpPassphrase := Settings.pgpPass)
  .settings(
    libraryDependencies ++= Dependencies.core_deps ++
      Dependencies.wireMock ++
      Dependencies.confluentSchemaRegistryDependencies :+ Dependencies.scalatest
  )
  .settings(crossScalaVersions := Versions.crossScalaVersions)
  .enablePlugins(JavaAppPackaging)

lazy val restServer = Project("darwin-rest-server", file("rest-server"))
  .settings(Settings.commonSettings: _*)
  .dependsOn(coreCommon, mockConnector)

common/src/main/scala/it/agilelab/darwin/common/Connector.scala

Lines changed: 178 additions & 1 deletion
@@ -1,6 +1,13 @@
package it.agilelab.darwin.common

import java.io.{ InputStream, OutputStream }
import java.nio.{ ByteBuffer, ByteOrder }

import it.agilelab.darwin.common.compat.RightBiasedEither
import it.agilelab.darwin.manager.SchemaPayloadPair
import it.agilelab.darwin.manager.exception.DarwinException
import it.agilelab.darwin.manager.util.AvroSingleObjectEncodingUtils
import org.apache.avro.{ Schema, SchemaNormalization }

/**
 * Generic abstraction of a component capable of reading and writing Schema entities in an external storage.
@@ -49,4 +56,174 @@ trait Connector extends Serializable {
   * @return an option that is empty if no schema was found for the ID or defined if a schema was found
   */
  def findSchema(id: Long): Option[Schema]

  /**
   * Generates a fingerprint for a schema; the default implementation is SchemaNormalization.parsingFingerprint64.
   *
   * @param schema the schema to fingerprint
   * @return the schema id
   */
  def fingerprint(schema: Schema): Long = {
    SchemaNormalization.parsingFingerprint64(schema)
  }

  /**
   * Writes to the given OutputStream the Single Object Encoding header and returns the OutputStream.
   *
   * @return the input OutputStream
   */
  def writeHeaderToStream(byteStream: OutputStream, schemaId: Long, endianness: ByteOrder): OutputStream = {
    AvroSingleObjectEncodingUtils.writeHeaderToStream(byteStream, schemaId, endianness)
  }

  /**
   * Creates a Single-Object encoded byte array.
   * By specification the encoded array is obtained by concatenating the V1_HEADER, the schema id and the
   * avro-encoded payload.
   *
   * @param avroPayload avro-serialized payload
   * @param schema      the schema used to encode the payload
   * @return a Single-Object encoded byte array
   */
  def generateAvroSingleObjectEncoded(
    avroPayload: Array[Byte],
    schema: Schema,
    endianness: ByteOrder,
    getId: Schema => Long
  ): Array[Byte] = {
    AvroSingleObjectEncodingUtils.generateAvroSingleObjectEncoded(avroPayload, getId(schema), endianness)
  }

  /**
   * Writes to the given OutputStream the Single Object Encoding header, then the avroValue, and returns the
   * OutputStream.
   *
   * @param byteStream the stream to write to
   * @param avroValue  the value to be written to the stream
   * @param schemaId   id of the schema used to encode the payload
   * @return the input OutputStream
   */
  def generateAvroSingleObjectEncoded(
    byteStream: OutputStream,
    avroValue: Array[Byte],
    schemaId: Long,
    endianness: ByteOrder
  ): OutputStream = {
    AvroSingleObjectEncodingUtils.generateAvroSingleObjectEncoded(byteStream, avroValue, schemaId, endianness)
  }

  /**
   * Writes to the given OutputStream the Single Object Encoding header, then calls the avroWriter function to
   * possibly add data to the stream, and finally returns the OutputStream.
   *
   * @param byteStream the stream to write to
   * @param schemaId   id of the schema used to encode the payload
   * @param avroWriter function that will be called to add user generated avro to the stream
   * @return the input OutputStream
   */
  def generateAvroSingleObjectEncoded(byteStream: OutputStream, schemaId: Long, endianness: ByteOrder)(
    avroWriter: OutputStream => OutputStream
  ): OutputStream = {
    AvroSingleObjectEncodingUtils.generateAvroSingleObjectEncoded(byteStream, schemaId, endianness)(avroWriter)
  }

  /**
   * Extracts a Tuple2 that contains the Schema and the Avro-encoded payload.
   *
   * @param avroSingleObjectEncoded a byte array of a Single-Object encoded payload
   * @return a pair containing the Schema and the payload of the input array
   */
  def retrieveSchemaAndAvroPayload(
    avroSingleObjectEncoded: Array[Byte],
    endianness: ByteOrder,
    getSchema: Long => Option[Schema]
  ): (Schema, Array[Byte]) = {
    if (AvroSingleObjectEncodingUtils.isAvroSingleObjectEncoded(avroSingleObjectEncoded)) {
      val id = AvroSingleObjectEncodingUtils.extractId(avroSingleObjectEncoded, endianness)
      getSchema(id) match {
        case Some(schema) =>
          schema -> AvroSingleObjectEncodingUtils.dropHeader(avroSingleObjectEncoded)
        case _ =>
          throw new DarwinException(s"No schema found for ID $id")
      }
    } else {
      throw AvroSingleObjectEncodingUtils.parseException()
    }
  }

  /**
   * Extracts the Schema from the ByteBuffer; after the method call the ByteBuffer position will be right after the
   * header.
   *
   * @param avroSingleObjectEncoded a ByteBuffer of a Single-Object encoded payload
   * @return the avro Schema
   */
  def retrieveSchemaAndAvroPayload(
    avroSingleObjectEncoded: ByteBuffer,
    endianness: ByteOrder,
    getSchema: Long => Option[Schema]
  ): Schema = {
    if (AvroSingleObjectEncodingUtils.isAvroSingleObjectEncoded(avroSingleObjectEncoded)) {
      val id = AvroSingleObjectEncodingUtils.extractId(avroSingleObjectEncoded, endianness)
      getSchema(id) match {
        case Some(schema) => schema
        case _            => throw new DarwinException(s"No schema found for ID $id")
      }
    } else {
      throw AvroSingleObjectEncodingUtils.parseException()
    }
  }

  /**
   * Extracts the schema from the avro single-object encoding at the head of this input stream.
   * The input stream will have 10 bytes consumed if the first two bytes correspond to the single object encoded
   * header, or zero bytes consumed if the InputStream supports marking; if it doesn't, the first bytes (up to 2) will
   * be consumed and returned in the Left part of the Either.
   *
   * @param inputStream avro single-object encoded input stream
   * @return the schema ID extracted from the input data
   */
  def extractSchema(
    inputStream: InputStream,
    endianness: ByteOrder,
    getSchema: Long => Option[Schema]
  ): Either[Array[Byte], Schema] = {
    AvroSingleObjectEncodingUtils.extractId(inputStream, endianness).rightMap { id =>
      getSchema(id).getOrElse(throw new DarwinException(s"No schema found for ID $id"))
    }
  }

  /**
   * Extracts the schema from the avro single-object encoding in the input array.
   *
   * @param array avro single-object encoded array
   * @return the schema ID extracted from the input data
   */
  def extractSchema(
    array: Array[Byte],
    endianness: ByteOrder,
    getSchema: Long => Option[Schema]
  ): Either[Exception, Schema] = {
    try {
      val id = AvroSingleObjectEncodingUtils.extractId(array, endianness)
      getSchema(id)
        .toRight(new RuntimeException(s"Cannot find schema with id $id"))
    } catch {
      case ie: IllegalArgumentException => Left(ie)
    }
  }

  /**
   * Extracts a SchemaPayloadPair that contains the Schema and the Avro-encoded payload.
   *
   * @param avroSingleObjectEncoded a byte array of a Single-Object encoded payload
   * @return a SchemaPayloadPair containing the Schema and the payload of the input array
   */
  def retrieveSchemaAndPayload(
    avroSingleObjectEncoded: Array[Byte],
    endianness: ByteOrder,
    getSchema: Long => Option[Schema]
  ): SchemaPayloadPair = {
    val (schema, payload) = retrieveSchemaAndAvroPayload(avroSingleObjectEncoded, endianness, getSchema)
    SchemaPayloadPair.create(schema, payload)
  }
}
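
A brief usage sketch of the new default methods, assuming a concrete `connector` whose storage already contains the schema; the `roundTrip` helper and the choice of `BIG_ENDIAN` are illustrative only, and the endianness must match whatever the writer used:

```scala
import java.nio.ByteOrder

import it.agilelab.darwin.common.Connector
import org.apache.avro.Schema

// Hypothetical round trip over the default helpers added to Connector.
// `connector`, `schema` and `payload` are assumed to exist in the caller's code,
// and the schema is assumed to be registered so that findSchema can resolve its id.
def roundTrip(connector: Connector, schema: Schema, payload: Array[Byte]): (Schema, Array[Byte]) = {
  // Frame the raw Avro bytes with the single-object-encoding header,
  // using the connector's own fingerprint as the schema id.
  val encoded: Array[Byte] =
    connector.generateAvroSingleObjectEncoded(payload, schema, ByteOrder.BIG_ENDIAN, connector.fingerprint)

  // Strip the header again, resolving the embedded id back to a Schema.
  connector.retrieveSchemaAndAvroPayload(encoded, ByteOrder.BIG_ENDIAN, connector.findSchema)
}
```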
