Skip to content

Commit

Permalink
Bulk Load CDK: CSV to its own toolkit; test stuff to test fixtures
Browse files Browse the repository at this point in the history
  • Loading branch information
johnny-schmidt committed Oct 22, 2024
1 parent 82e9e67 commit f231b57
Show file tree
Hide file tree
Showing 11 changed files with 60 additions and 17 deletions.
8 changes: 8 additions & 0 deletions airbyte-cdk/bulk/toolkits/load-csv/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Dependencies of the CSV load toolkit module.
dependencies {
    // Bulk CDK core modules, used internally by this toolkit.
    implementation(project(':airbyte-cdk:bulk:core:bulk-cdk-core-base'))
    implementation(project(':airbyte-cdk:bulk:core:bulk-cdk-core-load'))

    // Apache Commons CSV is declared on the `api` configuration rather than `implementation`.
    api('org.apache.commons:commons-csv:1.10.0')

    // This module's test fixtures build on the test fixtures of the core load module.
    testFixturesImplementation(testFixtures(project(':airbyte-cdk:bulk:core:bulk-cdk-core-load')))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.cdk.load.data.csv

import io.airbyte.cdk.load.data.ObjectType

/** Derives the CSV header columns for an [ObjectType] schema. */
class AirbyteTypeToCsvHeader {
    /**
     * Collects the `key` of each entry in [schema]'s `properties`, in the order
     * `properties` yields them, and returns them as an array.
     *
     * @param schema the object schema whose properties become the CSV columns
     * @return one header name per property
     */
    fun convert(schema: ObjectType): Array<String> {
        val columnNames = schema.properties.map { property -> property.key }
        return columnNames.toTypedArray()
    }
}

/**
 * Extension shorthand that builds the CSV header for this schema by delegating to
 * [AirbyteTypeToCsvHeader.convert].
 *
 * @return the header names produced by the converter for this [ObjectType]
 */
fun ObjectType.toCsvHeader(): Array<String> {
    val headerConverter = AirbyteTypeToCsvHeader()
    return headerConverter.convert(this)
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.cdk.load.data
package io.airbyte.cdk.load.data.csv

import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.ArrayValue
import io.airbyte.cdk.load.data.IntegerValue
import io.airbyte.cdk.load.data.NumberValue
import io.airbyte.cdk.load.data.ObjectValue
import io.airbyte.cdk.load.data.StringValue
import io.airbyte.cdk.load.data.toJson
import io.airbyte.cdk.load.util.serializeToString

class AirbyteValueToCsvRow {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,13 @@
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.cdk.load.data
package io.airbyte.cdk.load.file.csv

import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.csv.toCsvHeader
import java.io.Writer
import org.apache.commons.csv.CSVFormat
import org.apache.commons.csv.CSVPrinter

/** Derives the CSV header columns for an [ObjectType] schema. */
class AirbyteTypeToCsvHeader {
    /**
     * Collects the `key` of each entry in [schema]'s `properties`, in the order
     * `properties` yields them, and returns them as an array.
     *
     * @param schema the object schema whose properties become the CSV columns
     * @return one header name per property
     */
    fun convert(schema: ObjectType): Array<String> {
        val columnNames = schema.properties.map { property -> property.key }
        return columnNames.toTypedArray()
    }
}

/**
 * Extension shorthand that builds the CSV header for this schema by delegating to
 * [AirbyteTypeToCsvHeader.convert].
 *
 * @return the header names produced by the converter for this [ObjectType]
 */
fun ObjectType.toCsvHeader(): Array<String> {
    val headerConverter = AirbyteTypeToCsvHeader()
    return headerConverter.convert(this)
}

/**
 * Creates a [CSVPrinter] over [writer] for this schema.
 *
 * The format is built from a fresh [CSVFormat.Builder] with the result of [toCsvHeader]
 * set as its header and auto-flush turned on.
 *
 * @param writer destination the returned printer writes to
 * @return the printer obtained from the configured format's `print(writer)`
 */
fun ObjectType.toCsvPrinterWithHeader(writer: Writer): CSVPrinter {
    val columnNames = toCsvHeader()
    val csvFormat =
        CSVFormat.Builder.create()
            .setHeader(*columnNames)
            .setAutoFlush(true)
            .build()
    return csvFormat.print(writer)
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,24 @@
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.cdk.load.data
package io.airbyte.cdk.load.data.csv

import io.airbyte.cdk.load.data.AirbyteType
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.ArrayType
import io.airbyte.cdk.load.data.ArrayValue
import io.airbyte.cdk.load.data.BooleanType
import io.airbyte.cdk.load.data.BooleanValue
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.IntegerValue
import io.airbyte.cdk.load.data.NumberType
import io.airbyte.cdk.load.data.NumberValue
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.data.ObjectValue
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.StringValue
import io.airbyte.cdk.load.data.toAirbyteValue
import io.airbyte.cdk.load.util.deserializeToNode
import org.apache.commons.csv.CSVRecord

Expand Down
6 changes: 4 additions & 2 deletions airbyte-cdk/bulk/toolkits/load-object-storage/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ dependencies {
// and migrate them to their respective toolkits, so that these
// dependencies can be removed.
api project(':airbyte-cdk:bulk:toolkits:bulk-cdk-toolkit-load-avro')
api project(':airbyte-cdk:bulk:toolkits:bulk-cdk-toolkit-load-csv')
api project(':airbyte-cdk:bulk:toolkits:bulk-cdk-toolkit-load-parquet')

api("org.apache.commons:commons-csv:1.10.0")

testFixturesImplementation testFixtures(project(":airbyte-cdk:bulk:core:bulk-cdk-core-load"))
testFixturesImplementation testFixtures(project(":airbyte-cdk:bulk:toolkits:bulk-cdk-toolkit-load-avro"))
testFixturesImplementation testFixtures(project(":airbyte-cdk:bulk:toolkits:bulk-cdk-toolkit-load-csv"))
testFixturesImplementation testFixtures(project(":airbyte-cdk:bulk:toolkits:bulk-cdk-toolkit-load-parquet"))
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import io.airbyte.cdk.load.command.object_storage.ObjectStorageFormatConfigurati
import io.airbyte.cdk.load.command.object_storage.ParquetFormatConfiguration
import io.airbyte.cdk.load.data.avro.toAirbyteValue
import io.airbyte.cdk.load.data.avro.toAvroSchema
import io.airbyte.cdk.load.data.csv.toAirbyteValue
import io.airbyte.cdk.load.data.toAirbyteValue
import io.airbyte.cdk.load.file.GZIPProcessor
import io.airbyte.cdk.load.file.NoopProcessor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ import io.airbyte.cdk.load.command.object_storage.ParquetFormatConfiguration
import io.airbyte.cdk.load.data.DestinationRecordToAirbyteValueWithMeta
import io.airbyte.cdk.load.data.avro.toAvroRecord
import io.airbyte.cdk.load.data.avro.toAvroSchema
import io.airbyte.cdk.load.data.toCsvPrinterWithHeader
import io.airbyte.cdk.load.data.toCsvRecord
import io.airbyte.cdk.load.data.csv.toCsvRecord
import io.airbyte.cdk.load.data.toJson
import io.airbyte.cdk.load.file.avro.toAvroWriter
import io.airbyte.cdk.load.file.csv.toCsvPrinterWithHeader
import io.airbyte.cdk.load.file.object_storage.ObjectStoragePathFactory
import io.airbyte.cdk.load.file.parquet.toParquetWriter
import io.airbyte.cdk.load.file.s3.S3Client
Expand Down

0 comments on commit f231b57

Please sign in to comment.