🎉 New Destination: Databricks #5998
Merged
Changes from 1 commit (of 28 total)
Commits:
942ef2e  skeleton databricks destination (Phlair)
4c3e10c  Delete DynamicClassLoader.java (Phlair)
3e5ddef  Merge branch 'master' into george/hotload-jar (tuliren)
af94c07  Update dependency (tuliren)
d7db844  Implement databricks destination as a stream copier (#5748) (tuliren)
5779333  Update spec and configs (#5792) (Phlair)
e6aa16a  Remove connector definition json (tuliren)
74877eb  Depend on local spark jdbc driver (tuliren)
f3383f1  Merge branch 'master' into liren/destination-databricks (tuliren)
e92f22f  Implement databricks copy destination (tuliren)
ee1f165  Fix check method (tuliren)
3821dd4  Add ci credential (tuliren)
9c7917c  Fix second sync test (tuliren)
3dddd3b  Add unit tests (tuliren)
5eb3e3b  Format code (tuliren)
736975c  Update destination table location (tuliren)
c0ef240  Update documentation (tuliren)
3895c10  Fix schema deletion bug (tuliren)
e310e0b  Update readme (tuliren)
8586890  Reuse avro record helper from s3 (tuliren)
a55e4c0  Add sample config.json (tuliren)
7ea6326  Replace s3 config with data source (tuliren)
6257848  Update documentation (tuliren)
596d5c0  Update doc (tuliren)
7cc610f  Update logging of s3 full path (tuliren)
294c816  Add comments about driver url (tuliren)
9f7f14c  Add purge_data_source parameter for debugging (tuliren)
7e7cbf6  Enable automatic optimization (tuliren)
Implement databricks destination as a stream copier (#5748)
commit d7db844e93a945585f771099e7f0d89b4da2396a
114 changes: 114 additions & 0 deletions
.../src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java
@@ -0,0 +1,114 @@
package io.airbyte.integrations.destination.databricks;

import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.airbyte.db.jdbc.JdbcDatabase;
import io.airbyte.integrations.destination.ExtendedNameTransformer;
import io.airbyte.integrations.destination.jdbc.SqlOperations;
import io.airbyte.integrations.destination.jdbc.copy.StreamCopier;
import io.airbyte.integrations.destination.jdbc.copy.s3.S3Config;
import io.airbyte.integrations.destination.s3.S3DestinationConfig;
import io.airbyte.integrations.destination.s3.parquet.S3ParquetFormatConfig;
import io.airbyte.integrations.destination.s3.parquet.S3ParquetWriter;
import io.airbyte.integrations.destination.s3.writer.S3WriterFactory;
import io.airbyte.protocol.models.AirbyteRecordMessage;
import io.airbyte.protocol.models.AirbyteStream;
import io.airbyte.protocol.models.ConfiguredAirbyteStream;
import java.sql.Timestamp;
import java.util.UUID;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This implementation is similar to
 * {@link io.airbyte.integrations.destination.jdbc.copy.s3.S3StreamCopier}. The difference is that
 * this implementation creates Parquet staging files, instead of CSV ones.
 */
public class DatabricksStreamCopier implements StreamCopier {

  private static final Logger LOGGER = LoggerFactory.getLogger(DatabricksStreamCopier.class);
  private static final ObjectMapper MAPPER = new ObjectMapper();

  private final AmazonS3 s3Client;
  private final S3Config s3Config;
  private final String tmpTableName;
  private final AirbyteStream stream;
  private final JdbcDatabase db;
  private final ExtendedNameTransformer nameTransformer;
  private final SqlOperations sqlOperations;
  private final S3ParquetWriter parquetWriter;

  public DatabricksStreamCopier(String stagingFolder,
                                String schema,
                                ConfiguredAirbyteStream configuredStream,
                                AmazonS3 s3Client,
                                JdbcDatabase db,
                                S3Config s3Config,
                                ExtendedNameTransformer nameTransformer,
                                SqlOperations sqlOperations,
                                S3WriterFactory writerFactory,
                                Timestamp uploadTime) throws Exception {
    this.stream = configuredStream.getStream();
    this.db = db;
    this.nameTransformer = nameTransformer;
    this.sqlOperations = sqlOperations;
    this.tmpTableName = nameTransformer.getTmpTableName(stream.getName());
    this.s3Client = s3Client;
    this.s3Config = s3Config;
    this.parquetWriter = (S3ParquetWriter) writerFactory
        .create(getS3DestinationConfig(s3Config, stagingFolder), s3Client, configuredStream, uploadTime);
  }

  @Override
  public void write(UUID id, AirbyteRecordMessage recordMessage) throws Exception {
    parquetWriter.write(id, recordMessage);
  }

  @Override
  public void closeStagingUploader(boolean hasFailed) throws Exception {
    parquetWriter.close(hasFailed);
  }

  @Override
  public void createTemporaryTable() throws Exception {

  }

  @Override
  public void copyStagingFileToTemporaryTable() throws Exception {

  }

  @Override
  public void createDestinationSchema() throws Exception {

  }

  @Override
  public String createDestinationTable() throws Exception {
    return null;
  }

  @Override
  public String generateMergeStatement(String destTableName) throws Exception {
    return null;
  }

  @Override
  public void removeFileAndDropTmpTable() throws Exception {

  }

  private S3DestinationConfig getS3DestinationConfig(S3Config s3Config, String stagingFolder) {
    return new S3DestinationConfig(
        s3Config.getEndpoint(),
        s3Config.getBucketName(),
        stagingFolder,
        s3Config.getRegion(),
        s3Config.getAccessKeyId(),
        s3Config.getSecretAccessKey(),
        // use default parquet format config
        new S3ParquetFormatConfig(MAPPER.createObjectNode()));
  }

}
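In this commit the copier only implements the staging phase: write buffers each record into a Parquet file on S3 through the S3ParquetWriter, closeStagingUploader flushes and finalizes that file, and the table-side operations are still empty stubs. A minimal sketch of that lifecycle, using only the signatures above; the driver class, method name, and arguments are illustrative, not part of the PR:

import io.airbyte.protocol.models.AirbyteRecordMessage;
import java.util.UUID;

public class StagingLifecycleSketch {

  // Illustrative driver: pushes records through the copier's staging phase.
  static void stageRecords(DatabricksStreamCopier copier,
                           Iterable<AirbyteRecordMessage> records) throws Exception {
    for (AirbyteRecordMessage record : records) {
      // Each record is written under a unique id, mirroring how Airbyte tags records.
      copier.write(UUID.randomUUID(), record);
    }
    // false = the sync did not fail; the Parquet writer flushes and completes the S3 upload.
    copier.closeStagingUploader(false);
  }

}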
43 changes: 43 additions & 0 deletions
...in/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopierFactory.java
@@ -0,0 +1,43 @@
package io.airbyte.integrations.destination.databricks;

import com.amazonaws.services.s3.AmazonS3;
import io.airbyte.db.jdbc.JdbcDatabase;
import io.airbyte.integrations.destination.ExtendedNameTransformer;
import io.airbyte.integrations.destination.jdbc.SqlOperations;
import io.airbyte.integrations.destination.jdbc.copy.StreamCopier;
import io.airbyte.integrations.destination.jdbc.copy.StreamCopierFactory;
import io.airbyte.integrations.destination.jdbc.copy.s3.S3Config;
import io.airbyte.integrations.destination.jdbc.copy.s3.S3StreamCopier;
import io.airbyte.integrations.destination.s3.parquet.S3ParquetWriter;
import io.airbyte.integrations.destination.s3.writer.ProductionWriterFactory;
import io.airbyte.integrations.destination.s3.writer.S3WriterFactory;
import io.airbyte.protocol.models.AirbyteStream;
import io.airbyte.protocol.models.ConfiguredAirbyteStream;
import java.sql.Timestamp;

public class DatabricksStreamCopierFactory implements StreamCopierFactory<S3Config> {

  @Override
  public StreamCopier create(String configuredSchema,
                             S3Config s3Config,
                             String stagingFolder,
                             ConfiguredAirbyteStream configuredStream,
                             ExtendedNameTransformer nameTransformer,
                             JdbcDatabase db,
                             SqlOperations sqlOperations) {
    try {
      AirbyteStream stream = configuredStream.getStream();
      String schema = StreamCopierFactory.getSchema(stream, configuredSchema, nameTransformer);
      AmazonS3 s3Client = S3StreamCopier.getAmazonS3(s3Config);
      S3WriterFactory writerFactory = new ProductionWriterFactory();
      Timestamp uploadTimestamp = new Timestamp(System.currentTimeMillis());

      return new DatabricksStreamCopier(
          stagingFolder, schema, configuredStream, s3Client, db, s3Config,
          nameTransformer, sqlOperations, writerFactory, uploadTimestamp);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

}
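For context, a hedged sketch of how this factory would be invoked to obtain a copier for one configured stream. Only the create signature above is taken from the diff; the wrapper class, schema name, and staging folder value are made up for illustration:

import io.airbyte.db.jdbc.JdbcDatabase;
import io.airbyte.integrations.destination.ExtendedNameTransformer;
import io.airbyte.integrations.destination.jdbc.SqlOperations;
import io.airbyte.integrations.destination.jdbc.copy.StreamCopier;
import io.airbyte.integrations.destination.jdbc.copy.s3.S3Config;
import io.airbyte.protocol.models.ConfiguredAirbyteStream;

public class FactoryUsageSketch {

  // Illustrative wiring; in a real destination these arguments come from the
  // connector's config JSON and the configured catalog.
  static StreamCopier copierFor(S3Config s3Config,
                                JdbcDatabase db,
                                SqlOperations sqlOperations,
                                ConfiguredAirbyteStream stream) {
    return new DatabricksStreamCopierFactory().create(
        "default_schema",                        // fallback schema (made-up value)
        s3Config,
        "staging-" + System.currentTimeMillis(), // made-up staging folder name
        stream,
        new ExtendedNameTransformer(),
        db,
        sqlOperations);
  }

}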
Review comment: This interface change is needed so that the stream copier can work with the Parquet writer.
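Without the rest of the review thread it is not certain which interface is meant, but a plausible reading is that the copier/factory signatures now carry the full ConfiguredAirbyteStream rather than just a stream name, because the Parquet writer needs the stream's JSON schema to derive an Avro schema. A hedged sketch of that idea; the "before" signature is a guess, while the "after" matches the factory in this diff:

// Before (guess): a CSV-oriented copier factory only needed the stream's name.
// StreamCopier create(String configuredSchema, S3Config s3Config, String stagingFolder,
//                     String streamName, ...);

// After (as in this diff): the whole configured stream is passed through, so the
// writer factory can give the Parquet writer enough to build an Avro schema.
// StreamCopier create(String configuredSchema, S3Config s3Config, String stagingFolder,
//                     ConfiguredAirbyteStream configuredStream,
//                     ExtendedNameTransformer nameTransformer, JdbcDatabase db,
//                     SqlOperations sqlOperations);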