From 76573f026c063c9978f209da955dbfdd36b91256 Mon Sep 17 00:00:00 2001 From: oyeliseiev-ua <134942613+oyeliseiev-ua@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:35:50 +0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=89=20New=20Source:=20SingleStore=20(#?= =?UTF-8?q?37337)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Marcos Marx Co-authored-by: marcosmarxm --- .../airbyte/cdk/db/factory/DatabaseDriver.kt | 3 +- .../airbyte/cdk/integrations/JdbcConnector.kt | 3 +- .../jdbc/test/JdbcSourceAcceptanceTest.kt | 8 +- .../connectors/source-singlestore/README.md | 71 +++ .../acceptance-test-config.yml | 8 + .../source-singlestore/build.gradle | 50 +++ .../source-singlestore/gradle.properties | 3 + .../connectors/source-singlestore/icon.svg | 28 ++ .../integration_tests/acceptance.py | 14 + .../source-singlestore/metadata.yaml | 25 ++ .../singlestore/SingleStoreQueryUtils.java | 133 ++++++ .../source/singlestore/SingleStoreSource.java | 159 +++++++ .../SingleStoreSourceOperations.java | 139 ++++++ .../source/singlestore/SingleStoreType.java | 84 ++++ .../SingleStoreCursorBasedStateManager.java | 83 ++++ .../SingleStoreInitialLoadHandler.java | 149 +++++++ .../SingleStoreInitialLoadRecordIterator.java | 169 ++++++++ ...gleStoreInitialLoadStreamStateManager.java | 112 +++++ .../SingleStoreInitialReadUtil.java | 173 ++++++++ .../internal_models/internal_models.yaml | 48 ++ .../src/main/resources/spec.json | 182 ++++++++ .../SingleStoreSourceAcceptanceTest.java | 88 ++++ .../SingleStoreSourceDatatypeTest.java | 304 +++++++++++++ .../SingleStoreSslSourceAcceptanceTest.java | 32 ++ .../resources/dummy_config.json | 7 + .../resources/expected_spec.json | 185 ++++++++ .../SingleStoreJdbcSourceAcceptanceTest.java | 409 ++++++++++++++++++ .../SingleStoreSourceOperationsTest.java | 114 +++++ .../singlestore/SingleStoreSourceTest.java | 49 +++ .../singlestore/SingleStoreSpecTest.java | 113 +++++ 
...ingleStoreSslJdbcSourceAcceptanceTest.java | 32 ++ .../AirbyteSingleStoreTestContainer.java | 150 +++++++ .../SingleStoreContainerFactory.java | 62 +++ .../singlestore/SingleStoreTestDatabase.java | 127 ++++++ docs/integrations/sources/singlestore.md | 176 ++++++++ 35 files changed, 3489 insertions(+), 3 deletions(-) create mode 100644 airbyte-integrations/connectors/source-singlestore/README.md create mode 100644 airbyte-integrations/connectors/source-singlestore/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-singlestore/build.gradle create mode 100644 airbyte-integrations/connectors/source-singlestore/gradle.properties create mode 100644 airbyte-integrations/connectors/source-singlestore/icon.svg create mode 100644 airbyte-integrations/connectors/source-singlestore/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-singlestore/metadata.yaml create mode 100644 airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreQueryUtils.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreSource.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceOperations.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreType.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/cursor_based/SingleStoreCursorBasedStateManager.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadHandler.java create mode 100644 
airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadRecordIterator.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadStreamStateManager.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialReadUtil.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/main/resources/internal_models/internal_models.yaml create mode 100644 airbyte-integrations/connectors/source-singlestore/src/main/resources/spec.json create mode 100644 airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceDatatypeTest.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSslSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/test-integration/resources/dummy_config.json create mode 100644 airbyte-integrations/connectors/source-singlestore/src/test-integration/resources/expected_spec.json create mode 100644 airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreJdbcSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceOperationsTest.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceTest.java 
create mode 100644 airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSpecTest.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSslJdbcSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/AirbyteSingleStoreTestContainer.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/SingleStoreContainerFactory.java create mode 100644 airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/SingleStoreTestDatabase.java create mode 100644 docs/integrations/sources/singlestore.md diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/db/factory/DatabaseDriver.kt b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/db/factory/DatabaseDriver.kt index 8f20d997b2c8..fee60a3edb94 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/db/factory/DatabaseDriver.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/db/factory/DatabaseDriver.kt @@ -25,7 +25,8 @@ enum class DatabaseDriver(val driverClassName: String, val urlFormatString: Stri SNOWFLAKE("net.snowflake.client.jdbc.SnowflakeDriver", "jdbc:snowflake://%s/"), YUGABYTEDB("com.yugabyte.Driver", "jdbc:yugabytedb://%s:%d/%s"), EXASOL("com.exasol.jdbc.EXADriver", "jdbc:exa:%s:%d"), - TERADATA("com.teradata.jdbc.TeraDriver", "jdbc:teradata://%s/"); + TERADATA("com.teradata.jdbc.TeraDriver", "jdbc:teradata://%s/"), + SINGLESTORE("com.singlestore.jdbc.Driver", "jdbc:singlestore://%s:%d/%s"); companion object { /** diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/JdbcConnector.kt 
b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/JdbcConnector.kt index 5190a53314bf..acf32e45a8d0 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/JdbcConnector.kt +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/JdbcConnector.kt @@ -49,7 +49,8 @@ protected constructor(@JvmField protected val driverClassName: String) : BaseCon ChronoUnit.SECONDS ) .or { Optional.of(POSTGRES_CONNECT_TIMEOUT_DEFAULT_DURATION) } - DatabaseDriver.MYSQL -> + DatabaseDriver.MYSQL, + DatabaseDriver.SINGLESTORE -> maybeParseDuration( connectionProperties["connectTimeout"], ChronoUnit.MILLIS diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/kotlin/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.kt b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/kotlin/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.kt index 4d682520b278..47c1b4e2daa3 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/kotlin/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/kotlin/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.kt @@ -346,6 +346,7 @@ abstract class JdbcSourceAcceptanceTest> { // for them. 
when (testdb.databaseDriver) { DatabaseDriver.MYSQL, + DatabaseDriver.SINGLESTORE, DatabaseDriver.CLICKHOUSE, DatabaseDriver.TERADATA -> return else -> {} @@ -1211,6 +1212,7 @@ abstract class JdbcSourceAcceptanceTest> { // some databases don't make insertion order guarantee when equal ordering value if ( testdb.databaseDriver == DatabaseDriver.TERADATA || + testdb.databaseDriver == DatabaseDriver.SINGLESTORE || testdb.databaseDriver == DatabaseDriver.ORACLE ) { MatcherAssert.assertThat( @@ -1311,7 +1313,10 @@ abstract class JdbcSourceAcceptanceTest> { .map { r: AirbyteMessage -> r.record.data[COL_NAME].asText() } // teradata doesn't make insertion order guarantee when equal ordering value - if (testdb.databaseDriver == DatabaseDriver.TERADATA) { + if ( + testdb.databaseDriver == DatabaseDriver.TERADATA || + testdb.databaseDriver == DatabaseDriver.SINGLESTORE + ) { MatcherAssert.assertThat( listOf("c", "d", "e", "f"), Matchers.containsInAnyOrder(*thirdSyncExpectedNames.toTypedArray()), @@ -1706,6 +1711,7 @@ abstract class JdbcSourceAcceptanceTest> { get() = when (testdb.databaseDriver) { DatabaseDriver.MYSQL, + DatabaseDriver.SINGLESTORE, DatabaseDriver.CLICKHOUSE, DatabaseDriver.TERADATA -> testdb.databaseName else -> SCHEMA_NAME diff --git a/airbyte-integrations/connectors/source-singlestore/README.md b/airbyte-integrations/connectors/source-singlestore/README.md new file mode 100644 index 000000000000..2a269ae9e7ff --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/README.md @@ -0,0 +1,71 @@ +# Source SingleStore + +This is the repository for the Singlestore source connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.com/integrations/sources/singlestore). 
+ +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-singlestore:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.com/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-singlestore:buildConnectorImage +``` + +Once built, the docker image name and tag will be `airbyte/source-singlestore:dev`. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-singlestore:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-singlestore:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-singlestore:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-singlestore:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/...` +Place integration tests in `src/test-integration/...` + +#### Acceptance Tests +Airbyte has a standard test suite that all source connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/sources/singlestoreSourceAcceptanceTest.java`. 
+ +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-singlestore:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-singlestore:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=source-singlestore test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog are up to date (`docs/integrations/sources/singlestore.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
diff --git a/airbyte-integrations/connectors/source-singlestore/acceptance-test-config.yml b/airbyte-integrations/connectors/source-singlestore/acceptance-test-config.yml new file mode 100644 index 000000000000..a6764ca95b70 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/acceptance-test-config.yml @@ -0,0 +1,8 @@ +# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-singlestore:dev +acceptance_tests: + spec: + tests: + - spec_path: "src/test-integration/resources/expected_spec.json" + config_path: "src/test-integration/resources/dummy_config.json" diff --git a/airbyte-integrations/connectors/source-singlestore/build.gradle b/airbyte-integrations/connectors/source-singlestore/build.gradle new file mode 100644 index 000000000000..b0f969441ec4 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/build.gradle @@ -0,0 +1,50 @@ +import org.jsonschema2pojo.SourceType + +plugins { + id 'application' + id 'airbyte-java-connector' + id 'org.jsonschema2pojo' version '1.2.1' +} + +tasks.named('sourcesJar').configure { + dependsOn tasks.matching { it.name == 'generateJsonSchema2Pojo' } +} + +airbyteJavaConnector { + cdkVersionRequired = '0.30.1' + features = ['db-sources'] + useLocalCdk = true +} + +java { + compileJava { + options.compilerArgs += "-Xlint:-try,-rawtypes" + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.source.singlestore.SingleStoreSource' +} + +dependencies { + implementation 'com.singlestore:singlestore-jdbc-client:1.2.2' + testImplementation 'org.apache.commons:commons-lang3:3.11' + testFixturesImplementation 'org.testcontainers:jdbc:1.19.0' + integrationTestJavaImplementation project(':airbyte-integrations:connectors:source-singlestore') +} + +jsonSchema2Pojo { + sourceType = 
SourceType.YAMLSCHEMA + source = files("${sourceSets.main.output.resourcesDir}/internal_models") + targetDirectory = new File(project.buildDir, 'generated/src/gen/java/') + removeOldOutput = true + + targetPackage = 'io.airbyte.integrations.source.singlestore.internal.models' + + useLongIntegers = true + generateBuilders = true + includeConstructors = false + includeSetters = true +} diff --git a/airbyte-integrations/connectors/source-singlestore/gradle.properties b/airbyte-integrations/connectors/source-singlestore/gradle.properties new file mode 100644 index 000000000000..e2038c54baa6 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/gradle.properties @@ -0,0 +1,3 @@ +testExecutionConcurrency=1 + +JunitMethodExecutionTimeout=5 m diff --git a/airbyte-integrations/connectors/source-singlestore/icon.svg b/airbyte-integrations/connectors/source-singlestore/icon.svg new file mode 100644 index 000000000000..88448a71a5a5 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/icon.svg @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-singlestore/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-singlestore/integration_tests/acceptance.py new file mode 100644 index 000000000000..82823254d266 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import pytest + +pytest_plugins = ("connector_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-singlestore/metadata.yaml b/airbyte-integrations/connectors/source-singlestore/metadata.yaml new file mode 100644 index 000000000000..9b70bda112c1 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/metadata.yaml @@ -0,0 +1,25 @@ +data: + allowedHosts: + hosts: + - ${host} + registries: + oss: + enabled: true + cloud: + enabled: false + connectorSubtype: database + connectorType: source + definitionId: 2e8ae725-0069-4452-afa0-d1848cf69676 + dockerImageTag: 0.1.0 + dockerRepository: airbyte/source-singlestore + githubIssueLabel: source-singlestore + icon: singlestore.svg + license: MIT + name: SingleStore + releaseDate: + supportLevel: community + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/sources/singlestore + tags: + - language:java +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreQueryUtils.java b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreQueryUtils.java new file mode 100644 index 000000000000..b725d6e2055a --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreQueryUtils.java @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.singlestore; + +import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getFullyQualifiedTableNameWithQuoting; +import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getIdentifierWithQuoting; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo; +import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager; +import io.airbyte.integrations.source.singlestore.internal.models.CursorBasedStatus; +import io.airbyte.integrations.source.singlestore.internal.models.InternalModels.StateType; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SingleStoreQueryUtils { + + private static final Logger LOGGER = LoggerFactory.getLogger(SingleStoreQueryUtils.class); + + public static final String MAX_PK_VALUE_QUERY = """ + SELECT MAX(%s) as %s FROM %s; + """; + public static final String MAX_CURSOR_VALUE_QUERY = """ + SELECT %s FROM %s WHERE %s = (SELECT MAX(%s) FROM %s); + """; + + public static final String MAX_PK_COL = "max_pk"; + + public static String getMaxPkValueForStream(final JdbcDatabase database, + final ConfiguredAirbyteStream stream, + final String pkFieldName, + final String quoteString) { + final String name = stream.getStream().getName(); + final String namespace = stream.getStream().getNamespace(); + final String fullTableName = getFullyQualifiedTableNameWithQuoting(namespace, name, quoteString); + final String maxPkQuery = 
String.format(MAX_PK_VALUE_QUERY, getIdentifierWithQuoting(pkFieldName, quoteString), MAX_PK_COL, fullTableName); + LOGGER.info("Querying for max pk value: {}", maxPkQuery); + try { + final List jsonNodes = database.bufferedResultSetQuery(conn -> conn.prepareStatement(maxPkQuery).executeQuery(), + JdbcUtils.getDefaultSourceOperations()::rowToJson); + Preconditions.checkState(jsonNodes.size() == 1); + if (jsonNodes.get(0).get(MAX_PK_COL) == null) { + LOGGER.info("Max PK is null for table {} - this could indicate an empty table", fullTableName); + return null; + } + return jsonNodes.get(0).get(MAX_PK_COL).asText(); + } catch (final SQLException e) { + throw new RuntimeException(e); + } + } + + /** + * Iterates through each stream and find the max cursor value and the record count which has that + * value based on each cursor field provided by the customer per stream. This information is saved + * in a Hashmap with the mapping being the AirbyteStreamNameNamespacePair -> CursorBasedStatus. + * + * @param database the source db + * @param streams streams to be synced + * @param stateManager stream stateManager + * @return Map of streams to statuses + */ + public static Map getCursorBasedSyncStatusForStreams( + final JdbcDatabase database, + final List streams, + final StateManager stateManager, + final String quoteString) { + + final Map cursorBasedStatusMap = new HashMap<>(); + streams.forEach(stream -> { + try { + final String name = stream.getStream().getName(); + final String namespace = stream.getStream().getNamespace(); + final String fullTableName = getFullyQualifiedTableNameWithQuoting(namespace, name, quoteString); + + final Optional cursorInfoOptional = stateManager.getCursorInfo( + new io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair(name, namespace)); + if (cursorInfoOptional.isEmpty()) { + throw new RuntimeException(String.format("Stream %s was not provided with an appropriate cursor", stream.getStream().getName())); + } + + 
LOGGER.info("Querying max cursor value for {}.{}", namespace, name); + final String cursorField = cursorInfoOptional.get().getCursorField(); + final String quotedCursorField = getIdentifierWithQuoting(cursorField, quoteString); + final String cursorBasedSyncStatusQuery = String.format(MAX_CURSOR_VALUE_QUERY, quotedCursorField, fullTableName, quotedCursorField, + quotedCursorField, fullTableName); + final List jsonNodes = database.bufferedResultSetQuery(conn -> conn.prepareStatement(cursorBasedSyncStatusQuery).executeQuery(), + JdbcUtils.getDefaultSourceOperations()::rowToJson); + final CursorBasedStatus cursorBasedStatus = new CursorBasedStatus(); + cursorBasedStatus.setStateType(StateType.CURSOR_BASED); + cursorBasedStatus.setStreamName(name); + cursorBasedStatus.setStreamNamespace(namespace); + cursorBasedStatus.setCursorField(ImmutableList.of(cursorField)); + if (!jsonNodes.isEmpty()) { + final JsonNode result = jsonNodes.get(0); + cursorBasedStatus.setCursor(result.get(cursorField).asText()); + cursorBasedStatus.setCursorRecordCount((long) jsonNodes.size()); + } + cursorBasedStatusMap.put(new io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair(name, namespace), cursorBasedStatus); + } catch (final SQLException e) { + throw new RuntimeException(e); + } + }); + + return cursorBasedStatusMap; + } + + public static void logStreamSyncStatus(final List streams, final String syncType) { + if (streams.isEmpty()) { + LOGGER.info("No Streams will be synced via {}.", syncType); + } else { + LOGGER.info("Streams to be synced via {} : {}", syncType, streams.size()); + LOGGER.info("Streams: {}", prettyPrintConfiguredAirbyteStreamList(streams)); + } + } + + public static String prettyPrintConfiguredAirbyteStreamList(final List streamList) { + return streamList.stream().map(s -> "%s.%s".formatted(s.getStream().getNamespace(), s.getStream().getName())).collect(Collectors.joining(", ")); + } + +} diff --git 
a/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreSource.java b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreSource.java new file mode 100644 index 000000000000..98a5f9a711ea --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreSource.java @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.singlestore; + +import static io.airbyte.cdk.db.jdbc.JdbcUtils.EQUALS; +import static io.airbyte.cdk.integrations.source.jdbc.JdbcSSLConnectionUtils.CLIENT_KEY_STORE_PASS; +import static io.airbyte.cdk.integrations.source.jdbc.JdbcSSLConnectionUtils.CLIENT_KEY_STORE_TYPE; +import static io.airbyte.cdk.integrations.source.jdbc.JdbcSSLConnectionUtils.CLIENT_KEY_STORE_URL; +import static io.airbyte.cdk.integrations.source.jdbc.JdbcSSLConnectionUtils.TRUST_KEY_STORE_PASS; +import static io.airbyte.cdk.integrations.source.jdbc.JdbcSSLConnectionUtils.TRUST_KEY_STORE_TYPE; +import static io.airbyte.cdk.integrations.source.jdbc.JdbcSSLConnectionUtils.TRUST_KEY_STORE_URL; +import static io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialReadUtil.convertNameNamespacePairFromV0; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.cdk.integrations.base.Source; +import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; +import io.airbyte.cdk.integrations.source.jdbc.JdbcSSLConnectionUtils; +import 
io.airbyte.cdk.integrations.source.relationaldb.RelationalDbReadUtil; +import io.airbyte.cdk.integrations.source.relationaldb.TableInfo; +import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.AutoCloseableIterator; +import io.airbyte.integrations.source.singlestore.cursor_based.SingleStoreCursorBasedStateManager; +import io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialLoadHandler; +import io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialLoadStreamStateManager; +import io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialReadUtil; +import io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialReadUtil.CursorBasedStreams; +import io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialReadUtil.InitialLoadStreams; +import io.airbyte.integrations.source.singlestore.internal.models.CursorBasedStatus; +import io.airbyte.protocol.models.CommonField; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SingleStoreSource extends AbstractJdbcSource implements Source { + + private static final Logger LOGGER = LoggerFactory.getLogger(SingleStoreSource.class); + public static final String DRIVER_CLASS = DatabaseDriver.SINGLESTORE.getDriverClassName(); + + public SingleStoreSource() { + super(DRIVER_CLASS, AdaptiveStreamingQueryConfig::new, new 
SingleStoreSourceOperations()); + } + + @Override + public JsonNode toDatabaseConfig(final JsonNode config) { + final String encodedDatabaseName = URLEncoder.encode(config.get(JdbcUtils.DATABASE_KEY).asText(), StandardCharsets.UTF_8); + final StringBuilder jdbcUrl = new StringBuilder( + String.format("jdbc:singlestore://%s:%s/%s", config.get(JdbcUtils.HOST_KEY).asText(), config.get(JdbcUtils.PORT_KEY).asText(), + encodedDatabaseName)); + jdbcUrl.append("?yearIsDateType=false"); + jdbcUrl.append("&tinyInt1isBit=false"); + // metrics + jdbcUrl.append(String.format("&_connector_name=%s", "Airbyte Source Connector")); + if (config.get(JdbcUtils.JDBC_URL_PARAMS_KEY) != null && !config.get(JdbcUtils.JDBC_URL_PARAMS_KEY).asText().isEmpty()) { + jdbcUrl.append(JdbcUtils.AMPERSAND).append(config.get(JdbcUtils.JDBC_URL_PARAMS_KEY).asText()); + } + final Map sslParameters = JdbcSSLConnectionUtils.parseSSLConfig(config); + jdbcUrl.append(JdbcUtils.AMPERSAND).append(toJDBCQueryParams(sslParameters)); + final ImmutableMap.Builder configBuilder = ImmutableMap.builder() + .put(JdbcUtils.USERNAME_KEY, config.get(JdbcUtils.USERNAME_KEY).asText()).put(JdbcUtils.JDBC_URL_KEY, jdbcUrl.toString()); + if (config.has(JdbcUtils.PASSWORD_KEY)) { + configBuilder.put(JdbcUtils.PASSWORD_KEY, config.get(JdbcUtils.PASSWORD_KEY).asText()); + } + return Jsons.jsonNode(configBuilder.build()); + } + + /** + * Generates SSL related query parameters from map of parsed values. + * + * @param sslParams ssl parameters + * @return SSL portion of JDBC question params or and empty string + */ + public String toJDBCQueryParams(final Map sslParams) { + return Objects.isNull(sslParams) ? 
"" : sslParams.entrySet().stream().map((entry) -> switch (entry.getKey()) { + case JdbcSSLConnectionUtils.SSL_MODE -> JdbcSSLConnectionUtils.SSL_MODE + EQUALS + + com.singlestore.jdbc.export.SslMode.from(entry.getValue()).name(); + case TRUST_KEY_STORE_URL -> "trustStore" + EQUALS + entry.getValue(); + case TRUST_KEY_STORE_PASS -> "trustStorePassword" + EQUALS + entry.getValue(); + case TRUST_KEY_STORE_TYPE -> "trustStoreType" + EQUALS + entry.getValue(); + case CLIENT_KEY_STORE_URL -> "keyStore" + EQUALS + entry.getValue(); + case CLIENT_KEY_STORE_PASS -> "keyStorePassword" + EQUALS + entry.getValue(); + case CLIENT_KEY_STORE_TYPE -> "keyStoreType" + EQUALS + entry.getValue(); + default -> ""; + }).filter(s -> Objects.nonNull(s) && !s.isEmpty()).collect(Collectors.joining(JdbcUtils.AMPERSAND)); + } + + @Override + public List> getIncrementalIterators(final JdbcDatabase database, + final ConfiguredAirbyteCatalog catalog, + final Map>> tableNameToTable, + final StateManager stateManager, + final Instant emittedAt) { + final JsonNode sourceConfig = database.getSourceConfig(); + if (SingleStoreInitialReadUtil.isAnyStreamIncrementalSyncMode(catalog)) { + final SingleStoreCursorBasedStateManager cursorBasedStateManager = new SingleStoreCursorBasedStateManager(stateManager.getRawStateMessages(), + catalog); + LOGGER.info("Syncing via Primary Key"); + final InitialLoadStreams initialLoadStreams = SingleStoreInitialReadUtil.streamsForInitialPrimaryKeyLoad(stateManager, catalog); + final Map pairToCursorBasedStatus = SingleStoreQueryUtils.getCursorBasedSyncStatusForStreams( + database, initialLoadStreams.streamsForInitialLoad(), stateManager, getQuoteString()); + final CursorBasedStreams cursorBasedStreams = new CursorBasedStreams( + RelationalDbReadUtil.identifyStreamsForCursorBased(catalog, initialLoadStreams.streamsForInitialLoad()), pairToCursorBasedStatus); + + SingleStoreQueryUtils.logStreamSyncStatus(initialLoadStreams.streamsForInitialLoad(), "Primary Key"); + 
SingleStoreQueryUtils.logStreamSyncStatus(cursorBasedStreams.streamsForCursorBased(), "Cursor"); + + final SingleStoreInitialLoadStreamStateManager singleStoreInitialLoadStreamStateManager = new SingleStoreInitialLoadStreamStateManager( + initialLoadStreams, + SingleStoreInitialReadUtil.initPairToPrimaryKeyInfoMap(database, initialLoadStreams, tableNameToTable, getQuoteString())); + final SingleStoreInitialLoadHandler initialLoadHandler = new SingleStoreInitialLoadHandler(sourceConfig, database, + new SingleStoreSourceOperations(), getQuoteString(), singleStoreInitialLoadStreamStateManager, + namespacePair -> Jsons.jsonNode(pairToCursorBasedStatus.get(convertNameNamespacePairFromV0(namespacePair)))); + final List> initialLoadIterator = new ArrayList<>( + initialLoadHandler.getIncrementalIterators(new ConfiguredAirbyteCatalog().withStreams(initialLoadStreams.streamsForInitialLoad()), + tableNameToTable, emittedAt)); + + // Build Cursor based iterator + final List> cursorBasedIterator = new ArrayList<>( + super.getIncrementalIterators(database, new ConfiguredAirbyteCatalog().withStreams(cursorBasedStreams.streamsForCursorBased()), + tableNameToTable, cursorBasedStateManager, emittedAt)); + return Stream.of(initialLoadIterator, cursorBasedIterator).flatMap(Collection::stream).collect(Collectors.toList()); + } + return super.getIncrementalIterators(database, catalog, tableNameToTable, stateManager, emittedAt); + } + + @Override + public Set getExcludedInternalNameSpaces() { + return Set.of("information_schema", "memsql", "cluster"); + } + + public static void main(final String[] args) throws Exception { + final Source source = new SingleStoreSource(); + LOGGER.info("starting source: {}", SingleStoreSource.class); + new IntegrationRunner(source).run(args); + LOGGER.info("completed source: {}", SingleStoreSource.class); + } + +} diff --git 
a/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceOperations.java b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceOperations.java new file mode 100644 index 000000000000..39e6e3fda66e --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceOperations.java @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.singlestore; + +import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_COLUMN_TYPE_NAME; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.BIGINT; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.BIGINT_UNSIGNED; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.DATE; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.DATETIME; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.DECIMAL; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.DECIMAL_UNSIGNED; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.DOUBLE; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.DOUBLE_UNSIGNED; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.FLOAT; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.FLOAT_UNSIGNED; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.INT; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.INT_UNSIGNED; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.LONGTEXT; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.MEDIUMINT; +import static 
io.airbyte.integrations.source.singlestore.SingleStoreType.MEDIUMINT_UNSIGNED; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.MEDIUMTEXT; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.SMALLINT; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.SMALLINT_UNSIGNED; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.TEXT; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.TIME; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.TIMESTAMP; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.TINYINT; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.TINYINT_UNSIGNED; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.TINYTEXT; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.VARCHAR; +import static io.airbyte.integrations.source.singlestore.SingleStoreType.YEAR; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.NullNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.singlestore.jdbc.client.result.ResultSetMetaData; +import io.airbyte.cdk.db.SourceOperations; +import io.airbyte.cdk.db.jdbc.AbstractJdbcCompatibleSourceOperations; +import io.airbyte.cdk.db.jdbc.JdbcConstants; +import io.airbyte.protocol.models.JsonSchemaType; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Set; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SingleStoreSourceOperations extends AbstractJdbcCompatibleSourceOperations implements + SourceOperations { + + private static final Set ALLOWED_CURSOR_TYPES = Set.of(TINYINT, TINYINT_UNSIGNED, SMALLINT, SMALLINT_UNSIGNED, MEDIUMINT, + MEDIUMINT_UNSIGNED, INT, INT_UNSIGNED, 
BIGINT, BIGINT_UNSIGNED, FLOAT, FLOAT_UNSIGNED, DOUBLE, DOUBLE_UNSIGNED, DECIMAL, DECIMAL_UNSIGNED, DATE, + DATETIME, TIMESTAMP, TIME, YEAR, VARCHAR, TINYTEXT, TEXT, MEDIUMTEXT, LONGTEXT); + + private static final Logger LOGGER = LoggerFactory.getLogger(SingleStoreSourceOperations.class); + + @Override + public void copyToJsonField(@NotNull ResultSet resultSet, int colIndex, @NotNull ObjectNode json) throws SQLException { + final ResultSetMetaData metaData = (ResultSetMetaData) resultSet.getMetaData(); + String type = metaData.getColumnTypeName(colIndex); + final String columnName = metaData.getColumnName(colIndex); + SingleStoreType columnType = SingleStoreType.getByName(type); + + switch (columnType) { + case BIT, TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB, BINARY, VARBINARY -> putBinary(json, columnName, resultSet, colIndex); + case TINYINT, TINYINT_UNSIGNED, YEAR -> putShortInt(json, columnName, resultSet, colIndex); + case SMALLINT, SMALLINT_UNSIGNED, MEDIUMINT, MEDIUMINT_UNSIGNED, INT -> putInteger(json, columnName, resultSet, colIndex); + case INT_UNSIGNED, BIGINT, BIGINT_UNSIGNED -> putBigInt(json, columnName, resultSet, colIndex); + case FLOAT, FLOAT_UNSIGNED -> putFloat(json, columnName, resultSet, colIndex); + case DOUBLE, DOUBLE_UNSIGNED -> putDouble(json, columnName, resultSet, colIndex); + case DECIMAL, DECIMAL_UNSIGNED -> putBigDecimal(json, columnName, resultSet, colIndex); + case DATE -> putDate(json, columnName, resultSet, colIndex); + case DATETIME, TIMESTAMP -> putTimestamp(json, columnName, resultSet, colIndex); + case TIME, CHAR, VARCHAR, TINYTEXT, TEXT, MEDIUMTEXT, LONGTEXT, JSON, ENUM, SET -> putString(json, columnName, resultSet, colIndex); + case NULL -> json.set(columnName, NullNode.instance); + default -> putDefault(json, columnName, resultSet, colIndex); + } + } + + @Override + public void setCursorField(@NotNull PreparedStatement preparedStatement, + int parameterIndex, + final SingleStoreType cursorFieldType, + @NotNull String value) + 
throws SQLException { + if (cursorFieldType == null) { + throw new IllegalArgumentException("NULL cannot be used as a cursor."); + } + switch (cursorFieldType) { + case BIT -> setBit(preparedStatement, parameterIndex, value); + case YEAR, TINYINT, TINYINT_UNSIGNED, SMALLINT, SMALLINT_UNSIGNED, MEDIUMINT, MEDIUMINT_UNSIGNED -> setInteger(preparedStatement, + parameterIndex, value); + case INT, INT_UNSIGNED, BIGINT, BIGINT_UNSIGNED -> setBigInteger(preparedStatement, parameterIndex, value); + case FLOAT, FLOAT_UNSIGNED, DOUBLE, DOUBLE_UNSIGNED -> setDouble(preparedStatement, parameterIndex, value); + case DECIMAL, DECIMAL_UNSIGNED -> setDecimal(preparedStatement, parameterIndex, value); + case DATE -> setDate(preparedStatement, parameterIndex, value); + case DATETIME, TIMESTAMP -> setTimestamp(preparedStatement, parameterIndex, value); + case TIME, VARCHAR, TINYTEXT, TEXT, MEDIUMTEXT, LONGTEXT -> setString(preparedStatement, parameterIndex, value); + case TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB, BINARY, VARBINARY -> setBinary(preparedStatement, parameterIndex, value); + default -> throw new IllegalArgumentException(String.format("%s cannot be used as a cursor.", cursorFieldType)); + } + } + + @Override + public SingleStoreType getDatabaseFieldType(@NotNull JsonNode field) { + try { + return SingleStoreType.getByName(field.get(INTERNAL_COLUMN_TYPE_NAME).asText()); + } catch (IllegalArgumentException ex) { + LOGGER.warn(String.format("Could not convert column: %s from table: %s.%s with type: %s. 
Casting to VARCHAR.", + field.get(JdbcConstants.INTERNAL_COLUMN_NAME), field.get(JdbcConstants.INTERNAL_SCHEMA_NAME), field.get(JdbcConstants.INTERNAL_TABLE_NAME), + field.get(JdbcConstants.INTERNAL_COLUMN_TYPE))); + return SingleStoreType.VARCHAR; + } + } + + @Override + public boolean isCursorType(@Nullable SingleStoreType singlestoreType) { + return ALLOWED_CURSOR_TYPES.contains(singlestoreType); + } + + @NotNull + @Override + public JsonSchemaType getAirbyteType(SingleStoreType singlestoreType) { + return switch (singlestoreType) { + case TINYINT, TINYINT_UNSIGNED, SMALLINT, SMALLINT_UNSIGNED, MEDIUMINT, MEDIUMINT_UNSIGNED, INT, INT_UNSIGNED, BIGINT, BIGINT_UNSIGNED -> JsonSchemaType.INTEGER; + case FLOAT, FLOAT_UNSIGNED, DOUBLE, DOUBLE_UNSIGNED, DECIMAL, DECIMAL_UNSIGNED -> JsonSchemaType.NUMBER; + case BIT, TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB, BINARY, VARBINARY -> JsonSchemaType.STRING_BASE_64; + case DATETIME, TIMESTAMP -> JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE; + case DATE -> JsonSchemaType.STRING_DATE; + case NULL -> JsonSchemaType.NULL; + default -> JsonSchemaType.STRING; + }; + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreType.java b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreType.java new file mode 100644 index 000000000000..ddafb30d4b0f --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/SingleStoreType.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
/**
 * Enumeration of SingleStore column types, each mapped to the closest {@link java.sql.Types} code.
 * Implements {@link SQLType} so instances can be used directly with JDBC APIs.
 */
public enum SingleStoreType implements SQLType {

  VARCHAR("VARCHAR", Types.VARCHAR),
  BIGINT("BIGINT", Types.BIGINT),
  BIGINT_UNSIGNED("BIGINT UNSIGNED", Types.BIGINT),
  FLOAT("FLOAT", Types.REAL),
  FLOAT_UNSIGNED("FLOAT UNSIGNED", Types.REAL),
  DOUBLE("DOUBLE", Types.DOUBLE),
  DOUBLE_UNSIGNED("DOUBLE UNSIGNED", Types.DOUBLE),
  DECIMAL("DECIMAL", Types.DECIMAL),
  DECIMAL_UNSIGNED("DECIMAL UNSIGNED", Types.DECIMAL),
  TINYINT("TINYINT", Types.TINYINT),
  TINYINT_UNSIGNED("TINYINT UNSIGNED", Types.TINYINT),
  SMALLINT("SMALLINT", Types.SMALLINT),
  SMALLINT_UNSIGNED("SMALLINT UNSIGNED", Types.SMALLINT),
  INT("INT", Types.INTEGER),
  INT_UNSIGNED("INT UNSIGNED", Types.INTEGER),
  MEDIUMINT("MEDIUMINT", Types.INTEGER),
  MEDIUMINT_UNSIGNED("MEDIUMINT UNSIGNED", Types.INTEGER),
  LONGTEXT("LONGTEXT", Types.LONGVARCHAR),
  VARBINARY("VARBINARY", Types.VARBINARY),
  JSON("JSON", Types.LONGVARCHAR),
  DATETIME("DATETIME", Types.TIMESTAMP),
  TIMESTAMP("TIMESTAMP", Types.TIMESTAMP),
  TEXT("TEXT", Types.LONGVARCHAR),
  MEDIUMTEXT("MEDIUMTEXT", Types.LONGVARCHAR),
  SET("SET", Types.VARCHAR),
  ENUM("ENUM", Types.VARCHAR),
  TINYBLOB("TINYBLOB", Types.VARBINARY),
  BLOB("BLOB", Types.LONGVARBINARY),
  MEDIUMBLOB("MEDIUMBLOB", Types.LONGVARBINARY),
  LONGBLOB("LONGBLOB", Types.LONGVARBINARY),
  BIT("BIT", Types.BIT),
  DATE("DATE", Types.DATE),
  TIME("TIME", Types.TIME),
  YEAR("YEAR", Types.INTEGER),
  CHAR("CHAR", Types.CHAR),
  BINARY("BINARY", Types.BINARY),
  TINYTEXT("TINYTEXT", Types.VARCHAR),
  GEOGRAPHYPOINT("GEOGRAPHYPOINT", Types.OTHER),
  GEOGRAPHY("GEOGRAPHY", Types.OTHER),
  VECTOR("VECTOR", Types.OTHER),
  NULL("NULL", Types.NULL);

  // Vendor type name as reported by the SingleStore driver/metadata.
  private final String singleStoreTypeName;
  // Corresponding java.sql.Types constant.
  private final int type;

  SingleStoreType(final String singleStoreTypeName, final int type) {
    this.singleStoreTypeName = singleStoreTypeName;
    this.type = type;
  }

  /**
   * Resolves a type from its vendor name, case-insensitively.
   *
   * @param name vendor type name, e.g. {@code "BIGINT UNSIGNED"}
   * @return the matching {@link SingleStoreType}
   * @throws IllegalArgumentException if no constant matches {@code name}
   */
  public static SingleStoreType getByName(final String name) {
    return Arrays.stream(values())
        .filter(v -> v.getName().equalsIgnoreCase(name))
        .findFirst()
        // Fix: previous message read "Type:x is not a valid." — grammatically broken.
        .orElseThrow(() -> new IllegalArgumentException("Type: " + name + " is not a valid SingleStore type."));
  }

  @Override
  public String getName() {
    return singleStoreTypeName;
  }

  @Override
  public String getVendor() {
    return "com.singlestore";
  }

  @Override
  public Integer getVendorTypeNumber() {
    return type;
  }

}
+ */ + +package io.airbyte.integrations.source.singlestore.cursor_based; + +import com.google.common.collect.Lists; +import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo; +import io.airbyte.cdk.integrations.source.relationaldb.state.StreamStateManager; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.singlestore.internal.models.CursorBasedStatus; +import io.airbyte.integrations.source.singlestore.internal.models.InternalModels.StateType; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.AirbyteStreamState; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.StreamDescriptor; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SingleStoreCursorBasedStateManager extends StreamStateManager { + + private static final Logger LOGGER = LoggerFactory.getLogger(SingleStoreCursorBasedStateManager.class); + + public SingleStoreCursorBasedStateManager(final List airbyteStateMessages, final ConfiguredAirbyteCatalog catalog) { + super(airbyteStateMessages, catalog); + } + + @Override + public AirbyteStateMessage toState(@NotNull final Optional pair) { + if (pair.isPresent()) { + final Map pairToCursorInfoMap = getPairToCursorInfoMap(); + final Optional cursorInfo = Optional.ofNullable(pairToCursorInfoMap.get(pair.get())); + + if (cursorInfo.isPresent()) { + LOGGER.debug("Generating state message for {}...", pair); + return new AirbyteStateMessage().withType(AirbyteStateType.STREAM) + .withStream(generateStreamState(pair.get(), cursorInfo.get())); + } else { + LOGGER.warn("Cursor information could not be located in state for stream 
{}. Returning a new, empty state message...", pair); + return new AirbyteStateMessage().withType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState()); + } + } else { + LOGGER.warn("Stream not provided. Returning a new, empty state message..."); + return new AirbyteStateMessage().withType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState()); + } + } + + /** + * Generates the stream state for the given stream and cursor information. + * + * @param airbyteStreamNameNamespacePair The stream. + * @param cursorInfo The current cursor. + * @return The {@link AirbyteStreamState} representing the current state of the stream. + */ + private static AirbyteStreamState generateStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair, + final CursorInfo cursorInfo) { + return new AirbyteStreamState().withStreamDescriptor( + new StreamDescriptor().withName(airbyteStreamNameNamespacePair.getName()).withNamespace(airbyteStreamNameNamespacePair.getNamespace())) + .withStreamState(Jsons.jsonNode(generateDbStreamState(airbyteStreamNameNamespacePair, cursorInfo))); + } + + private static CursorBasedStatus generateDbStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair, + final CursorInfo cursorInfo) { + final CursorBasedStatus state = new CursorBasedStatus(); + state.setStateType(StateType.CURSOR_BASED); + state.setStreamName(airbyteStreamNameNamespacePair.getName()); + state.setStreamNamespace(airbyteStreamNameNamespacePair.getNamespace()); + state.setCursorField(cursorInfo.getCursorField() == null ? 
Collections.emptyList() : Lists.newArrayList(cursorInfo.getCursorField())); + state.setCursor(cursorInfo.getCursor()); + if (cursorInfo.getCursorRecordCount() > 0L) { + state.setCursorRecordCount(cursorInfo.getCursorRecordCount()); + } + return state; + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadHandler.java b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadHandler.java new file mode 100644 index 000000000000..91bc4ad7d901 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadHandler.java @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.singlestore.initialsync; + +import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_DURATION_PROPERTY; +import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants; +import io.airbyte.cdk.integrations.source.relationaldb.DbSourceDiscoverUtil; +import io.airbyte.cdk.integrations.source.relationaldb.TableInfo; +import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateIterator; +import io.airbyte.cdk.integrations.source.relationaldb.state.StateEmitFrequency; +import io.airbyte.commons.stream.AirbyteStreamUtils; +import io.airbyte.commons.util.AutoCloseableIterator; +import io.airbyte.commons.util.AutoCloseableIterators; +import io.airbyte.integrations.source.singlestore.SingleStoreSourceOperations; +import io.airbyte.integrations.source.singlestore.SingleStoreType; +import 
io.airbyte.protocol.models.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.CommonField; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteMessage.Type; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.SyncMode; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SingleStoreInitialLoadHandler { + + private static final Logger LOGGER = LoggerFactory.getLogger(SingleStoreInitialLoadHandler.class); + private static final long RECORD_LOGGING_SAMPLE_RATE = 1_000_000; + private static final long DEFAULT_CHUNK_SIZE = 1_000_000; + private final JsonNode config; + private final JdbcDatabase database; + private final SingleStoreSourceOperations sourceOperations; + private final String quoteString; + private final SingleStoreInitialLoadStreamStateManager initialLoadStateManager; + private final Function streamStateForIncrementalRunSupplier; + + public SingleStoreInitialLoadHandler(final JsonNode config, + final JdbcDatabase database, + final SingleStoreSourceOperations sourceOperations, + final String quoteString, + final SingleStoreInitialLoadStreamStateManager initialLoadStateManager, + final Function streamStateForIncrementalRunSupplier) { + this.config = config; + this.database = database; + this.sourceOperations = sourceOperations; + this.quoteString = quoteString; + this.initialLoadStateManager = initialLoadStateManager; + 
this.streamStateForIncrementalRunSupplier = streamStateForIncrementalRunSupplier; + } + + public List> getIncrementalIterators(final ConfiguredAirbyteCatalog catalog, + final Map>> tableNameToTable, + final Instant emittedAt) { + final List> iteratorList = new ArrayList<>(); + for (final ConfiguredAirbyteStream airbyteStream : catalog.getStreams()) { + final AirbyteStream stream = airbyteStream.getStream(); + final String streamName = stream.getName(); + final String namespace = stream.getNamespace(); + final List primaryKeys = stream.getSourceDefinedPrimaryKey().stream().flatMap(pk -> Stream.of(pk.get(0))).toList(); + final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamName, namespace); + final String fullyQualifiedTableName = DbSourceDiscoverUtil.getFullyQualifiedTableName(namespace, streamName); + if (!tableNameToTable.containsKey(fullyQualifiedTableName)) { + LOGGER.info("Skipping stream {} because it is not in the source", fullyQualifiedTableName); + continue; + } + if (airbyteStream.getSyncMode().equals(SyncMode.INCREMENTAL)) { + // Grab the selected fields to sync + final TableInfo> table = tableNameToTable.get(fullyQualifiedTableName); + final List selectedDatabaseFields = table.getFields().stream().map(CommonField::getName) + .filter(CatalogHelpers.getTopLevelFieldNames(airbyteStream)::contains).collect(Collectors.toList()); + + // This is to handle the case if the user de-selects the PK column + // Necessary to query the data via pk but won't be added to the final record + primaryKeys.forEach(pk -> { + if (!selectedDatabaseFields.contains(pk)) { + selectedDatabaseFields.add(0, pk); + } + }); + + final AutoCloseableIterator queryStream = new SingleStoreInitialLoadRecordIterator(database, sourceOperations, quoteString, + initialLoadStateManager, selectedDatabaseFields, pair, DEFAULT_CHUNK_SIZE, isCompositePrimaryKey(airbyteStream)); + final AutoCloseableIterator recordIterator = getRecordIterator(queryStream, streamName, 
namespace, emittedAt.toEpochMilli()); + final AutoCloseableIterator recordAndMessageIterator = augmentWithState(recordIterator, airbyteStream, pair); + iteratorList.add(augmentWithLogs(recordAndMessageIterator, pair, streamName)); + } + } + return iteratorList; + } + + private static boolean isCompositePrimaryKey(final ConfiguredAirbyteStream stream) { + return stream.getStream().getSourceDefinedPrimaryKey().size() > 1; + } + + // Transforms the given iterator to create an {@link AirbyteRecordMessage} + private AutoCloseableIterator getRecordIterator(final AutoCloseableIterator recordIterator, + final String streamName, + final String namespace, + final long emittedAt) { + return AutoCloseableIterators.transform(recordIterator, r -> new AirbyteMessage().withType(Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace).withEmittedAt(emittedAt).withData(r))); + } + + // Augments the given iterator with record count logs. + private AutoCloseableIterator augmentWithLogs(final AutoCloseableIterator iterator, + final AirbyteStreamNameNamespacePair pair, + final String streamName) { + final AtomicLong recordCount = new AtomicLong(); + return AutoCloseableIterators.transform(iterator, AirbyteStreamUtils.convertFromNameAndNamespace(pair.getName(), pair.getNamespace()), r -> { + final long count = recordCount.incrementAndGet(); + if (count % RECORD_LOGGING_SAMPLE_RATE == 0) { + LOGGER.info("Reading stream {}. Records read: {}", streamName, count); + } + return r; + }); + } + + private AutoCloseableIterator augmentWithState(final AutoCloseableIterator recordIterator, + final ConfiguredAirbyteStream airbyteStream, + final AirbyteStreamNameNamespacePair pair) { + final Duration syncCheckpointDuration = + config.get(SYNC_CHECKPOINT_DURATION_PROPERTY) != null ? 
Duration.ofSeconds(config.get(SYNC_CHECKPOINT_DURATION_PROPERTY).asLong()) + : DebeziumIteratorConstants.SYNC_CHECKPOINT_DURATION; + final long syncCheckpointRecords = config.get(SYNC_CHECKPOINT_RECORDS_PROPERTY) != null ? config.get(SYNC_CHECKPOINT_RECORDS_PROPERTY).asLong() + : DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS; + initialLoadStateManager.setStreamStateForIncrementalRunSupplier(streamStateForIncrementalRunSupplier); + return AutoCloseableIterators.transformIterator(r -> new SourceStateIterator<>(r, airbyteStream, initialLoadStateManager, + new StateEmitFrequency(syncCheckpointRecords, syncCheckpointDuration)), recordIterator, pair); + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadRecordIterator.java b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadRecordIterator.java new file mode 100644 index 000000000000..1b740fcab629 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadRecordIterator.java @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.singlestore.initialsync; + +import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.enquoteIdentifier; +import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getFullyQualifiedTableNameWithQuoting; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.AbstractIterator; +import io.airbyte.cdk.db.JdbcCompatibleSourceOperations; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils; +import io.airbyte.commons.util.AutoCloseableIterator; +import io.airbyte.commons.util.AutoCloseableIterators; +import io.airbyte.integrations.source.singlestore.SingleStoreType; +import io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialReadUtil.PrimaryKeyInfo; +import io.airbyte.integrations.source.singlestore.internal.models.PrimaryKeyLoadStatus; +import io.airbyte.protocol.models.AirbyteStreamNameNamespacePair; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.List; +import java.util.stream.Stream; +import javax.annotation.CheckForNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@SuppressWarnings("try") +public class SingleStoreInitialLoadRecordIterator extends AbstractIterator implements AutoCloseableIterator { + + private static final Logger LOGGER = LoggerFactory.getLogger(SingleStoreInitialLoadRecordIterator.class); + private final JdbcCompatibleSourceOperations sourceOperations; + private final String quoteString; + private final SingleStoreInitialLoadStreamStateManager initialLoadStateManager; + private final List columnNames; + private final AirbyteStreamNameNamespacePair pair; + private final JdbcDatabase database; + // Represents the number of rows to get with each query. 
+ private final long chunkSize; + private final PrimaryKeyInfo pkInfo; + private final boolean isCompositeKeyLoad; + private int numSubqueries = 0; + private AutoCloseableIterator currentIterator; + + SingleStoreInitialLoadRecordIterator(final JdbcDatabase database, + final JdbcCompatibleSourceOperations sourceOperations, + final String quoteString, + final SingleStoreInitialLoadStreamStateManager initialLoadStateManager, + final List columnNames, + final AirbyteStreamNameNamespacePair pair, + final long chunkSize, + final boolean isCompositeKeyLoad) { + this.database = database; + this.sourceOperations = sourceOperations; + this.quoteString = quoteString; + this.initialLoadStateManager = initialLoadStateManager; + this.columnNames = columnNames; + this.pair = pair; + this.chunkSize = chunkSize; + this.pkInfo = initialLoadStateManager.getPrimaryKeyInfo(pair); + this.isCompositeKeyLoad = isCompositeKeyLoad; + } + + @CheckForNull + @Override + protected JsonNode computeNext() { + if (shouldBuildNextSubquery()) { + try { + // We will only issue one query for a composite key load. If we have already processed all the data + // associated with this + // query, we should indicate that we are done processing for the given stream. + if (isCompositeKeyLoad && numSubqueries >= 1) { + return endOfData(); + } + // Previous stream (and connection) must be manually closed in this iterator. + if (currentIterator != null) { + currentIterator.close(); + } + + LOGGER.info("Subquery number : {}", numSubqueries); + final Stream stream = database.unsafeQuery(this::getPkPreparedStatement, sourceOperations::rowToJson); + + currentIterator = AutoCloseableIterators.fromStream(stream, pair); + numSubqueries++; + // If the current subquery has no records associated with it, the entire stream has been read. 
+ if (!currentIterator.hasNext()) { + return endOfData(); + } + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + return currentIterator.next(); + } + + private boolean shouldBuildNextSubquery() { + // The next sub-query should be built if (i) it is the first subquery in the sequence. (ii) the + // previous subquery has finished. + return (currentIterator == null || !currentIterator.hasNext()); + } + + private PreparedStatement getPkPreparedStatement(final Connection connection) { + try { + final String tableName = pair.getName(); + final String schemaName = pair.getNamespace(); + LOGGER.info("Preparing query for table: {}", tableName); + final String fullTableName = getFullyQualifiedTableNameWithQuoting(schemaName, tableName, quoteString); + final String wrappedColumnNames = RelationalDbQueryUtils.enquoteIdentifierList(columnNames, quoteString); + final PrimaryKeyLoadStatus pkLoadStatus = initialLoadStateManager.getPrimaryKeyLoadStatus(pair); + if (pkLoadStatus == null) { + LOGGER.info("pkLoadStatus is null"); + final String quotedCursorField = enquoteIdentifier(pkInfo.pkFieldName(), quoteString); + final String sql; + // We cannot load in chunks for a composite key load, since each field might not have distinct + // values. 
+ if (isCompositeKeyLoad) { + sql = String.format("SELECT %s FROM %s ORDER BY %s", wrappedColumnNames, fullTableName, quotedCursorField); + } else { + sql = String.format("SELECT %s FROM %s ORDER BY %s LIMIT %s", wrappedColumnNames, fullTableName, quotedCursorField, chunkSize); + } + final PreparedStatement preparedStatement = connection.prepareStatement(sql); + LOGGER.info("Executing query for table {}: {}", tableName, preparedStatement); + return preparedStatement; + } else { + LOGGER.info("pkLoadStatus value is : {}", pkLoadStatus.getPkVal()); + final String quotedCursorField = enquoteIdentifier(pkLoadStatus.getPkName(), quoteString); + final String sql; + // We cannot load in chunks for a composite key load, since each field might not have distinct + // values. Furthermore, we have to issue a >= + // query since we may not have processed all of the data associated with the last saved primary key + // value. + if (isCompositeKeyLoad) { + sql = String.format("SELECT %s FROM %s WHERE %s >= ? ORDER BY %s", wrappedColumnNames, fullTableName, quotedCursorField, quotedCursorField); + } else { + // The pk max value could be null - this can happen in the case of empty tables. In this case, we + // can just issue a query + // without any chunking. + if (pkInfo.pkMaxValue() != null) { + sql = String.format("SELECT %s FROM %s WHERE %s > ? AND %s <= ? ORDER BY %s LIMIT %s", wrappedColumnNames, fullTableName, + quotedCursorField, quotedCursorField, quotedCursorField, chunkSize); + } else { + sql = String.format("SELECT %s FROM %s WHERE %s > ? 
ORDER BY %s", wrappedColumnNames, fullTableName, quotedCursorField, + quotedCursorField); + } + } + final PreparedStatement preparedStatement = connection.prepareStatement(sql); + final SingleStoreType cursorFieldType = pkInfo.fieldType(); + sourceOperations.setCursorField(preparedStatement, 1, cursorFieldType, pkLoadStatus.getPkVal()); + if (!isCompositeKeyLoad && pkInfo.pkMaxValue() != null) { + sourceOperations.setCursorField(preparedStatement, 2, cursorFieldType, pkInfo.pkMaxValue()); + } + LOGGER.info("Executing query for table {}: {}", tableName, preparedStatement); + return preparedStatement; + } + } catch (final SQLException e) { + throw new RuntimeException(e); + } + } + + @Override + public void close() throws Exception { + if (currentIterator != null) { + currentIterator.close(); + } + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadStreamStateManager.java b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadStreamStateManager.java new file mode 100644 index 000000000000..d63dd8dd6f61 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialLoadStreamStateManager.java @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.singlestore.initialsync; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateMessageProducer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialReadUtil.InitialLoadStreams; +import io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialReadUtil.PrimaryKeyInfo; +import io.airbyte.integrations.source.singlestore.internal.models.InternalModels.StateType; +import io.airbyte.integrations.source.singlestore.internal.models.PrimaryKeyLoadStatus; +import io.airbyte.protocol.models.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.v0.AirbyteStreamState; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.StreamDescriptor; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SingleStoreInitialLoadStreamStateManager implements SourceStateMessageProducer { + + private static final Logger LOGGER = LoggerFactory.getLogger(SingleStoreInitialLoadStreamStateManager.class); + public static final String STATE_TYPE_KEY = "state_type"; + public static final String PRIMARY_KEY_STATE_TYPE = "primary_key"; + private Function streamStateForIncrementalRunSupplier; + private final Map pairToPrimaryKeyLoadStatus; + // Map of pair to the primary key info (field name & data type) associated with it. 
+ private final Map pairToPrimaryKeyInfo; + + public SingleStoreInitialLoadStreamStateManager(final InitialLoadStreams initialLoadStreams, + final Map pairToPrimaryKeyInfo) { + this.pairToPrimaryKeyInfo = pairToPrimaryKeyInfo; + this.pairToPrimaryKeyLoadStatus = initPairToPrimaryKeyLoadStatusMap(initialLoadStreams.pairToInitialLoadStatus()); + } + + @Override + public AirbyteStateMessage generateStateMessageAtCheckpoint(final ConfiguredAirbyteStream stream) { + AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()); + var pkStatus = getPrimaryKeyLoadStatus(pair); + return new AirbyteStateMessage().withType(AirbyteStateType.STREAM).withStream(getAirbyteStreamState(pair, Jsons.jsonNode(pkStatus))); + } + + @Override + public AirbyteMessage processRecordMessage(final ConfiguredAirbyteStream stream, final AirbyteMessage message) { + if (Objects.nonNull(message)) { + final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()); + final String pkFieldName = this.getPrimaryKeyInfo(pair).pkFieldName(); + final String lastPk = message.getRecord().getData().get(pkFieldName).asText(); + final PrimaryKeyLoadStatus pkStatus = new PrimaryKeyLoadStatus().withStateType(StateType.PRIMARY_KEY).withPkName(pkFieldName).withPkVal(lastPk) + .withIncrementalState(getIncrementalState(pair)); + pairToPrimaryKeyLoadStatus.put(pair, pkStatus); + } + return message; + } + + @Override + public AirbyteStateMessage createFinalStateMessage(final ConfiguredAirbyteStream stream) { + AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()); + final JsonNode incrementalState = getIncrementalState(pair); + return new AirbyteStateMessage().withType(AirbyteStateType.STREAM).withStream(getAirbyteStreamState(pair, incrementalState)); + } + + @Override + public 
boolean shouldEmitStateMessage(final ConfiguredAirbyteStream stream) { + return true; + } + + void setStreamStateForIncrementalRunSupplier(final Function streamStateForIncrementalRunSupplier) { + this.streamStateForIncrementalRunSupplier = streamStateForIncrementalRunSupplier; + } + + public PrimaryKeyLoadStatus getPrimaryKeyLoadStatus(final AirbyteStreamNameNamespacePair pair) { + return pairToPrimaryKeyLoadStatus.get(pair); + } + + public PrimaryKeyInfo getPrimaryKeyInfo(final AirbyteStreamNameNamespacePair pair) { + return pairToPrimaryKeyInfo.get(pair); + } + + private JsonNode getIncrementalState(final AirbyteStreamNameNamespacePair pair) { + final PrimaryKeyLoadStatus currentPkLoadStatus = getPrimaryKeyLoadStatus(pair); + return (currentPkLoadStatus == null || currentPkLoadStatus.getIncrementalState() == null) ? streamStateForIncrementalRunSupplier.apply(pair) + : currentPkLoadStatus.getIncrementalState(); + } + + private static AirbyteStreamState getAirbyteStreamState(final AirbyteStreamNameNamespacePair pair, final JsonNode stateData) { + LOGGER.info("STATE DATA FOR {}: {}", pair.getNamespace().concat("_").concat(pair.getName()), stateData); + assert Objects.nonNull(pair.getName()); + assert Objects.nonNull(pair.getNamespace()); + return new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(pair.getName()).withNamespace(pair.getNamespace())) + .withStreamState(stateData); + } + + private static Map initPairToPrimaryKeyLoadStatusMap( + final Map pairToPkStatus) { + final Map map = new HashMap<>(); + pairToPkStatus.forEach((pair, pkStatus) -> { + final AirbyteStreamNameNamespacePair updatedPair = new AirbyteStreamNameNamespacePair(pair.getName(), pair.getNamespace()); + map.put(updatedPair, pkStatus); + }); + return map; + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialReadUtil.java 
b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialReadUtil.java new file mode 100644 index 000000000000..fe8036a97c1e --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/main/java/io/airbyte/integrations/source/singlestore/initialsync/SingleStoreInitialReadUtil.java @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.singlestore.initialsync; + +import static io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialLoadStreamStateManager.PRIMARY_KEY_STATE_TYPE; +import static io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialLoadStreamStateManager.STATE_TYPE_KEY; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.Sets; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.integrations.source.relationaldb.DbSourceDiscoverUtil; +import io.airbyte.cdk.integrations.source.relationaldb.TableInfo; +import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.singlestore.SingleStoreQueryUtils; +import io.airbyte.integrations.source.singlestore.SingleStoreType; +import io.airbyte.integrations.source.singlestore.internal.models.CursorBasedStatus; +import io.airbyte.integrations.source.singlestore.internal.models.PrimaryKeyLoadStatus; +import io.airbyte.protocol.models.CommonField; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.AirbyteStreamState; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.StreamDescriptor; +import io.airbyte.protocol.models.v0.SyncMode; +import 
java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SingleStoreInitialReadUtil { + + private static final Logger LOGGER = LoggerFactory.getLogger(SingleStoreInitialReadUtil.class); + + /** + * Determines the streams to sync for initial primary key load. These include streams that are (i) + * currently in primary key load (ii) newly added incremental streams. + */ + public static InitialLoadStreams streamsForInitialPrimaryKeyLoad(final StateManager stateManager, final ConfiguredAirbyteCatalog fullCatalog) { + + final List rawStateMessages = stateManager.getRawStateMessages(); + final Set streamsStillInPkSync = new HashSet<>(); + final Set alreadySeenStreamPairs = new HashSet<>(); + + // Build a map of stream <-> initial load status for streams that currently have an initial primary + // key load in progress. + final Map pairToInitialLoadStatus = new HashMap<>(); + if (rawStateMessages != null) { + rawStateMessages.forEach(stateMessage -> { + final AirbyteStreamState stream = stateMessage.getStream(); + final JsonNode streamState = stream.getStreamState(); + final StreamDescriptor streamDescriptor = stateMessage.getStream().getStreamDescriptor(); + if (streamState == null || streamDescriptor == null) { + return; + } + final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamDescriptor.getName(), streamDescriptor.getNamespace()); + // Build a map of stream <-> initial load status for streams that currently have an initial primary + // key load in progress. 
+ if (streamState.has(STATE_TYPE_KEY)) { + if (streamState.get(STATE_TYPE_KEY).asText().equalsIgnoreCase(PRIMARY_KEY_STATE_TYPE)) { + final PrimaryKeyLoadStatus primaryKeyLoadStatus = Jsons.object(streamState, PrimaryKeyLoadStatus.class); + pairToInitialLoadStatus.put(pair, primaryKeyLoadStatus); + streamsStillInPkSync.add(pair); + } + } + alreadySeenStreamPairs.add(new AirbyteStreamNameNamespacePair(streamDescriptor.getName(), streamDescriptor.getNamespace())); + }); + } + final List streamsForPkSync = new ArrayList<>(); + + fullCatalog.getStreams().stream() + .filter(stream -> streamsStillInPkSync.contains(AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream()))).map(Jsons::clone) + .forEach(streamsForPkSync::add); + final List newlyAddedStreams = identifyStreamsToSnapshot(fullCatalog, + Collections.unmodifiableSet(alreadySeenStreamPairs)); + streamsForPkSync.addAll(newlyAddedStreams); + return new InitialLoadStreams( + streamsForPkSync.stream().filter(SingleStoreInitialReadUtil::streamHasPrimaryKey).collect(Collectors.toList()), + pairToInitialLoadStatus); + } + + public static List identifyStreamsToSnapshot( + final ConfiguredAirbyteCatalog catalog, + final Set alreadySyncedStreams) { + final Set allStreams = AirbyteStreamNameNamespacePair.fromConfiguredCatalog( + catalog); + final Set newlyAddedStreams = new HashSet<>( + Sets.difference(allStreams, alreadySyncedStreams)); + return catalog.getStreams().stream().filter(c -> c.getSyncMode() == SyncMode.INCREMENTAL) + .filter(stream -> newlyAddedStreams.contains( + AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream()))) + .map(Jsons::clone) + .collect(Collectors.toList()); + } + + // Build a map of stream <-> primary key info (primary key field name + datatype) for all streams + // currently undergoing initial primary key syncs. 
+ public static Map initPairToPrimaryKeyInfoMap( + final JdbcDatabase database, + final InitialLoadStreams initialLoadStreams, + final Map>> tableNameToTable, + final String quoteString) { + final Map pairToPkInfoMap = new HashMap<>(); + // For every stream that is in primary initial key sync, we want to maintain information about the + // current primary key info associated with the + // stream + initialLoadStreams.streamsForInitialLoad().forEach(stream -> { + final io.airbyte.protocol.models.AirbyteStreamNameNamespacePair pair = new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair( + stream.getStream().getName(), stream.getStream().getNamespace()); + final PrimaryKeyInfo pkInfo = getPrimaryKeyInfo(database, stream, tableNameToTable, quoteString); + pairToPkInfoMap.put(pair, pkInfo); + }); + return pairToPkInfoMap; + } + + // Returns the primary key info associated with the stream. + private static PrimaryKeyInfo getPrimaryKeyInfo(final JdbcDatabase database, + final ConfiguredAirbyteStream stream, + final Map>> tableNameToTable, + final String quoteString) { + // For cursor-based syncs, we cannot always assume a primary key field exists. We need to handle the + // case where it does not exist when we support + // cursor-based syncs. 
+ if (stream.getStream().getSourceDefinedPrimaryKey().size() > 1) { + LOGGER.info("Composite primary key detected for {namespace, stream} : {}, {}", stream.getStream().getNamespace(), stream.getStream().getName()); + } + final String pkFieldName = stream.getStream().getSourceDefinedPrimaryKey().get(0).get(0); + final String fullyQualifiedTableName = DbSourceDiscoverUtil.getFullyQualifiedTableName(stream.getStream().getNamespace(), + (stream.getStream().getName())); + final TableInfo> table = tableNameToTable.get(fullyQualifiedTableName); + final SingleStoreType pkFieldType = table.getFields().stream().filter(field -> field.getName().equals(pkFieldName)).findFirst().get().getType(); + + final String pkMaxValue = SingleStoreQueryUtils.getMaxPkValueForStream(database, stream, pkFieldName, quoteString); + return new PrimaryKeyInfo(pkFieldName, pkFieldType, pkMaxValue); + } + + public static AirbyteStreamNameNamespacePair convertNameNamespacePairFromV0( + final io.airbyte.protocol.models.AirbyteStreamNameNamespacePair v1NameNamespacePair) { + return new AirbyteStreamNameNamespacePair(v1NameNamespacePair.getName(), v1NameNamespacePair.getNamespace()); + } + + public static boolean isAnyStreamIncrementalSyncMode(ConfiguredAirbyteCatalog catalog) { + return catalog.getStreams().stream().map(ConfiguredAirbyteStream::getSyncMode).anyMatch(syncMode -> syncMode == SyncMode.INCREMENTAL); + } + + private static boolean streamHasPrimaryKey(final ConfiguredAirbyteStream stream) { + return !stream.getStream().getSourceDefinedPrimaryKey().isEmpty(); + } + + public record InitialLoadStreams(List streamsForInitialLoad, + Map pairToInitialLoadStatus) { + + } + + public record CursorBasedStreams(List streamsForCursorBased, + Map pairToCursorBasedStatus) { + + } + + public record PrimaryKeyInfo(String pkFieldName, SingleStoreType fieldType, String pkMaxValue) { + + } + +} diff --git 
a/airbyte-integrations/connectors/source-singlestore/src/main/resources/internal_models/internal_models.yaml b/airbyte-integrations/connectors/source-singlestore/src/main/resources/internal_models/internal_models.yaml new file mode 100644 index 000000000000..929a5a79c77d --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/main/resources/internal_models/internal_models.yaml @@ -0,0 +1,48 @@ +--- +"$schema": http://json-schema.org/draft-07/schema# +title: SingleStore Models +type: object +description: SingleStore Models +properties: + state_type: + "$ref": "#/definitions/StateType" + primary_key_state: + "$ref": "#/definitions/PrimaryKeyLoadStatus" + cursor_based_state: + "$ref": "#/definitions/CursorBasedStatus" +definitions: + StateType: + description: Enum to define the sync mode of state. + type: string + enum: + - cursor_based + - primary_key + CursorBasedStatus: + type: object + extends: + type: object + existingJavaType: "io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState" + properties: + state_type: + "$ref": "#/definitions/StateType" + version: + description: Version of state. + type: integer + PrimaryKeyLoadStatus: + type: object + properties: + version: + description: Version of state. 
+ type: integer + state_type: + "$ref": "#/definitions/StateType" + pk_name: + description: primary key name + type: string + pk_val: + description: primary key watermark + type: string + incremental_state: + description: State to switch to after completion of primary key initial sync + type: object + existingJavaType: com.fasterxml.jackson.databind.JsonNode diff --git a/airbyte-integrations/connectors/source-singlestore/src/main/resources/spec.json b/airbyte-integrations/connectors/source-singlestore/src/main/resources/spec.json new file mode 100644 index 000000000000..ed6cbc374231 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/main/resources/spec.json @@ -0,0 +1,182 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/sources/singlestore", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "SingleStore Source Spec", + "type": "object", + "required": ["host", "port", "database", "username", "replication_method"], + "properties": { + "host": { + "title": "Host", + "description": "Hostname of the database.", + "type": "string", + "order": 0 + }, + "port": { + "title": "Port", + "description": "Port of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 3306, + "examples": ["3306"], + "order": 1 + }, + "database": { + "title": "Database", + "description": "Name of the database.", + "type": "string", + "order": 2 + }, + "username": { + "title": "Username", + "description": "Username to use to access the database.", + "type": "string", + "order": 3 + }, + "password": { + "title": "Password", + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 4 + }, + "jdbc_url_params": { + "title": "JDBC URL params", + "description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. 
(example: key1=value1&key2=value2&key3=value3). For more information read about JDBC URL parameters.", + "type": "string", + "order": 5 + }, + "ssl_mode": { + "title": "SSL modes", + "description": "SSL connection modes.", + "type": "object", + "order": 7, + "oneOf": [ + { + "title": "disable", + "description": "Do not use SSL/TLS", + "required": ["mode"], + "properties": { + "mode": { + "type": "string", + "const": "disable", + "order": 0 + } + } + }, + { + "title": "required", + "description": "Only use SSL/TLS for encryption. Do not perform certificate or hostname verification. This mode is not safe for production applications.", + "required": ["mode"], + "properties": { + "mode": { + "type": "string", + "const": "required", + "order": 0 + } + } + }, + { + "title": "Verify CA", + "description": "Use SSL/TLS for encryption and perform certificates verification, but do not perform hostname verification.", + "required": ["mode", "ca_certificate"], + "properties": { + "mode": { + "type": "string", + "const": "verify-ca", + "order": 0 + }, + "ca_certificate": { + "type": "string", + "title": "CA certificate", + "description": "CA certificate", + "airbyte_secret": true, + "multiline": true, + "order": 1 + }, + "client_certificate": { + "type": "string", + "title": "Client certificate", + "description": "Client certificate (this is not a required field, but if you want to use it, you will need to add the Client key as well)", + "airbyte_secret": true, + "multiline": true, + "order": 2, + "always_show": true + }, + "client_key": { + "type": "string", + "title": "Client key", + "description": "Client key (this is not a required field, but if you want to use it, you will need to add the Client certificate as well)", + "airbyte_secret": true, + "multiline": true, + "order": 3, + "always_show": true + }, + "client_key_password": { + "type": "string", + "title": "Client key password", + "description": "Password for keystorage. This field is optional. 
If you do not add it - the password will be generated automatically.", + "airbyte_secret": true, + "order": 4 + } + } + }, + { + "title": "Verify Full", + "description": "Use SSL/TLS for encryption, certificate verification, and hostname verification.", + "required": ["mode", "ca_certificate"], + "properties": { + "mode": { + "type": "string", + "const": "verify-full", + "order": 0 + }, + "ca_certificate": { + "type": "string", + "title": "CA certificate", + "description": "CA certificate", + "airbyte_secret": true, + "multiline": true, + "order": 1 + }, + "client_certificate": { + "type": "string", + "title": "Client certificate", + "description": "Client certificate (this is not a required field, but if you want to use it, you will need to add the Client key as well)", + "airbyte_secret": true, + "multiline": true, + "order": 2, + "always_show": true + }, + "client_key": { + "type": "string", + "title": "Client key", + "description": "Client key (this is not a required field, but if you want to use it, you will need to add the Client certificate as well)", + "airbyte_secret": true, + "multiline": true, + "order": 3, + "always_show": true + }, + "client_key_password": { + "type": "string", + "title": "Client key password", + "description": "Password for keystorage. This field is optional. If you do not add it - the password will be generated automatically.", + "airbyte_secret": true, + "order": 4 + } + } + } + ] + }, + "replication_method": { + "title": "Replication method", + "description": "Replication method to use for extracting data from the database. STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. 
", + "type": "string", + "order": 8, + "default": "STANDARD", + "enum": ["STANDARD"] + } + } + } +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceAcceptanceTest.java new file mode 100644 index 000000000000..d880d693fe93 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceAcceptanceTest.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.singlestore; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.Lists; +import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.integrations.source.singlestore.SingleStoreTestDatabase.BaseImage; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.*; +import java.util.HashMap; +import org.junit.jupiter.api.Order; + +@Order(2) +public class SingleStoreSourceAcceptanceTest extends SourceAcceptanceTest { + + protected SingleStoreTestDatabase testdb; + + private static final String STREAM_NAME = "id_and_name"; + private static final String STREAM_NAME2 = "public.starships"; + + @Override + protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { + testdb = createDatabase().with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));").with( + "INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE 
starships(id INTEGER, name VARCHAR(200));").with( + "INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); + } + + protected SingleStoreTestDatabase createDatabase() { + return SingleStoreTestDatabase.in(BaseImage.SINGLESTORE_DEV); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); + } + + @Override + protected String getImageName() { + return "airbyte/source-singlestore:dev"; + } + + @Override + protected ConnectorSpecification getSpec() throws Exception { + return Jsons.deserialize(MoreResources.readResource("spec.json"), ConnectorSpecification.class); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder().withStandardReplication().build(); + } + + @Override + protected ConfiguredAirbyteCatalog getConfiguredCatalog() { + return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList( + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(CatalogHelpers.createAirbyteStream( + STREAM_NAME, testdb.getDatabaseName(), + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))), + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(CatalogHelpers.createAirbyteStream( + STREAM_NAME2, testdb.getDatabaseName(), + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))))); + } + + @Override + protected JsonNode getState() { + return Jsons.jsonNode(new HashMap<>()); + } + +} diff --git 
a/airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceDatatypeTest.java b/airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceDatatypeTest.java new file mode 100644 index 000000000000..304676c83f77 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceDatatypeTest.java @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.singlestore; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.db.Database; +import io.airbyte.cdk.integrations.standardtest.source.AbstractSourceDatabaseTypeTest; +import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.integrations.source.singlestore.SingleStoreTestDatabase.BaseImage; +import io.airbyte.protocol.models.JsonSchemaType; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.Order; + +@Order(1) +public class SingleStoreSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { + + protected SingleStoreTestDatabase testdb; + + @Override + protected String getNameSpace() { + return testdb.getDatabaseName(); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); + } + + @Override + protected String getImageName() { + return "airbyte/source-singlestore:dev"; + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder().withStandardReplication().build(); + } + + @Override + protected Database setupDatabase() throws 
Exception { + testdb = SingleStoreTestDatabase.in(BaseImage.SINGLESTORE_DEV); + return testdb.getDatabase(); + } + + @Override + protected void initTests() { + addDataTypeTestData( + TestDataHolder.builder().sourceType("bit").airbyteType(JsonSchemaType.STRING_BASE_64) + // 1000001 is binary for A + .addInsertValues("null", "b'1100101'").addExpectedValues(null, "AAAAAAAAAGU=").build()); + + // tinyint without width + addDataTypeTestData( + TestDataHolder.builder().sourceType("tinyint").airbyteType(JsonSchemaType.INTEGER) + .addInsertValues("null", "-128", "127").addExpectedValues(null, "-128", "127").build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("tinyint").fullSourceDataType("tinyint(1) unsigned") + .airbyteType(JsonSchemaType.INTEGER).addInsertValues("null", "0", "1", "2", "3") + .addExpectedValues(null, "0", "1", "2", "3").build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("tinyint").fullSourceDataType("tinyint(2)") + .airbyteType(JsonSchemaType.INTEGER).addInsertValues("null", "-128", "127") + .addExpectedValues(null, "-128", "127").build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("smallint").airbyteType(JsonSchemaType.INTEGER) + .addInsertValues("null", "-32768", "32767").addExpectedValues(null, "-32768", "32767") + .build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("smallint").airbyteType(JsonSchemaType.INTEGER) + .fullSourceDataType("smallint").addInsertValues("1").addExpectedValues("1").build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("smallint").airbyteType(JsonSchemaType.INTEGER) + .fullSourceDataType("smallint unsigned").addInsertValues("null", "0", "65535") + .addExpectedValues(null, "0", "65535").build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("mediumint").airbyteType(JsonSchemaType.INTEGER) + .addInsertValues("null", "-8388608", "8388607") + .addExpectedValues(null, "-8388608", "8388607").build()); + + 
addDataTypeTestData( + TestDataHolder.builder().sourceType("mediumint").airbyteType(JsonSchemaType.INTEGER) + .fullSourceDataType("mediumint").addInsertValues("1").addExpectedValues("1").build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("int").airbyteType(JsonSchemaType.INTEGER) + .addInsertValues("null", "-2147483648", "2147483647") + .addExpectedValues(null, "-2147483648", "2147483647").build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("int").airbyteType(JsonSchemaType.INTEGER) + .fullSourceDataType("int unsigned").addInsertValues("3428724653") + .addExpectedValues("3428724653").build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("int").airbyteType(JsonSchemaType.INTEGER) + .fullSourceDataType("int").addInsertValues("1").addExpectedValues("1").build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("bigint").airbyteType(JsonSchemaType.INTEGER) + .addInsertValues("null", "9223372036854775807") + .addExpectedValues(null, "9223372036854775807").build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("float").airbyteType(JsonSchemaType.NUMBER) + .addInsertValues("null", "10.5").addExpectedValues(null, "10.5").build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("double").airbyteType(JsonSchemaType.NUMBER) + .addInsertValues("null", "power(10, 308)", "1/power(10, 45)", "10.5") + .addExpectedValues(null, String.valueOf(Math.pow(10, 308)), + String.valueOf(1 / Math.pow(10, 45)), "10.5") + .build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("decimal").airbyteType(JsonSchemaType.NUMBER) + .fullSourceDataType("decimal(10,3)").addInsertValues("0.188", "null") + .addExpectedValues("0.188", null).build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("decimal").airbyteType(JsonSchemaType.INTEGER) + .fullSourceDataType("decimal(32,0)").addInsertValues("1700000.01", "123") + .addExpectedValues("1700000", 
"123").build()); + + for (final String type : Set.of("date", "date not null default '0000-00-00'")) { + addDataTypeTestData(TestDataHolder.builder().sourceType("date").fullSourceDataType(type) + .airbyteType(JsonSchemaType.STRING_DATE) + .addInsertValues("'1999-01-08'", "'2021-01-01'", "'2022/11/12'", "'1987.12.01'") + .addExpectedValues("1999-01-08", "2021-01-01", "2022-11-12", "1987-12-01").build()); + } + + addDataTypeTestData( + TestDataHolder.builder().sourceType("date").airbyteType(JsonSchemaType.STRING_DATE) + .addInsertValues("null").addExpectedValues((String) null).build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("date").airbyteType(JsonSchemaType.STRING_DATE) + .addInsertValues("0000-00-00").addExpectedValues("0000-00-00").build()); + + for (final String fullSourceType : Set.of("datetime", "datetime not null default now()")) { + addDataTypeTestData( + TestDataHolder.builder().sourceType("datetime").fullSourceDataType(fullSourceType) + .airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE) + .addInsertValues("'1000-01-01 00:00:01'", "'2005-10-10 23:22:21'", + "'2013-09-05T10:10:02'", "'2013-09-06T10:10:02'", "'9999-12-31 23:59:59'") + .addExpectedValues("1000-01-01T00:00:01", "2005-10-10T23:22:21", + "2013-09-05T10:10:02", "2013-09-06T10:10:02", "9999-12-31T23:59:59") + .build()); + } + + for (final String fullSourceType : Set.of("datetime(6)", + "datetime(6) not null default now(6)")) { + addDataTypeTestData( + TestDataHolder.builder().sourceType("datetime").fullSourceDataType(fullSourceType) + .airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE) + .addInsertValues("'1000-01-01 00:00:00.000001'", "'9999-12-31 23:59:59.999999'") + .addExpectedValues("1000-01-01T00:00:00.000001", "9999-12-31T23:59:59.999999") + .build()); + } + + addDataTypeTestData(TestDataHolder.builder().sourceType("datetime") + .airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE).addInsertValues("null") + .addExpectedValues((String) 
null).build()); + + for (final String fullSourceType : Set.of("timestamp", "timestamp not null default now()")) { + addDataTypeTestData( + TestDataHolder.builder().sourceType("timestamp").fullSourceDataType(fullSourceType) + .airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE) + .addInsertValues("'1970-01-01 00:00:01'", "'2038-01-19 03:14:07'") + .addExpectedValues("1970-01-01T00:00:01", "2038-01-19T03:14:07").build()); + } + + for (final String fullSourceType : Set.of("timestamp(6)", + "timestamp(6) not null default now(6)")) { + addDataTypeTestData( + TestDataHolder.builder().sourceType("timestamp").fullSourceDataType(fullSourceType) + .airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE) + .addInsertValues("'1970-01-01 00:00:01.000001'", "'2038-01-19 03:14:07.999999'") + .addExpectedValues("1970-01-01T00:00:01.000001", "2038-01-19T03:14:07.999999") + .build()); + } + + for (final String fullSourceType : Set.of("time", "time not null default '00:00:00'")) { + addDataTypeTestData( + TestDataHolder.builder().sourceType("time").fullSourceDataType(fullSourceType) + .airbyteType(JsonSchemaType.STRING) + // JDBC driver can process only "clock"(00:00:00-23:59:59) values. + .addInsertValues("'-838:59:59'", "'838:59:59'", "'00:00:00'") + .addExpectedValues("-838:59:59", "838:59:59", "00:00:00").build()); + } + + for (final String fullSourceType : Set.of("time(6)", + "time(6) not null default '00:00:00.000000'")) { + addDataTypeTestData( + TestDataHolder.builder().sourceType("time").fullSourceDataType(fullSourceType) + .airbyteType(JsonSchemaType.STRING) + .addInsertValues("'-838:59:59.000000'", "'837:59:59.999999'", "'00:00:00.000000'") + .addExpectedValues("-838:59:59.000000", "837:59:59.999999", "00:00:00.000000") + .build()); + } + + addDataTypeTestData( + TestDataHolder.builder().sourceType("time").airbyteType(JsonSchemaType.STRING) + // JDBC driver can process only "clock"(00:00:00-23:59:59) values. 
+ .addInsertValues("null").addExpectedValues((String) null).build()); + + addDataTypeTestData( + TestDataHolder.builder().sourceType("year").airbyteType(JsonSchemaType.INTEGER) + // S2 converts values in the ranges '0' - '69' to YEAR value in the range 2000 - 2069 + // and '70' - '99' to 1970 - 1999. + .addInsertValues("null", "'1997'", "'0'", "'50'", "'70'", "'80'", "'99'", "'00'", + "'000'") + .addExpectedValues(null, "1997", "2000", "2050", "1970", "1980", "1999", "2000", "2000") + .build()); + // char types can be string or binary, so they are tested separately + final Set charTypes = Stream.of(SingleStoreType.CHAR, SingleStoreType.VARCHAR) + .map(Enum::name).collect(Collectors.toSet()); + for (final String charType : charTypes) { + addDataTypeTestData( + TestDataHolder.builder().sourceType(charType).airbyteType(JsonSchemaType.STRING) + .fullSourceDataType(charType + "(63)") + .addInsertValues("null", "'Airbyte'", "'!\"#$%&\\'()*+,-./:;<=>?\\@[\\]^_\\`|~'") + .addExpectedValues(null, "Airbyte", "!\"#$%&'()*+,-./:;<=>?@[]^_`|~").build()); + } + final Set blobTypes = Stream.of(SingleStoreType.TINYBLOB, SingleStoreType.BLOB, + SingleStoreType.MEDIUMBLOB, SingleStoreType.LONGBLOB).map(Enum::name) + .collect(Collectors.toSet()); + for (final String blobType : blobTypes) { + addDataTypeTestData( + TestDataHolder.builder().sourceType(blobType).airbyteType(JsonSchemaType.STRING_BASE_64) + .addInsertValues("null", "'Airbyte'").addExpectedValues(null, "QWlyYnl0ZQ==") + .build()); + } + // binary appends '\0' to the end of the string + addDataTypeTestData(TestDataHolder.builder().sourceType(SingleStoreType.BINARY.name()) + .fullSourceDataType(SingleStoreType.BINARY.name() + "(10)") + .airbyteType(JsonSchemaType.STRING_BASE_64).addInsertValues("null", "'Airbyte'") + .addExpectedValues(null, "QWlyYnl0ZQAAAA==").build()); + // varbinary does not append '\0' to the end of the string + addDataTypeTestData(TestDataHolder.builder().sourceType(SingleStoreType.VARBINARY.name()) + 
.fullSourceDataType(SingleStoreType.VARBINARY.name() + "(10)") + .airbyteType(JsonSchemaType.STRING_BASE_64).addInsertValues("null", "'Airbyte'") + .addExpectedValues(null, "QWlyYnl0ZQ==").build()); + + final Set textTypes = Stream.of(SingleStoreType.TINYTEXT, SingleStoreType.TEXT, + SingleStoreType.MEDIUMTEXT, SingleStoreType.LONGTEXT).map(Enum::name) + .collect(Collectors.toSet()); + final String randomText = RandomStringUtils.random(50, true, true); + for (final String textType : textTypes) { + addDataTypeTestData( + TestDataHolder.builder().sourceType(textType).airbyteType(JsonSchemaType.STRING) + .addInsertValues("null", "'Airbyte'", String.format("'%s'", randomText)) + .addExpectedValues(null, "Airbyte", randomText).build()); + } + addDataTypeTestData( + TestDataHolder.builder().sourceType("mediumtext").airbyteType(JsonSchemaType.STRING) + .addInsertValues(getLogString(1048000), "'test'") + .addExpectedValues(StringUtils.leftPad("0", 1048000, "0"), "test").build()); + addDataTypeTestData( + TestDataHolder.builder().sourceType("json").airbyteType(JsonSchemaType.STRING) + .addInsertValues("null", "'{\"a\": 10, \"b\": 15}'", "'{\"fóo\": \"bär\"}'", + "'{\"春江潮水连海平\":\"海上明月共潮生\"}'") + .addExpectedValues(null, "{\"a\":10,\"b\":15}", "{\"fóo\":\"bär\"}", + "{\"春江潮水连海平\":\"海上明月共潮生\"}") + .build()); + addDataTypeTestData(TestDataHolder.builder().sourceType("enum") + .fullSourceDataType("ENUM('xs', 's', 'm', 'l', 'xl')").airbyteType(JsonSchemaType.STRING) + .addInsertValues("null", "'xs'", "'m'").addExpectedValues(null, "xs", "m").build()); + addDataTypeTestData(TestDataHolder.builder().sourceType("set") + .fullSourceDataType("SET('xs', 's', 'm', 'l', 'xl')").airbyteType(JsonSchemaType.STRING) + .addInsertValues("null", "'xs,s'", "'m,xl'").addExpectedValues(null, "xs,s", "m,xl") + .build()); + addDataTypeTestData( + TestDataHolder.builder().sourceType("decimal").airbyteType(JsonSchemaType.NUMBER) + .fullSourceDataType("decimal(19,2)").addInsertValues("1700000.01", 
"'123'") + .addExpectedValues("1700000.01", "123.0").build()); + addDataTypeTestData( + TestDataHolder.builder().sourceType("GEOGRAPHYPOINT").airbyteType(JsonSchemaType.STRING) + .addInsertValues("'POINT(1.5 1.5)'").addExpectedValues("POINT(1.50000003 1.50000000)") + .build()); + } + + private String getLogString(final int length) { + final int maxLpadLength = 262144; + final StringBuilder stringBuilder = new StringBuilder("concat("); + final int fullChunks = length / maxLpadLength; + stringBuilder.append("lpad('0', 262144, '0'),".repeat(fullChunks)); + stringBuilder.append("lpad('0', ").append(length % maxLpadLength).append(", '0'))"); + return stringBuilder.toString(); + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSslSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSslSourceAcceptanceTest.java new file mode 100644 index 000000000000..3e93f2167775 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/test-integration/java/io/airbyte/integrations/source/singlestore/SingleStoreSslSourceAcceptanceTest.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.singlestore; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.singlestore.SingleStoreTestDatabase.BaseImage; +import io.airbyte.integrations.source.singlestore.SingleStoreTestDatabase.ContainerModifier; +import org.junit.jupiter.api.Order; + +@Order(3) +public class SingleStoreSslSourceAcceptanceTest extends SingleStoreSourceAcceptanceTest { + + @Override + protected SingleStoreTestDatabase createDatabase() { + return SingleStoreTestDatabase.in(BaseImage.SINGLESTORE_DEV, ContainerModifier.CERT); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder().withStandardReplication() + .with(JdbcUtils.SSL_MODE_KEY, Jsons.jsonNode( + ImmutableMap.of(JdbcUtils.MODE_KEY, "verify-ca", "ca_certificate", + testdb.getCertificates().caCertificate()))) + .build(); + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/test-integration/resources/dummy_config.json b/airbyte-integrations/connectors/source-singlestore/src/test-integration/resources/dummy_config.json new file mode 100644 index 000000000000..483d12bc3cd1 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/test-integration/resources/dummy_config.json @@ -0,0 +1,7 @@ +{ + "host": "default", + "port": 5555, + "database": "default", + "username": "default", + "replication_method": "STANDARD" +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/test-integration/resources/expected_spec.json b/airbyte-integrations/connectors/source-singlestore/src/test-integration/resources/expected_spec.json new file mode 100644 index 000000000000..c53aec5a9824 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/test-integration/resources/expected_spec.json @@ -0,0 +1,185 @@ +{ + "documentationUrl": 
"https://docs.airbyte.com/integrations/sources/singlestore", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "SingleStore Source Spec", + "type": "object", + "required": ["host", "port", "database", "username", "replication_method"], + "properties": { + "host": { + "title": "Host", + "description": "Hostname of the database.", + "type": "string", + "order": 0 + }, + "port": { + "title": "Port", + "description": "Port of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 3306, + "examples": ["3306"], + "order": 1 + }, + "database": { + "title": "Database", + "description": "Name of the database.", + "type": "string", + "order": 2 + }, + "username": { + "title": "Username", + "description": "Username to use to access the database.", + "type": "string", + "order": 3 + }, + "password": { + "title": "Password", + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 4 + }, + "jdbc_url_params": { + "title": "JDBC URL params", + "description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3). For more information read about JDBC URL parameters.", + "type": "string", + "order": 5 + }, + "ssl_mode": { + "title": "SSL modes", + "description": "SSL connection modes.", + "type": "object", + "order": 7, + "oneOf": [ + { + "title": "disable", + "description": "Do not use SSL/TLS", + "required": ["mode"], + "properties": { + "mode": { + "type": "string", + "const": "disable", + "order": 0 + } + } + }, + { + "title": "required", + "description": "Only use SSL/TLS for encryption. Do not perform certificate or hostname verification. 
This mode is not safe for production applications.", + "required": ["mode"], + "properties": { + "mode": { + "type": "string", + "const": "required", + "order": 0 + } + } + }, + { + "title": "Verify CA", + "description": "Use SSL/TLS for encryption and perform certificates verification, but do not perform hostname verification.", + "required": ["mode", "ca_certificate"], + "properties": { + "mode": { + "type": "string", + "const": "verify-ca", + "order": 0 + }, + "ca_certificate": { + "type": "string", + "title": "CA certificate", + "description": "CA certificate", + "airbyte_secret": true, + "multiline": true, + "order": 1 + }, + "client_certificate": { + "type": "string", + "title": "Client certificate", + "description": "Client certificate (this is not a required field, but if you want to use it, you will need to add the Client key as well)", + "airbyte_secret": true, + "multiline": true, + "order": 2, + "always_show": true + }, + "client_key": { + "type": "string", + "title": "Client key", + "description": "Client key (this is not a required field, but if you want to use it, you will need to add the Client certificate as well)", + "airbyte_secret": true, + "multiline": true, + "order": 3, + "always_show": true + }, + "client_key_password": { + "type": "string", + "title": "Client key password", + "description": "Password for keystorage. This field is optional. 
If you do not add it - the password will be generated automatically.", + "airbyte_secret": true, + "order": 4 + } + } + }, + { + "title": "Verify Full", + "description": "Use SSL/TLS for encryption, certificate verification, and hostname verification.", + "required": ["mode", "ca_certificate"], + "properties": { + "mode": { + "type": "string", + "const": "verify-full", + "order": 0 + }, + "ca_certificate": { + "type": "string", + "title": "CA certificate", + "description": "CA certificate", + "airbyte_secret": true, + "multiline": true, + "order": 1 + }, + "client_certificate": { + "type": "string", + "title": "Client certificate", + "description": "Client certificate (this is not a required field, but if you want to use it, you will need to add the Client key as well)", + "airbyte_secret": true, + "multiline": true, + "order": 2, + "always_show": true + }, + "client_key": { + "type": "string", + "title": "Client key", + "description": "Client key (this is not a required field, but if you want to use it, you will need to add the Client certificate as well)", + "airbyte_secret": true, + "multiline": true, + "order": 3, + "always_show": true + }, + "client_key_password": { + "type": "string", + "title": "Client key password", + "description": "Password for keystorage. This field is optional. If you do not add it - the password will be generated automatically.", + "airbyte_secret": true, + "order": 4 + } + } + } + ] + }, + "replication_method": { + "title": "Replication method", + "description": "Replication method to use for extracting data from the database. STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. 
", + "type": "string", + "order": 8, + "default": "STANDARD", + "enum": ["STANDARD"] + } + } + }, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": [] +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreJdbcSourceAcceptanceTest.java new file mode 100644 index 000000000000..1e2f54d566f3 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreJdbcSourceAcceptanceTest.java @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.singlestore; + +import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY; +import static io.airbyte.integrations.source.singlestore.initialsync.SingleStoreInitialLoadStreamStateManager.STATE_TYPE_KEY; +import static java.util.stream.Collectors.toList; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; +import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.MoreIterators; +import io.airbyte.integrations.source.singlestore.SingleStoreTestDatabase.BaseImage; +import 
io.airbyte.integrations.source.singlestore.internal.models.CursorBasedStatus; +import io.airbyte.integrations.source.singlestore.internal.models.InternalModels.StateType; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteCatalog; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteMessage.Type; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.v0.AirbyteStateStats; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.AirbyteStreamState; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import io.airbyte.protocol.models.v0.StreamDescriptor; +import io.airbyte.protocol.models.v0.SyncMode; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; + +@Order(2) +class SingleStoreJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { + + protected static final String USERNAME_WITHOUT_PERMISSION = "new_user"; + protected static final String PASSWORD_WITHOUT_PERMISSION = "new_password"; + + @Override + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { + ((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "connectTimeout=1000"); + } + + @Override + protected JsonNode config() { + return testdb.testConfigBuilder().build(); + } + + 
@Override + protected SingleStoreSource source() { + return new SingleStoreSource(); + } + + @Override + protected SingleStoreTestDatabase createTestDatabase() { + return SingleStoreTestDatabase.in(BaseImage.SINGLESTORE_DEV); + } + + @Override + public boolean supportsSchemas() { + return false; + } + + @Test + @Override + protected void testReadMultipleTablesIncrementally() throws Exception { + final var config = config(); + ((ObjectNode) config).put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1); + final String streamOneName = TABLE_NAME + "one"; + // Create a fresh first table + testdb.with("CREATE TABLE %s (id int PRIMARY KEY, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL);", streamOneName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')", getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", getFullyQualifiedTableName(streamOneName)); + + // Create a fresh second table + final String streamTwoName = TABLE_NAME + "two"; + final String streamTwoFullyQualifiedName = getFullyQualifiedTableName(streamTwoName); + // Insert records into second table + testdb.with("CREATE TABLE %s (id int PRIMARY KEY, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL);", streamTwoName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (40,'Jean Luc','2006-10-19')", streamTwoFullyQualifiedName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (41, 'Groot', '2006-10-19')", streamTwoFullyQualifiedName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (42, 'Thanos','2006-10-19')", streamTwoFullyQualifiedName); + + // Create records list that we expect to see in the state message + final List streamTwoExpectedRecords = Arrays.asList( + createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of(COL_ID, 40, COL_NAME, "Jean Luc", COL_UPDATED_AT, 
"2006-10-19")), + createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of(COL_ID, 41, COL_NAME, "Groot", COL_UPDATED_AT, "2006-10-19")), + createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of(COL_ID, 42, COL_NAME, "Thanos", COL_UPDATED_AT, "2006-10-19"))); + + // Prep and create a configured catalog to perform sync + final AirbyteStream streamOne = getAirbyteStream(streamOneName, getDefaultNamespace()); + final AirbyteStream streamTwo = getAirbyteStream(streamTwoName, getDefaultNamespace()); + + final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( + new AirbyteCatalog().withStreams(List.of(streamOne, streamTwo))); + configuredCatalog.getStreams().forEach(airbyteStream -> { + airbyteStream.setSyncMode(SyncMode.INCREMENTAL); + airbyteStream.setCursorField(List.of(COL_ID)); + airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND); + airbyteStream.withPrimaryKey(List.of(List.of(COL_ID))); + }); + + // Perform initial sync + final List messagesFromFirstSync = MoreIterators.toList(source().read(config, configuredCatalog, null)); + final List recordsFromFirstSync = filterRecords(messagesFromFirstSync); + + setEmittedAtToNull(messagesFromFirstSync); + // All records in the 2 configured streams should be present + assertThat(filterRecords(recordsFromFirstSync)).containsExactlyElementsOf( + Stream.concat(getTestMessages(streamOneName).stream().parallel(), streamTwoExpectedRecords.stream().parallel()).collect(toList())); + + final List actualFirstSyncState = extractStateMessage(messagesFromFirstSync); + // Since we are emitting a state message after each record, we should have 1 state for each record - + // 3 from stream1 and 3 from stream2 + assertEquals(6, actualFirstSyncState.size()); + + // The expected state type should be 2 primaryKey's and the last one being standard + final List expectedStateTypesFromFirstSync = List.of("primary_key", "primary_key", "cursor_based"); + final List 
stateTypeOfStreamOneStatesFromFirstSync = extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamOneName, + STATE_TYPE_KEY); + final List stateTypeOfStreamTwoStatesFromFirstSync = extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamTwoName, + STATE_TYPE_KEY); + // It should be the same for stream1 and stream2 + assertEquals(stateTypeOfStreamOneStatesFromFirstSync, expectedStateTypesFromFirstSync); + assertEquals(stateTypeOfStreamTwoStatesFromFirstSync, expectedStateTypesFromFirstSync); + + // Create the expected primaryKeys that we should see + final List expectedPrimaryKeysFromFirstSync = List.of("1", "2"); + final List primaryKeyFromStreamOneStatesFromFirstSync = extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamOneName, + "pk_val"); + final List primaryKeyFromStreamTwoStatesFromFirstSync = extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamOneName, + "pk_val"); + + // Verifying each element and its index to match. 
+ // Only checking the first 2 elements since we have verified that the last state_type is + // "cursor_based" + assertEquals(primaryKeyFromStreamOneStatesFromFirstSync.get(0), expectedPrimaryKeysFromFirstSync.get(0)); + assertEquals(primaryKeyFromStreamOneStatesFromFirstSync.get(1), expectedPrimaryKeysFromFirstSync.get(1)); + assertEquals(primaryKeyFromStreamTwoStatesFromFirstSync.get(0), expectedPrimaryKeysFromFirstSync.get(0)); + assertEquals(primaryKeyFromStreamTwoStatesFromFirstSync.get(1), expectedPrimaryKeysFromFirstSync.get(1)); + + // Extract only state messages for each stream + final List streamOneStateMessagesFromFirstSync = extractStateMessage(messagesFromFirstSync, streamOneName); + final List streamTwoStateMessagesFromFirstSync = extractStateMessage(messagesFromFirstSync, streamTwoName); + + // Extract the incremental states of each stream's first and second state message + final List streamOneIncrementalStatesFromFirstSync = List.of( + streamOneStateMessagesFromFirstSync.get(0).getStream().getStreamState().get("incremental_state"), + streamOneStateMessagesFromFirstSync.get(1).getStream().getStreamState().get("incremental_state")); + final JsonNode streamOneFinalStreamStateFromFirstSync = streamOneStateMessagesFromFirstSync.get(2).getStream().getStreamState(); + + final List streamTwoIncrementalStatesFromFirstSync = List.of( + streamTwoStateMessagesFromFirstSync.get(0).getStream().getStreamState().get("incremental_state"), + streamTwoStateMessagesFromFirstSync.get(1).getStream().getStreamState().get("incremental_state")); + final JsonNode streamTwoFinalStreamStateFromFirstSync = streamTwoStateMessagesFromFirstSync.get(2).getStream().getStreamState(); + + // The incremental_state of each stream's first and second incremental states is expected + // to be identical to the stream_state of the final state message for each stream + assertEquals(streamOneIncrementalStatesFromFirstSync.get(0), streamOneFinalStreamStateFromFirstSync); + 
assertEquals(streamOneIncrementalStatesFromFirstSync.get(1), streamOneFinalStreamStateFromFirstSync); + assertEquals(streamTwoIncrementalStatesFromFirstSync.get(0), streamTwoFinalStreamStateFromFirstSync); + assertEquals(streamTwoIncrementalStatesFromFirstSync.get(1), streamTwoFinalStreamStateFromFirstSync); + + // Sync should work with a primaryKey state AND a cursor-based state from each stream + // Forcing a sync with + // - stream one state still being the first record read via Primary Key. + // - stream two state being the Primary Key state before the final emitted state before the cursor + // switch + final List messagesFromSecondSyncWithMixedStates = MoreIterators.toList(source().read(config, configuredCatalog, + Jsons.jsonNode(List.of(streamOneStateMessagesFromFirstSync.get(0), streamTwoStateMessagesFromFirstSync.get(1))))); + + // Extract only state messages for each stream after second sync + final List streamOneStateMessagesFromSecondSync = extractStateMessage(messagesFromSecondSyncWithMixedStates, streamOneName); + final List stateTypeOfStreamOneStatesFromSecondSync = extractSpecificFieldFromCombinedMessages(messagesFromSecondSyncWithMixedStates, + streamOneName, STATE_TYPE_KEY); + + final List streamTwoStateMessagesFromSecondSync = extractStateMessage(messagesFromSecondSyncWithMixedStates, streamTwoName); + final List stateTypeOfStreamTwoStatesFromSecondSync = extractSpecificFieldFromCombinedMessages(messagesFromSecondSyncWithMixedStates, + streamTwoName, STATE_TYPE_KEY); + + // Stream One states after the second sync are expected to have 2 stream states + // - 1 with PrimaryKey state_type and 1 state that is of cursorBased state type + assertEquals(2, streamOneStateMessagesFromSecondSync.size()); + assertEquals(List.of("primary_key", "cursor_based"), stateTypeOfStreamOneStatesFromSecondSync); + + // Stream Two states after the second sync are expected to have 1 stream state + // - The state that is of cursorBased state type + assertEquals(1, 
streamTwoStateMessagesFromSecondSync.size()); + assertEquals(List.of("cursor_based"), stateTypeOfStreamTwoStatesFromSecondSync); + + // Add some data to each table and perform a third read. + // Expect to see all records be synced via cursorBased method and not primaryKey + testdb.with("INSERT INTO %s(id, name, updated_at) VALUES (4,'Hooper','2006-10-19')", getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (43, 'Iron Man', '2006-10-19')", streamTwoFullyQualifiedName); + + final List messagesFromThirdSync = MoreIterators.toList(source().read(config, configuredCatalog, + Jsons.jsonNode(List.of(streamOneStateMessagesFromSecondSync.get(1), streamTwoStateMessagesFromSecondSync.get(0))))); + + // Extract only state messages, state type, and cursor for each stream after second sync + final List streamOneStateMessagesFromThirdSync = extractStateMessage(messagesFromThirdSync, streamOneName); + final List stateTypeOfStreamOneStatesFromThirdSync = extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamOneName, + STATE_TYPE_KEY); + final List cursorOfStreamOneStatesFromThirdSync = extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamOneName, + "cursor"); + + final List streamTwoStateMessagesFromThirdSync = extractStateMessage(messagesFromThirdSync, streamTwoName); + final List stateTypeOfStreamTwoStatesFromThirdSync = extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamTwoName, + STATE_TYPE_KEY); + final List cursorOfStreamTwoStatesFromThirdSync = extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamTwoName, + "cursor"); + + // Both streams should now be synced via standard cursor and have updated max cursor values + // cursor: 4 for stream one + // cursor: 43 for stream two + assertEquals(1, streamOneStateMessagesFromThirdSync.size()); + assertEquals(List.of("cursor_based"), stateTypeOfStreamOneStatesFromThirdSync); + assertEquals(List.of("4"), 
cursorOfStreamOneStatesFromThirdSync); + + assertEquals(1, streamTwoStateMessagesFromThirdSync.size()); + assertEquals(List.of("cursor_based"), stateTypeOfStreamTwoStatesFromThirdSync); + assertEquals(List.of("43"), cursorOfStreamTwoStatesFromThirdSync); + } + + @Test + void testCheckIncorrectPasswordFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake"); + final AirbyteConnectionStatus status = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + assertTrue(status.getMessage().contains("State code: 28000; Error code: 1045;"), status.getMessage()); + } + + @Test + public void testCheckIncorrectUsernameFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, "fake"); + final AirbyteConnectionStatus status = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + assertTrue(status.getMessage().contains("State code: 28000; Error code: 1045;"), status.getMessage()); + } + + @Test + public void testCheckIncorrectHostFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + ((ObjectNode) config).put(JdbcUtils.HOST_KEY, "localhost2"); + final AirbyteConnectionStatus status = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + assertTrue(status.getMessage().contains("State code: 08000;"), status.getMessage()); + } + + @Test + public void testCheckIncorrectPortFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + ((ObjectNode) config).put(JdbcUtils.PORT_KEY, "0000"); + final AirbyteConnectionStatus status = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + 
assertTrue(status.getMessage().contains("State code: 08000;"), status.getMessage()); + } + + @Test + public void testCheckIncorrectDataBaseFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, "wrongdatabase"); + final AirbyteConnectionStatus status = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + assertTrue(status.getMessage().contains("State code: 42000; Error code: 1044;"), status.getMessage()); + } + + @Test + public void testUserHasNoPermissionToDataBase() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); + final String usernameWithoutPermission = testdb.withNamespace(USERNAME_WITHOUT_PERMISSION); + testdb.singlestoreCmd( + Stream.of(String.format("CREATE USER '%s'@'%%' IDENTIFIED BY '%s';", usernameWithoutPermission, PASSWORD_WITHOUT_PERMISSION))).forEach(c -> { + try { + testdb.getContainer().execInContainer(c); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, usernameWithoutPermission); + ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, PASSWORD_WITHOUT_PERMISSION); + final AirbyteConnectionStatus status = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); + assertTrue(status.getMessage().contains("State code: 28000; Error code: 1045;"), status.getMessage()); + } + + @Override + protected DbStreamState buildStreamState(final ConfiguredAirbyteStream configuredAirbyteStream, + final String cursorField, + final String cursorValue) { + return new CursorBasedStatus().withStateType(StateType.CURSOR_BASED).withStreamName(configuredAirbyteStream.getStream().getName()) + .withStreamNamespace(configuredAirbyteStream.getStream().getNamespace()).withCursorField(List.of(cursorField)).withCursor(cursorValue) + .withCursorRecordCount(1L); + } + + @Override 
+ protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { + final List expectedMessages = new ArrayList<>(); + expectedMessages.add(new AirbyteMessage().withType(Type.RECORD).withRecord( + new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) + .withData(Jsons.jsonNode(ImmutableMap.of(COL_ID, ID_VALUE_4, COL_NAME, "riker", COL_UPDATED_AT, "2006-10-19"))))); + expectedMessages.add(new AirbyteMessage().withType(Type.RECORD).withRecord( + new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) + .withData(Jsons.jsonNode(ImmutableMap.of(COL_ID, ID_VALUE_5, COL_NAME, "data", COL_UPDATED_AT, "2006-10-19"))))); + final DbStreamState state = new CursorBasedStatus().withStateType(StateType.CURSOR_BASED).withStreamName(streamName()) + .withStreamNamespace(namespace).withCursorField(ImmutableList.of(COL_ID)).withCursor("5").withCursorRecordCount(1L); + + expectedMessages.addAll(createExpectedTestMessages(List.of(state), 2L)); + return expectedMessages; + } + + @Override + protected List getTestMessages() { + return getTestMessages(streamName()); + } + + protected List getTestMessages(final String streamName) { + return List.of(new AirbyteMessage().withType(Type.RECORD).withRecord( + new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) + .withData(Jsons.jsonNode(Map.of(COL_ID, ID_VALUE_1, COL_NAME, "picard", COL_UPDATED_AT, "2004-10-19")))), + new AirbyteMessage().withType(Type.RECORD).withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) + .withData(Jsons.jsonNode(Map.of(COL_ID, ID_VALUE_2, COL_NAME, "crusher", COL_UPDATED_AT, "2005-10-19")))), + new AirbyteMessage().withType(Type.RECORD).withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) + .withData(Jsons.jsonNode(Map.of(COL_ID, ID_VALUE_3, COL_NAME, "vash", COL_UPDATED_AT, "2006-10-19"))))); + } + + private AirbyteStream 
getAirbyteStream(final String tableName, final String namespace) { + return CatalogHelpers.createAirbyteStream(tableName, namespace, Field.of(COL_ID, JsonSchemaType.INTEGER), + Field.of(COL_NAME, JsonSchemaType.STRING), Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))); + } + + @Override + protected AirbyteCatalog getCatalog(final String defaultNamespace) { + return new AirbyteCatalog().withStreams(Lists.newArrayList( + CatalogHelpers.createAirbyteStream(TABLE_NAME, defaultNamespace, Field.of(COL_ID, JsonSchemaType.INTEGER), + Field.of(COL_NAME, JsonSchemaType.STRING), Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))), + CatalogHelpers.createAirbyteStream(TABLE_NAME_WITHOUT_PK, defaultNamespace, Field.of(COL_ID, JsonSchemaType.INTEGER), + Field.of(COL_NAME, JsonSchemaType.STRING), Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(Collections.emptyList()), + CatalogHelpers.createAirbyteStream(TABLE_NAME_COMPOSITE_PK, defaultNamespace, Field.of(COL_FIRST_NAME, JsonSchemaType.STRING), + Field.of(COL_LAST_NAME, JsonSchemaType.STRING), Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of(COL_FIRST_NAME), List.of(COL_LAST_NAME))))); + } + + @Override + protected List createExpectedTestMessages(final List states, final long numRecords) { + return states.stream().map(s -> new AirbyteMessage().withType(Type.STATE).withState(new AirbyteStateMessage().withType(AirbyteStateType.STREAM) + .withStream( + new 
AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())) + .withStreamState(Jsons.jsonNode(s))) + .withSourceStats(new AirbyteStateStats().withRecordCount((double) numRecords)))) + .collect(Collectors.toList()); + } + + @Override + protected List createState(final List states) { + return states.stream().map(s -> new AirbyteStateMessage().withType(AirbyteStateType.STREAM).withStream( + new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())) + .withStreamState(Jsons.jsonNode(s)))) + .collect(Collectors.toList()); + } + + @Override + protected JsonNode getStateData(final AirbyteMessage airbyteMessage, final String streamName) { + final JsonNode streamState = airbyteMessage.getState().getStream().getStreamState(); + if (streamState.get("stream_name").asText().equals(streamName)) { + return streamState; + } + + throw new IllegalArgumentException("Stream not found in state message: " + streamName); + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceOperationsTest.java b/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceOperationsTest.java new file mode 100644 index 000000000000..6c41e4c8e6ea --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceOperationsTest.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.singlestore; + +import static org.testcontainers.shaded.org.hamcrest.MatcherAssert.assertThat; +import static org.testcontainers.shaded.org.hamcrest.Matchers.containsInAnyOrder; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.cdk.db.jdbc.DateTimeConverter; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.singlestore.SingleStoreTestDatabase.BaseImage; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.function.Function; +import java.util.function.IntFunction; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; + +@Order(1) +public class SingleStoreSourceOperationsTest { + + private final SingleStoreSourceOperations sqlSourceOperations = new SingleStoreSourceOperations(); + + @Test + public void varcharAsCursor() throws SQLException { + testImpl("VARCHAR(30)", i -> "test" + i, v -> v, v -> v, SingleStoreType.VARCHAR, "test1"); + } + + @Test + public void dateColumnAsCursor() throws SQLException { + testImpl("DATE", i -> LocalDate.of(2019, 1, i), DateTimeConverter::convertToDate, + LocalDate::toString, SingleStoreType.DATE, + DateTimeConverter.convertToDate(LocalDate.of(2019, 1, 1))); + } + + @Test + public void timeColumnAsCursor() throws SQLException { + testImpl("TIME", i -> "20:0" + i + ":00", i -> i, + i -> i, SingleStoreType.TIME, + "20:01:00"); + } + + @Test + public void dateTimeColumnAsCursor() throws SQLException { + testImpl("DATETIME", i -> LocalDateTime.of(2019, i, 20, 3, 0, 0), + DateTimeConverter::convertToTimestamp, LocalDateTime::toString, SingleStoreType.DATETIME, + DateTimeConverter.convertToTimestamp(LocalDateTime.of(2019, 1, 20, 3, 0, 0))); + 
} + + @Test + public void timeStampColumnAsCursor() throws SQLException { + testImpl("TIMESTAMP", i -> LocalDateTime.of(2019, i, 20, 3, 0, 0), + DateTimeConverter::convertToTimestamp, LocalDateTime::toString, SingleStoreType.DATETIME, + DateTimeConverter.convertToTimestamp(LocalDateTime.of(2019, 1, 20, 3, 0, 0))); + testImpl("TIMESTAMP(6)", i -> LocalDateTime.of(2019, i, 20, 3, 0, 0), + DateTimeConverter::convertToTimestamp, LocalDateTime::toString, SingleStoreType.DATETIME, + DateTimeConverter.convertToTimestamp(LocalDateTime.of(2019, 1, 20, 3, 0, 0))); + } + + private void testImpl(final String sqlType, + IntFunction recordBuilder, + Function airbyteRecordStringifier, + Function sqlRecordStringifier, + SingleStoreType singlestoreType, + String initialCursorFieldValue) + throws SQLException { + final String cursorColumn = "cursor_column"; + try (final var testdb = SingleStoreTestDatabase.in(BaseImage.SINGLESTORE_DEV) + .with("CREATE TABLE cursor_table (id INTEGER PRIMARY KEY, %s %s);", cursorColumn, + sqlType)) { + final List expectedRecords = new ArrayList<>(); + for (int i = 1; i <= 4; i++) { + final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + jsonNode.put("id", i); + final T cursorValue = recordBuilder.apply(i); + jsonNode.put("cursor_column", airbyteRecordStringifier.apply(cursorValue)); + testdb.with("INSERT INTO cursor_table VALUES (%d, '%s');", i, + sqlRecordStringifier.apply(cursorValue)); + if (i >= 2) { + expectedRecords.add(jsonNode); + } + } + try (final Connection connection = testdb.getContainer().createConnection("")) { + final PreparedStatement preparedStatement = connection.prepareStatement( + "SELECT * FROM " + testdb.getDatabaseName() + ".cursor_table WHERE " + cursorColumn + + " > ?"); + sqlSourceOperations.setCursorField(preparedStatement, 1, singlestoreType, + initialCursorFieldValue); + final List actualRecords = new ArrayList<>(); + try (final ResultSet resultSet = preparedStatement.executeQuery()) { + while 
(resultSet.next()) { + final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { + sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); + } + actualRecords.add(jsonNode); + } + } + assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); + } + } + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceTest.java b/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceTest.java new file mode 100644 index 000000000000..d749d079b689 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSourceTest.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.singlestore; + +import static org.junit.jupiter.api.Assertions.*; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.commons.json.Jsons; +import org.junit.jupiter.api.Test; + +public class SingleStoreSourceTest { + + private static final String EXPECTED_JDBC_ESCAPED_URL = "jdbc:singlestore://localhost:1111/db%2Ffoo?"; + + public SingleStoreSource source() { + return new SingleStoreSource(); + } + + @Test + void testJdbcUrlWithEscapedDatabaseName() { + final JsonNode jdbcConfig = source().toDatabaseConfig(buildConfigEscapingNeeded()); + assertNotNull(jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText()); + assertTrue( + jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText().startsWith(EXPECTED_JDBC_ESCAPED_URL)); + } + + @Test + void testJdbcUrlWithSslParameters() { + final JsonNode jdbcConfig = source().toDatabaseConfig(Jsons.jsonNode( + ImmutableMap.of(JdbcUtils.HOST_KEY, "localhost", 
JdbcUtils.PORT_KEY, 3306, + JdbcUtils.USERNAME_KEY, "user", JdbcUtils.DATABASE_KEY, "db", JdbcUtils.SSL_MODE_KEY, + Jsons.jsonNode(ImmutableMap.of("mode", "verify-full", "client_key", "test_client_key", + "client_key_password", "password"))))); + String jdbcUrl = jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText(); + assertEquals( + "jdbc:singlestore://localhost:3306/db?yearIsDateType=false&tinyInt1isBit=false&_connector_name=Airbyte Source Connector&sslMode=VERIFY_FULL", + jdbcUrl); + } + + private JsonNode buildConfigEscapingNeeded() { + return Jsons.jsonNode(ImmutableMap.of(JdbcUtils.HOST_KEY, "localhost", JdbcUtils.PORT_KEY, 1111, + JdbcUtils.USERNAME_KEY, "user", JdbcUtils.DATABASE_KEY, "db/foo")); + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSpecTest.java b/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSpecTest.java new file mode 100644 index 000000000000..b4b2058b1fc7 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSpecTest.java @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.singlestore; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.v0.ConnectorSpecification; +import io.airbyte.validation.json.JsonSchemaValidator; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +public class SingleStoreSpecTest { + + private static final String CONFIGURATION = """ + { + "host": "localhost", + "port": 3306, + "username": "usr", + "password": "pwd", + "database": "testDb", + "replication_method": "STANDARD", + "jdbc_url_params": "property1=pValue1&property2=pValue2", + "ssl_mode": {"mode": "disable"} + } + """; + + private static JsonNode schema; + private static JsonSchemaValidator validator; + + @BeforeAll + static void init() throws IOException { + final String spec = MoreResources.readResource("spec.json"); + final File schemaFile = IOs.writeFile( + Files.createTempDirectory(Path.of("/tmp"), "pg-spec-test"), "schema.json", spec).toFile(); + schema = JsonSchemaValidator.getSchema(schemaFile).get("connectionSpecification"); + validator = new JsonSchemaValidator(); + } + + @Test + void testHostMissing() { + final JsonNode config = Jsons.deserialize(CONFIGURATION); + ((ObjectNode) config).remove("host"); + assertFalse(validator.test(schema, config)); + } + + @Test + void testPortMissing() { + final JsonNode config = Jsons.deserialize(CONFIGURATION); + ((ObjectNode) config).remove("port"); + assertFalse(validator.test(schema, config)); + } + + @Test + void 
testUsernameMissing() { + final JsonNode config = Jsons.deserialize(CONFIGURATION); + ((ObjectNode) config).remove("username"); + assertFalse(validator.test(schema, config)); + } + + @Test + void testPasswordMissing() { + final JsonNode config = Jsons.deserialize(CONFIGURATION); + ((ObjectNode) config).remove("password"); + assertTrue(validator.test(schema, config)); + } + + @Test + void testReplicationMethodMissing() { + final JsonNode config = Jsons.deserialize(CONFIGURATION); + ((ObjectNode) config).remove("replication_method"); + assertFalse(validator.test(schema, config)); + } + + @Test + void testSchemaMissing() { + final JsonNode config = Jsons.deserialize(CONFIGURATION); + ((ObjectNode) config).remove("database"); + assertFalse(validator.test(schema, config)); + } + + @Test + void testAdditionalJdbcParamMissing() { + final JsonNode config = Jsons.deserialize(CONFIGURATION); + ((ObjectNode) config).remove("jdbc_url_params"); + assertTrue(validator.test(schema, config)); + } + + @Test + void testWithJdbcAdditionalProperty() { + final JsonNode config = Jsons.deserialize(CONFIGURATION); + assertTrue(validator.test(schema, config)); + } + + @Test + void testJdbcAdditionalProperty() throws Exception { + final ConnectorSpecification spec = new SingleStoreSource().spec(); + assertNotNull(spec.getConnectionSpecification().get("properties").get("jdbc_url_params")); + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSslJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSslJdbcSourceAcceptanceTest.java new file mode 100644 index 000000000000..b94a08cebfdf --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/test/java/io/airbyte/integrations/source/singlestore/SingleStoreSslJdbcSourceAcceptanceTest.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., 
all rights reserved. + */ + +package io.airbyte.integrations.source.singlestore; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.singlestore.SingleStoreTestDatabase.BaseImage; +import io.airbyte.integrations.source.singlestore.SingleStoreTestDatabase.ContainerModifier; +import org.junit.jupiter.api.Order; + +@Order(3) +class SingleStoreSslJdbcSourceAcceptanceTest extends SingleStoreJdbcSourceAcceptanceTest { + + @Override + protected JsonNode config() { + return testdb.testConfigBuilder() + .with(JdbcUtils.SSL_MODE_KEY, Jsons.jsonNode( + ImmutableMap.of(JdbcUtils.MODE_KEY, "verify-ca", "ca_certificate", + testdb.getCertificates().caCertificate()))) + .build(); + } + + @Override + protected SingleStoreTestDatabase createTestDatabase() { + return SingleStoreTestDatabase.in(BaseImage.SINGLESTORE_DEV, ContainerModifier.CERT); + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/AirbyteSingleStoreTestContainer.java b/airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/AirbyteSingleStoreTestContainer.java new file mode 100644 index 000000000000..0e62ad011efa --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/AirbyteSingleStoreTestContainer.java @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.singlestore; + +import static java.time.temporal.ChronoUnit.SECONDS; +import static java.util.Collections.singleton; + +import java.time.Duration; +import java.util.Set; +import java.util.concurrent.Future; +import org.apache.commons.lang3.StringUtils; +import org.testcontainers.containers.JdbcDatabaseContainer; +import org.testcontainers.containers.wait.strategy.LogMessageWaitStrategy; +import org.testcontainers.utility.DockerImageName; + +public final class AirbyteSingleStoreTestContainer extends JdbcDatabaseContainer { + + private static final DockerImageName DEFAULT_IMAGE_NAME = DockerImageName.parse("ghcr.io/singlestore-labs/singlestoredb-dev"); + static final String DEFAULT_TAG = "latest"; + private static final int PORT = 3306; + private static final int DEFAULT_STARTUP_TIMEOUT_SECONDS = 300; + private static final int DEFAULT_CONNECT_TIMEOUT_SECONDS = 120; + + // Container defaults + static final String ROOT_USER = "root"; + static final String DEFAULT_ROOT_USER_PASSWORD = "root"; + private String databaseName; + private String username = ROOT_USER; + private String password = DEFAULT_ROOT_USER_PASSWORD; + private static final String SINGLESTORE_LICENSE = System.getenv("SINGLESTORE_LICENSE"); + + public AirbyteSingleStoreTestContainer() { + this(DEFAULT_IMAGE_NAME.withTag(DEFAULT_TAG)); + } + + public AirbyteSingleStoreTestContainer(final String dockerImageName) { + this(DockerImageName.parse(dockerImageName)); + } + + public AirbyteSingleStoreTestContainer(final DockerImageName dockerImageName) { + super(dockerImageName); + dockerImageName.assertCompatibleWith(DEFAULT_IMAGE_NAME); + preconfigure(); + } + + public AirbyteSingleStoreTestContainer(final Future dockerImageName) { + super(dockerImageName); + preconfigure(); + } + + private void preconfigure() { + this.waitStrategy = new LogMessageWaitStrategy().withRegEx(".*Log Opened*\\s").withTimes(1) + 
.withStartupTimeout(Duration.of(DEFAULT_STARTUP_TIMEOUT_SECONDS, SECONDS)); + this.withConnectTimeoutSeconds(DEFAULT_CONNECT_TIMEOUT_SECONDS); + this.addExposedPorts(PORT); + } + + @Override + protected void waitUntilContainerStarted() { + getWaitStrategy().waitUntilReady(this); + } + + @Override + public Set getLivenessCheckPortNumbers() { + return singleton(getMappedPort(PORT)); + } + + @Override + public String getDriverClassName() { + return "com.singlestore.jdbc.Driver"; + } + + @Override + public String getJdbcUrl() { + return String.format("jdbc:singlestore://%s:%d/%s", getHost(), getPort(), getDatabaseName()); + } + + @Override + public String getUsername() { + return username; + } + + @Override + public String getPassword() { + return password; + } + + @Override + public String getDatabaseName() { + return databaseName; + } + + @Override + public AirbyteSingleStoreTestContainer withUsername(final String username) { + if (StringUtils.isEmpty(username)) { + throw new IllegalArgumentException("Username cannot be null or empty"); + } + this.username = username; + return self(); + } + + @Override + public AirbyteSingleStoreTestContainer withPassword(final String password) { + if (StringUtils.isEmpty(password)) { + throw new IllegalArgumentException("Password cannot be null or empty"); + } + this.password = password; + return self(); + } + + @Override + public AirbyteSingleStoreTestContainer withDatabaseName(final String databaseName) { + if (StringUtils.isEmpty(databaseName)) { + throw new IllegalArgumentException("Database name cannot be null or empty"); + } + this.databaseName = databaseName; + return self(); + } + + @Override + public AirbyteSingleStoreTestContainer withUrlParam(final String paramName, final String paramValue) { + throw new UnsupportedOperationException("The SingleStore Database driver does not support this"); + } + + public void restart() { + String tag = this.getContainerId(); + 
dockerClient.commitCmd(this.getContainerId()).withRepository("singlestore-ssl").withTag(tag).exec(); + this.stop(); + this.setDockerImageName("singlestore-ssl:" + tag); + this.start(); + } + + public Integer getPort() { + return getMappedPort(PORT); + } + + @Override + public String getTestQueryString() { + return "SELECT 1"; + } + + @Override + protected void configure() { + withEnv("ROOT_PASSWORD", DEFAULT_ROOT_USER_PASSWORD); + withEnv("SINGLESTORE_LICENSE", SINGLESTORE_LICENSE); + } + +} diff --git a/airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/SingleStoreContainerFactory.java b/airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/SingleStoreContainerFactory.java new file mode 100644 index 000000000000..1bc5fe3e7377 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/SingleStoreContainerFactory.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.singlestore; + +import io.airbyte.cdk.testutils.ContainerFactory; +import java.io.IOException; +import java.io.UncheckedIOException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.Network; +import org.testcontainers.utility.DockerImageName; + +public class SingleStoreContainerFactory extends ContainerFactory { + + private static final Logger LOGGER = LoggerFactory.getLogger(SingleStoreContainerFactory.class); + + @Override + protected AirbyteSingleStoreTestContainer createNewContainer(DockerImageName imageName) { + return new AirbyteSingleStoreTestContainer(imageName.asCompatibleSubstituteFor("ghcr.io/singlestore-labs/singlestoredb-dev")); + } + + /** + * Create a new network and bind it to the container. 
+ */ + public void withNetwork(AirbyteSingleStoreTestContainer container) { + container.withNetwork(Network.newNetwork()); + } + + private static void execInContainer(AirbyteSingleStoreTestContainer container, String... commands) { + container.start(); + try { + for (String command : commands) { + var output = container.execInContainerWithUser("root", "/bin/bash", "-c", command); + LOGGER.info("Execute command: {}", output); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + /** + * Generate SSL certificates and enable SSL connections. + */ + public static void withCert(AirbyteSingleStoreTestContainer container) { + String[] commands = {"mkdir certs", "/bin/openssl genrsa 2048 > /certs/ca-key.pem", + "/bin/openssl req -new -x509 -nodes -days 3600 -key /certs/ca-key.pem -out /certs/ca-cert.pem -subj '/C=US/ST=CA/L=San Francisco/O=MemSQL/CN=memsql.ssl.test.ca'", + "/bin/openssl req -newkey rsa:2048 -nodes -keyout /certs/server-key.pem -out /certs/server-req.pem -subj '/C=US/ST=CA/L=San Francisco/O=MemSQL/CN=memsql.ssl.test.server'", + "/bin/openssl rsa -in /certs/server-key.pem -out /certs/server-key.pem", + "/bin/openssl x509 -req -in /certs/server-req.pem -days 3600 -CA /certs/ca-cert.pem -CAkey /certs/ca-key.pem -set_serial 01 -out /certs/server-cert.pem", + "/bin/openssl verify -CAfile /certs/ca-cert.pem /certs/server-cert.pem", + "echo -e 'ssl_cert = /certs/server-cert.pem \\nssl_key = /certs/server-key.pem \\nssl_ca = /certs/ca-cert.pem' >> /data/master/memsql.cnf", + "echo -e 'ssl_cert = /certs/server-cert.pem \\nssl_key = /certs/server-key.pem \\nssl_ca = /certs/ca-cert.pem' >> /data/leaf/memsql.cnf", + "chown -R memsql /certs", "chmod -R 777 /certs"}; + execInContainer(container, commands); + container.restart(); + } + +} diff --git 
a/airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/SingleStoreTestDatabase.java b/airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/SingleStoreTestDatabase.java new file mode 100644 index 000000000000..083e8043f4f2 --- /dev/null +++ b/airbyte-integrations/connectors/source-singlestore/src/testFixtures/java/io/airbyte/integrations/source/singlestore/SingleStoreTestDatabase.java @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.singlestore; + +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.testutils.ContainerFactory.NamedContainerModifier; +import io.airbyte.cdk.testutils.TestDatabase; +import io.airbyte.integrations.source.singlestore.SingleStoreTestDatabase.SingleStoreConfigBuilder; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.jooq.SQLDialect; + +public class SingleStoreTestDatabase extends TestDatabase { + + public enum BaseImage { + + SINGLESTORE_DEV("ghcr.io/singlestore-labs/singlestoredb-dev:latest"); + + public final String reference; + + BaseImage(String reference) { + this.reference = reference; + } + + } + + public enum ContainerModifier implements NamedContainerModifier { + + CERT(SingleStoreContainerFactory::withCert); + + private Consumer modifer; + + ContainerModifier(final Consumer modifer) { + this.modifer = modifer; + } + + @Override + public Consumer modifier() { + return modifer; + } + + } + + static public SingleStoreTestDatabase in(BaseImage baseImage, ContainerModifier... 
modifiers) { + final var container = new SingleStoreContainerFactory().shared(baseImage.reference, modifiers); + return new SingleStoreTestDatabase(container).initialized(); + } + + public SingleStoreTestDatabase(AirbyteSingleStoreTestContainer container) { + super(container); + } + + @Override + protected Stream> inContainerBootstrapCmd() { + final var sql = Stream.of(String.format("CREATE DATABASE %s", getDatabaseName()), + String.format("CREATE USER %s IDENTIFIED BY '%s'", getUserName(), getPassword()), + String.format("GRANT ALL ON %s.* TO %s", getDatabaseName(), getUserName())); + getContainer().withUsername(getUserName()).withPassword(getPassword()).withDatabaseName(getDatabaseName()); + return Stream.of(singlestoreCmd(sql)); + } + + @Override + protected Stream inContainerUndoBootstrapCmd() { + return singlestoreCmd(Stream.of(String.format("DROP USER %s", getUserName()), String.format("DROP DATABASE \\`%s\\`", getDatabaseName()))); + } + + @Override + public DatabaseDriver getDatabaseDriver() { + return DatabaseDriver.SINGLESTORE; + } + + @Override + public SQLDialect getSqlDialect() { + return SQLDialect.DEFAULT; + } + + @Override + public SingleStoreConfigBuilder configBuilder() { + return new SingleStoreConfigBuilder(this); + } + + public Stream singlestoreCmd(Stream sql) { + return Stream.of("/bin/bash", "-c", String.format("set -o errexit -o pipefail; echo \"%s\" | singlestore -v -v -v --user=root --password=root", + sql.collect(Collectors.joining("; ")))); + } + + private Certificates cachedCerts; + + public synchronized Certificates getCertificates() { + if (cachedCerts == null) { + final String caCert, serverKey, serverCert; + try { + caCert = getContainer().execInContainer("/bin/bash", "-c", "cat /certs/ca-cert.pem").getStdout().trim(); + serverKey = getContainer().execInContainer("/bin/bash", "-c", "cat /certs/server-key.pem").getStdout().trim(); + serverCert = getContainer().execInContainer("/bin/bash", "-c", "cat 
/certs/server-cert.pem").getStdout().trim(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + cachedCerts = new Certificates(caCert, serverKey, serverCert); + } + return cachedCerts; + } + + public record Certificates(String caCertificate, String serverKey, String serverCert) { + + } + + static public class SingleStoreConfigBuilder extends ConfigBuilder { + + protected SingleStoreConfigBuilder(SingleStoreTestDatabase testDatabase) { + super(testDatabase); + } + + public SingleStoreConfigBuilder withStandardReplication() { + return with("replication_method", "STANDARD"); + } + + } + +} diff --git a/docs/integrations/sources/singlestore.md b/docs/integrations/sources/singlestore.md new file mode 100644 index 000000000000..708182cb9eb8 --- /dev/null +++ b/docs/integrations/sources/singlestore.md @@ -0,0 +1,176 @@ +# SingleStore + +## Overview + +[SingleStore](https://www.singlestore.com/) is a distributed SQL database that offers +high-throughput transactions (inserts and upserts), low-latency analytics and context from real-time +vector data. + +## Features + +| Feature | Supported | Notes | +|:--------------------------|:----------|:------| +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Change Data Capture | No | | +| SSL Support | Yes | | +| SSH Tunnel Connection | Yes | | + +The contents below include a 'Quick Start' guide, advanced setup steps, and reference information ( +data type mapping and changelogs). + +## Getting Started + +#### Requirements + +1. SingleStore instance +2. Allow connections from Airbyte to your SingleStore database \(if they exist in separate VPCs\) +3. Create a dedicated read-only Airbyte user with access to all tables needed for replication + +#### 1. Make sure your database is accessible from the machine running Airbyte + +This is dependent on your networking setup. 
The easiest way to verify if Airbyte is able to connect +to your SingleStore instance is via the check connection tool in the UI. + +#### 2. Create a dedicated read-only user with access to the relevant tables \(Recommended but optional\) + +This step is optional but highly recommended to allow for better permission control and auditing. +Alternatively, you can use Airbyte with an existing user in your database. + +To create a dedicated database user, run the following commands against your database: + +```sql +CREATE +USER airbyte IDENTIFIED BY <password>; +``` + +Next, grant the user read-only access to the relevant tables. The simplest way is to grant read +access to all tables in the database as follows: + +```sql +GRANT +SELECT +ON <database>.* TO airbyte; +``` + +Or you can be more granular: + +```sql +GRANT SELECT ON "<database>"."<table>" TO airbyte; +GRANT SELECT ON "<database>"."<table>" TO airbyte; +``` + +Your database user should now be ready for use with Airbyte. + +## Connecting with SSL or SSH Tunneling + + + +### SSL Modes + +Here is a breakdown of available SSL connection modes: + +- `disable` to disable encrypted communication between Airbyte and the source +- `required` to always require encryption. Note: The connection will fail if the source doesn't + support encryption. +- `verify-ca` to always require encryption and verify that the source has a valid SSL certificate +- `verify-full` to always require encryption and verify the identity of the source + + + +### Connection via SSH Tunnel + +Airbyte has the ability to connect to a SingleStore instance via an SSH Tunnel. The reason you might +want +to do this is because it is not possible \(or against security policy\) to connect to the database +directly \(e.g. it does not have a public IP address\). + +When using an SSH tunnel, you are configuring Airbyte to connect to an intermediate server \(a.k.a. +a bastion server\) that _does_ have direct access to the database. 
Airbyte connects to the bastion +and then asks the bastion to connect directly to the server. + +Using this feature requires additional configuration, when creating the source. We will talk through +what each piece of configuration means. + +1. Configure all fields for the source as you normally would, except `SSH Tunnel Method`. +2. `SSH Tunnel Method` defaults to `No Tunnel` \(meaning a direct connection\). If you want to use + an SSH Tunnel choose `SSH Key Authentication` or `Password Authentication`. + 1. Choose `SSH Key Authentication` if you will be using an RSA private key as your secret for + establishing the SSH Tunnel \(see below for more information on generating this key\). + 2. Choose `Password Authentication` if you will be using a password as your secret for + establishing the SSH Tunnel. +3. `SSH Tunnel Jump Server Host` refers to the intermediate \(bastion\) server that Airbyte will + connect to. This should be a hostname or an IP Address. +4. `SSH Connection Port` is the port on the bastion server with which to make the SSH connection. + The default port for SSH connections is `22`, so unless you have explicitly changed something, go + with the default. +5. `SSH Login Username` is the username that Airbyte should use when connecting to the bastion + server. This is NOT the SingleStore username. +6. If you are using `Password Authentication`, then `Password` should be set to the + password of the User from the previous step. If you are using `SSH Key Authentication` leave this + blank. Again, this is not the SingleStore password, but the password for the OS-user that Airbyte + is + using to perform commands on the bastion. +7. If you are using `SSH Key Authentication`, then `SSH Private Key` should be set to the RSA + Private Key that you are using to create the SSH connection. This should be the full contents of + the key file starting with `-----BEGIN RSA PRIVATE KEY-----` and ending + with `-----END RSA PRIVATE KEY-----`. 
+ +#### Generating a private key for SSH Tunneling + +The connector expects an RSA key in PEM format. To generate this key: + +```text +ssh-keygen -t rsa -m PEM -f myuser_rsa +``` + +This produces the private key in pem format, and the public key remains in the standard format used +by the `authorized_keys` file on your bastion host. The public key should be added to your bastion +host to whichever user you want to use with Airbyte. The private key is provided via copy-and-paste +to the Airbyte connector configuration screen, so it may log in to the bastion. + +## Data Type Mapping + +SingleStore data types are mapped to the following data types when synchronizing data. + +| SingleStore
Type | Resulting Type | Notes | +|:----------------------|:-----------------------|:------| +| `BIT` | base64 binary string | | +| `TINYINT` | number | | +| `SMALLINT` | number | | +| `MEDIUMINT` | number | | +| `INT` | number | | +| `BIGINT` | number | | +| `FLOAT` | number | | +| `DOUBLE` | number | | +| `DECIMAL` | number | | +| `DATE` | string | | +| `TIME` | string | | +| `DATETIME` | string | | +| `TIMESTAMP` | string | | +| `YEAR` | year string | | +| `CHAR` | string | | +| `VARCHAR` | string | | +| `LONGTEXT` | string | | +| `MEDIUMTEXT` | string | | +| `TEXT` | string | | +| `TINYTEXT` | string | | +| `BINARY` | base64 binary string | | +| `VARBINARY` | base64 binary string | | +| `LONGBLOB` | base64 binary string | | +| `MEDIUMBLOB` | base64 binary string | | +| `BLOB` | base64 binary string | | +| `TINYBLOB` | base64 binary string | | +| `JSON` | serialized json string | | +| `ENUM` | string | | +| `SET` | string | | +| `GEOGRAPHYPOINT` | string | | +| `GEOGRAPHY` | string | | +| `VECTOR` | string | | + +## Changelog + +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:-------------------------------------------------------|:---------------------------------| +| 0.1.0 | 2024-04-16 | [37337](https://github.com/airbytehq/airbyte/pull/37337) | Add SingleStore source connector | + \ No newline at end of file