diff --git a/.vscode/cspell.json b/.vscode/cspell.json index d1ce21af5739..32f0621d9435 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -140,6 +140,7 @@ "sdk/cosmos/azure-cosmos-encryption/**", "sdk/cosmos/azure-cosmos-spark_3_2-12/**", "sdk/spring/azure-spring-data-cosmos/**", + "sdk/cosmos/azure-cosmos-kafka-connect/**", "sdk/deviceupdate/azure-iot-deviceupdate/**", "sdk/e2e/src/**", "sdk/eventgrid/azure-messaging-eventgrid-cloudnative-cloudevents/**", @@ -723,7 +724,7 @@ "words": [ "Pfast", "Pdirect", - "Pmulti", + "Pmulti", "Psplit", "Pquery", "Pcfp", diff --git a/eng/code-quality-reports/src/main/resources/checkstyle/checkstyle-suppressions.xml b/eng/code-quality-reports/src/main/resources/checkstyle/checkstyle-suppressions.xml index f8ac3f277664..db3789c74a93 100755 --- a/eng/code-quality-reports/src/main/resources/checkstyle/checkstyle-suppressions.xml +++ b/eng/code-quality-reports/src/main/resources/checkstyle/checkstyle-suppressions.xml @@ -316,6 +316,9 @@ the main ServiceBusClientBuilder. --> files="com.azure.cosmos.ClientSideRequestStatistics"/> + + diff --git a/eng/versioning/external_dependencies.txt b/eng/versioning/external_dependencies.txt index a75828c48d2c..183fb2585631 100644 --- a/eng/versioning/external_dependencies.txt +++ b/eng/versioning/external_dependencies.txt @@ -395,6 +395,10 @@ cosmos_org.scalastyle:scalastyle-maven-plugin;1.0.0 # Cosmos Kafka connector runtime dependencies cosmos_org.apache.kafka:connect-api;3.6.0 # Cosmos Kafka connector tests only +cosmos_org.apache.kafka:connect-runtime;3.6.0 +cosmos_org.testcontainers:testcontainers;1.19.5 +cosmos_org.testcontainers:kafka;1.19.5 +cosmos_org.sourcelab:kafka-connect-client;4.0.4 # Maven Tools for Cosmos Kafka connector only cosmos_io.confluent:kafka-connect-maven-plugin;0.12.0 diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/CHANGELOG.md b/sdk/cosmos/azure-cosmos-kafka-connect/CHANGELOG.md index 8d7cb3d876bc..532c376c58a9 100644 --- a/sdk/cosmos/azure-cosmos-kafka-connect/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos-kafka-connect/CHANGELOG.md @@ -3,6 +3,7 @@ ### 1.0.0-beta.1 (Unreleased) #### Features Added +* Added Source connector. See [PR 39410](https://github.com/Azure/azure-sdk-for-java/pull/39410) #### Breaking Changes diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/doc/configuration-reference.md b/sdk/cosmos/azure-cosmos-kafka-connect/doc/configuration-reference.md new file mode 100644 index 000000000000..8512ba8d7af7 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/doc/configuration-reference.md @@ -0,0 +1,25 @@ +## Configuration Reference: + +## Generic Configuration +| Config Property Name | Default | Description | |:---------------------------------------------|:--------|:------------------------------------------------------------------------------| | `kafka.connect.cosmos.accountEndpoint` | None | Cosmos DB Account Endpoint Uri | | `kafka.connect.cosmos.accountKey` | None | Cosmos DB Account Key | | `kafka.connect.cosmos.useGatewayMode` | `false` | Flag to indicate whether to use gateway mode. By default it is false. |
| `kafka.connect.cosmos.preferredRegionsList` | `[]` | Preferred regions list to be used for a multi region Cosmos DB account. This is a comma separated value (e.g., `[East US, West US]` or `East US, West US`); the provided preferred regions will be used as a hint. You should use a co-located Kafka cluster with your Cosmos DB account and pass the Kafka cluster region as the preferred region. See the list of Azure regions [here](https://docs.microsoft.com/dotnet/api/microsoft.azure.documents.locationnames?view=azure-dotnet&preserve-view=true). | + | `kafka.connect.cosmos.applicationName` | `""` | Application name. Will be added as the userAgent suffix. | + +## Source Connector Configuration +| Config Property Name | Default | Description | |:----------------------------------------------------------|:-------------------------|:------------------------------------------------------------------------------| | `kafka.connect.cosmos.source.database.name` | None | Cosmos DB database name. | | `kafka.connect.cosmos.source.containers.includeAll` | `false` | Flag to indicate whether to read from all containers. | | `kafka.connect.cosmos.source.containers.includedList` | `[]` | Containers included. This config will be ignored if `kafka.connect.cosmos.source.containers.includeAll` is true. | | `kafka.connect.cosmos.source.containers.topicMap` | `[]` | A comma delimited list of Kafka topics mapped to Cosmos containers. For example: topic1#con1,topic2#con2. By default, the container name is used as the name of the Kafka topic to publish data to; use this property to override the default. | | `kafka.connect.cosmos.source.changeFeed.startFrom` | `Beginning` | ChangeFeed start from settings (Now, Beginning, or a certain point in time (UTC), for example 2020-02-10T14:15:03). The default value is 'Beginning'. | | `kafka.connect.cosmos.source.changeFeed.mode` | `LatestVersion` | ChangeFeed mode (LatestVersion or AllVersionsAndDeletes). | | `kafka.connect.cosmos.source.changeFeed.maxItemCountHint` | `1000` | The maximum number of documents returned in a single change feed request. However, the number of items received might be higher than the specified value if multiple items are changed by the same transaction. | | `kafka.connect.cosmos.source.metadata.poll.delay.ms` | `300000` | Indicates how often to check for metadata changes (including container split/merge and adding/removing/recreating containers). When changes are detected, the tasks are reconfigured. Default is 5 minutes. | | `kafka.connect.cosmos.source.metadata.storage.topic` | `_cosmos.metadata.topic` | The name of the topic where the metadata is stored. The metadata topic will be created if it does not already exist; otherwise the pre-created topic is used. | | `kafka.connect.cosmos.source.messageKey.enabled` | `true` | Whether to set the Kafka record message key. | | `kafka.connect.cosmos.source.messageKey.field` | `id` | The field to use as the message key. | diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/pom.xml b/sdk/cosmos/azure-cosmos-kafka-connect/pom.xml index 47496e3a0ea3..76bdce066b67 100644 --- a/sdk/cosmos/azure-cosmos-kafka-connect/pom.xml +++ b/sdk/cosmos/azure-cosmos-kafka-connect/pom.xml @@ -37,8 +37,6 @@ Licensed under the MIT License. UTF-8 0.01 0.02 - 11 - 11 azure_cosmos_kafka_connect @@ -48,7 +46,12 @@ Licensed under the MIT License.
--add-opens com.azure.cosmos.kafka.connect/com.azure.cosmos.kafka.connect=ALL-UNNAMED --add-opens com.azure.cosmos.kafka.connect/com.azure.cosmos.kafka.connect.implementation=ALL-UNNAMED - --add-opens com.azure.cosmos.kafka.connect/com.azure.cosmos.kafka.connect.models=ALL-UNNAMED + --add-opens com.azure.cosmos.kafka.connect/com.azure.cosmos.kafka.connect.implementation.source=com.fasterxml.jackson.databind,ALL-UNNAMED + --add-opens com.azure.cosmos/com.azure.cosmos.implementation.routing=ALL-UNNAMED + --add-opens com.azure.cosmos/com.azure.cosmos.implementation.apachecommons.lang=ALL-UNNAMED + --add-exports com.azure.cosmos/com.azure.cosmos.implementation.changefeed.common=com.azure.cosmos.kafka.connect + --add-exports com.azure.cosmos/com.azure.cosmos.implementation.feedranges=com.azure.cosmos.kafka.connect + --add-exports com.azure.cosmos/com.azure.cosmos.implementation.query=com.azure.cosmos.kafka.connect @@ -94,6 +97,19 @@ Licensed under the MIT License. 1.10.0 + + org.apache.kafka + connect-runtime + 3.6.0 + test + + + jackson-jaxrs-json-provider + com.fasterxml.jackson.jaxrs + + + + org.testng testng @@ -160,6 +176,24 @@ Licensed under the MIT License. 1.14.12 test + + org.testcontainers + testcontainers + 1.19.5 + test + + + org.testcontainers + kafka + 1.19.5 + test + + + org.sourcelab + kafka-connect-client + 4.0.4 + test + @@ -204,6 +238,7 @@ Licensed under the MIT License. com.azure:* org.apache.kafka:connect-api:[3.6.0] io.confluent:kafka-connect-maven-plugin:[0.12.0] + org.sourcelab:kafka-connect-client:[4.0.4] @@ -221,6 +256,7 @@ Licensed under the MIT License. shade + ${project.artifactId}-${project.version}-jar-with-dependencies *:*:*:* diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/.gitignore b/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/.gitignore new file mode 100644 index 000000000000..b170e557735b --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/.gitignore @@ -0,0 +1,8 @@ +connectors/ +log.txt + +# Exclude all temporary files in resources +!resources/*example +resources/sink.properties +resources/source.properties +resources/standalone.properties diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/Dockerfile b/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/Dockerfile new file mode 100644 index 000000000000..37da2123ab86 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/Dockerfile @@ -0,0 +1,7 @@ +# Build the Cosmos DB Connectors on top of the Kafka Connect image +FROM confluentinc/cp-kafka-connect:7.5.0 + +# Install datagen connector +RUN confluent-hub install --no-prompt confluentinc/kafka-connect-datagen:latest + +COPY connectors/ /etc/kafka-connect/jars diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/docker-compose.yml b/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/docker-compose.yml new file mode 100644 index 000000000000..6f733fee3ab7 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/docker-compose.yml @@ -0,0 +1,191 @@ +# Adapted from https://github.com/confluentinc/cp-all-in-one and https://github.com/simplesteph/kafka-stack-docker-compose +version: '2.1' + +services: + zookeeper: + image: confluentinc/cp-zookeeper:7.5.0 + restart: unless-stopped + hostname: zookeeper + container_name: zookeeper + ports: + - "2181:2181" + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_TICK_TIME: 2000 + + broker: + image: confluentinc/cp-server:7.5.0 + hostname: broker + container_name: broker + ports: + - "9092:9092" + - "9101:9101" + environment: + 
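      # The PLAINTEXT listener (broker:29092) is what the other containers in this compose file use;
      # PLAINTEXT_HOST (localhost:9092) is the listener exposed to the host machine.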
KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181' + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 + KAFKA_METRIC_REPORTERS: io.confluent.metrics.reporter.ConfluentMetricsReporter + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 + KAFKA_CONFLUENT_LICENSE_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_CONFLUENT_BALANCER_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 + KAFKA_JMX_PORT: 9101 + KAFKA_JMX_HOSTNAME: localhost + KAFKA_CONFLUENT_SCHEMA_REGISTRY_URL: http://schema-registry:8081 + CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: broker:29092 + CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1 + CONFLUENT_METRICS_ENABLE: 'true' + CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous' + depends_on: + - zookeeper + + schema-registry: + image: confluentinc/cp-schema-registry:7.5.0 + hostname: schema-registry + container_name: schema-registry + ports: + - "8081:8081" + environment: + SCHEMA_REGISTRY_HOST_NAME: schema-registry + SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:29092' + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081 + depends_on: + - broker + + schema-registry-ui: + image: landoop/schema-registry-ui:0.9.4 + hostname: schema-registry-ui + container_name: schema-registry-ui + ports: + - "9001:8000" + environment: + SCHEMAREGISTRY_URL: http://schema-registry:8081/ + PROXY: "true" + depends_on: + - schema-registry + + rest-proxy: + image: confluentinc/cp-kafka-rest:7.5.0 + ports: + - "8082:8082" + hostname: rest-proxy + container_name: rest-proxy + environment: + KAFKA_REST_HOST_NAME: rest-proxy + KAFKA_REST_BOOTSTRAP_SERVERS: 'broker:29092' + KAFKA_REST_LISTENERS: "http://0.0.0.0:8082" + KAFKA_REST_SCHEMA_REGISTRY_URL: 'http://schema-registry:8081' + depends_on: + - broker + - schema-registry + + kafka-topics-ui: + image: landoop/kafka-topics-ui:0.9.4 + hostname: kafka-topics-ui + container_name: kafka-topics-ui + ports: + - "9000:8000" + environment: + KAFKA_REST_PROXY_URL: "http://rest-proxy:8082/" + PROXY: "true" + depends_on: + - zookeeper + - broker + - schema-registry + - rest-proxy + + connect: + # Using modified version of confluentinc/cp-kafka-connect:6.0.0 to avoid dealing with volume mounts + image: cosmosdb-kafka-connect:latest + hostname: connect + container_name: connect + ports: + - "8083:8083" + environment: + CONNECT_BOOTSTRAP_SERVERS: "broker:29092" + CONNECT_REST_ADVERTISED_HOST_NAME: "connect" + CONNECT_REST_PORT: 8083 + CONNECT_GROUP_ID: compose-connect-group + CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs + CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1" + CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000 + CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets + CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1" + CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status + CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1" + CONNECT_KEY_CONVERTER: "org.apache.kafka.connect.storage.StringConverter" + CONNECT_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" + CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: 'http://schema-registry:8081' + CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: 'http://schema-registry:8081' + CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.storage.StringConverter" + CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" + # CLASSPATH required due to CC-2422 + CLASSPATH: 
/usr/share/java/monitoring-interceptors/monitoring-interceptors-6.0.0.jar + CONNECT_PRODUCER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringProducerInterceptor" + CONNECT_CONSUMER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor" + CONNECT_LOG4J_ROOT_LOGLEVEL: "WARN" + CONNECT_LOG4J_LOGGERS: "org.apache.kafka=INFO,org.reflections=ERROR,com.azure.cosmos.kafka=DEBUG" + CONNECT_PLUGIN_PATH: '/usr/share/java,/usr/share/confluent-hub-components,/etc/kafka-connect/jars' + depends_on: + - zookeeper + - broker + - schema-registry + - rest-proxy + + control-center: + image: confluentinc/cp-enterprise-control-center:7.5.0 + hostname: control-center + container_name: control-center + ports: + - "9021:9021" + environment: + CONTROL_CENTER_BOOTSTRAP_SERVERS: 'broker:29092' + CONTROL_CENTER_CONNECT_CONNECT-DEFAULT_CLUSTER: 'http://connect:8083' + CONTROL_CENTER_KSQL_KSQLDB1_URL: "http://ksqldb-server:8088" + CONTROL_CENTER_KSQL_KSQLDB1_ADVERTISED_URL: "http://localhost:8088" + CONTROL_CENTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081" + CONTROL_CENTER_REPLICATION_FACTOR: 1 + CONTROL_CENTER_INTERNAL_TOPICS_PARTITIONS: 1 + CONTROL_CENTER_MONITORING_INTERCEPTOR_TOPIC_PARTITIONS: 1 + CONTROL_CENTER_CONNECT_HEALTHCHECK_ENDPOINT: '/connectors' + CONFLUENT_METRICS_TOPIC_REPLICATION: 1 + PORT: 9021 + depends_on: + - broker + - schema-registry + - connect + + ksqldb-server: + image: confluentinc/cp-ksqldb-server:7.5.0 + hostname: ksqldb-server + container_name: ksqldb-server + ports: + - "8088:8088" + environment: + KSQL_CONFIG_DIR: "/etc/ksql" + KSQL_BOOTSTRAP_SERVERS: "broker:29092" + KSQL_HOST_NAME: ksqldb-server + KSQL_LISTENERS: "http://0.0.0.0:8088" + KSQL_CACHE_MAX_BYTES_BUFFERING: 0 + KSQL_KSQL_SCHEMA_REGISTRY_URL: "http://schema-registry:8081" + KSQL_PRODUCER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringProducerInterceptor" + KSQL_CONSUMER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor" + KSQL_KSQL_CONNECT_URL: "http://connect:8083" + KSQL_KSQL_LOGGING_PROCESSING_TOPIC_REPLICATION_FACTOR: 1 + KSQL_KSQL_LOGGING_PROCESSING_TOPIC_AUTO_CREATE: 'true' + KSQL_KSQL_LOGGING_PROCESSING_STREAM_AUTO_CREATE: 'true' + depends_on: + - broker + - connect + + zoonavigator: + image: elkozmon/zoonavigator:0.8.0 + container_name: zoonavigator + ports: + - "9004:8000" + environment: + HTTP_PORT: 8000 + AUTO_CONNECT_CONNECTION_STRING: zookeeper:2181 \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/startup.ps1 b/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/startup.ps1 new file mode 100644 index 000000000000..9cb5c13150cd --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/startup.ps1 @@ -0,0 +1,29 @@ +#!/usr/bin/env pwsh +$ErrorActionPreference='Stop' +cd $PSScriptRoot +Write-Host "Shutting down Docker Compose orchestration..." +docker-compose down + +Write-Host "Deleting prior Cosmos DB connectors..." +rm -rf "$PSScriptRoot/connectors" +New-Item -Path "$PSScriptRoot" -ItemType "directory" -Name "connectors" -Force | Out-Null +cd $PSScriptRoot/../.. + +Write-Host "Rebuilding Cosmos DB connectors..." 
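# Note: the maven-shade-plugin configuration in pom.xml sets the shaded artifact's final name to
# <artifactId>-<version>-jar-with-dependencies, which is the single jar copied into connectors/ below.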
+mvn clean package -DskipTests -Dmaven.javadoc.skip +copy target\*-jar-with-dependencies.jar $PSScriptRoot/connectors +cd $PSScriptRoot + +Write-Host "Adding custom Insert UUID SMT" +cd $PSScriptRoot/connectors +git clone https://github.com/confluentinc/kafka-connect-insert-uuid.git insertuuid -q && cd insertuuid +mvn clean package -DskipTests=true +copy target\*.jar $PSScriptRoot/connectors +rm -rf "$PSScriptRoot/connectors/insertuuid" +cd $PSScriptRoot + +Write-Host "Building Cosmos DB Kafka Connect Docker image" +docker build . -t cosmosdb-kafka-connect:latest + +Write-Host "Starting Docker Compose..." +docker-compose up -d \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/startup.sh b/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/startup.sh new file mode 100755 index 000000000000..1f5dbd056648 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/docker/startup.sh @@ -0,0 +1,27 @@ +#!/bin/bash +echo "Shutting down Docker Compose orchestration..." +docker-compose down + +echo "Deleting prior Cosmos DB connectors..." +rm -rf connectors +mkdir connectors +cd ../../ + +echo "Rebuilding Cosmos DB connectors..." +mvn clean package -DskipTests=true -Dmaven.javadoc.skip=true +cp target/*-jar-with-dependencies.jar src/docker/connectors +cd src/docker + +echo "Adding custom Insert UUID SMT" +cd connectors +git clone https://github.com/confluentinc/kafka-connect-insert-uuid.git insertuuid -q && cd insertuuid +mvn clean package -DskipTests=true +cp target/*.jar ../ +cd .. && rm -rf insertuuid +cd ../ + +echo "Building Cosmos DB Kafka Connect Docker image" +docker build . -t cosmosdb-kafka-connect:latest + +echo "Starting Docker Compose..." +docker-compose up -d \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/CosmosConfig.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/CosmosConfig.java deleted file mode 100644 index 41a7703fde72..000000000000 --- a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/CosmosConfig.java +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -package com.azure.cosmos.kafka.connect; - -import org.apache.kafka.common.config.AbstractConfig; -import org.apache.kafka.common.config.ConfigDef; - -import java.util.Map; - -/** - * Configuration for Cosmos DB Kafka connector - */ -public class CosmosConfig extends AbstractConfig { - - /** - * Initializes a new instance of the Cosmos DB Kafka Connector configuration - * @param definition The configuration definition - * @param originals The original config values - * @param configProviderProps The configuration overrides for this provider - * @param doLog Flag indicating whether the configuration should be logged - */ - public CosmosConfig(ConfigDef definition, Map originals, Map configProviderProps, boolean doLog) { - super(definition, originals, configProviderProps, doLog); - } -} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/CosmosDBSourceConnector.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/CosmosDBSourceConnector.java new file mode 100644 index 000000000000..d9f92e3731cd --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/CosmosDBSourceConnector.java @@ -0,0 +1,350 @@ +// Copyright (c) Microsoft Corporation. 
All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; +import com.azure.cosmos.kafka.connect.implementation.CosmosClientStore; +import com.azure.cosmos.kafka.connect.implementation.CosmosConstants; +import com.azure.cosmos.kafka.connect.implementation.CosmosExceptionsHelper; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosSourceConfig; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosSourceOffsetStorageReader; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosSourceTask; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosSourceTaskConfig; +import com.azure.cosmos.kafka.connect.implementation.source.FeedRangeContinuationTopicOffset; +import com.azure.cosmos.kafka.connect.implementation.source.FeedRangeTaskUnit; +import com.azure.cosmos.kafka.connect.implementation.source.FeedRangesMetadataTopicOffset; +import com.azure.cosmos.kafka.connect.implementation.source.KafkaCosmosChangeFeedState; +import com.azure.cosmos.kafka.connect.implementation.source.MetadataMonitorThread; +import com.azure.cosmos.kafka.connect.implementation.source.MetadataTaskUnit; +import com.azure.cosmos.models.CosmosContainerProperties; +import com.azure.cosmos.models.FeedRange; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.connect.connector.Task; +import org.apache.kafka.connect.source.SourceConnector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +/*** + * The CosmosDb source connector. + */ +public class CosmosDBSourceConnector extends SourceConnector { + private static final Logger LOGGER = LoggerFactory.getLogger(CosmosDBSourceConnector.class); + private CosmosSourceConfig config; + private CosmosAsyncClient cosmosClient; + private MetadataMonitorThread monitorThread; + private CosmosSourceOffsetStorageReader offsetStorageReader; + + @Override + public void start(Map props) { + LOGGER.info("Starting the kafka cosmos source connector"); + this.config = new CosmosSourceConfig(props); + this.cosmosClient = CosmosClientStore.getCosmosClient(this.config.getAccountConfig()); + this.offsetStorageReader = new CosmosSourceOffsetStorageReader(this.context().offsetStorageReader()); + this.monitorThread = new MetadataMonitorThread( + this.config.getContainersConfig(), + this.config.getMetadataConfig(), + this.context(), + this.offsetStorageReader, + this.cosmosClient + ); + + this.monitorThread.start(); + } + + @Override + public Class taskClass() { + return CosmosSourceTask.class; + } + + @Override + public List> taskConfigs(int maxTasks) { + // For now, we start with copying data by feed range + // but in the future, we can have more optimization based on the data size etc. 
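        // Feed range task units are spread across at most maxTasks task configs in a round-robin
        // fashion (see getTaskConfigs below). For illustration: with 5 feed range task units and
        // maxTasks = 2, task 0 gets units {0, 2, 4}, task 1 gets units {1, 3}, and the one-time
        // metadata task unit is appended to the last task config.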
+ return this.getTaskConfigs(maxTasks); + } + + @Override + public void stop() { + LOGGER.info("Stopping Kafka CosmosDB source connector"); + if (this.cosmosClient != null) { + LOGGER.debug("Closing cosmos client"); + this.cosmosClient.close(); + } + + if (this.monitorThread != null) { + LOGGER.debug("Closing monitoring thread"); + this.monitorThread.close(); + } + } + + @Override + public ConfigDef config() { + return CosmosSourceConfig.getConfigDef(); + } + + @Override + public String version() { + return CosmosConstants.CURRENT_VERSION; + } // TODO[public preview]: how this is being used + + private List> getTaskConfigs(int maxTasks) { + Pair> taskUnits = this.getAllTaskUnits(); + + // The metadataTaskUnit is a one time only task when the connector starts/restarts, + // so there is no need to assign a dedicated task thread for it + // we are just going to assign it to one of the tasks which processing feedRanges tasks + List> partitionedTaskUnits = new ArrayList<>(); + if (taskUnits.getRight().size() <= maxTasks) { + partitionedTaskUnits.addAll( + taskUnits.getRight().stream().map(taskUnit -> Arrays.asList(taskUnit)).collect(Collectors.toList())); + } else { + // using round-robin fashion to assign tasks to each buckets + for (int i = 0; i < maxTasks; i++) { + partitionedTaskUnits.add(new ArrayList<>()); + } + + for (int i = 0; i < taskUnits.getRight().size(); i++) { + partitionedTaskUnits.get(i % maxTasks).add(taskUnits.getRight().get(i)); + } + } + + List> allSourceTaskConfigs = new ArrayList<>(); + partitionedTaskUnits.forEach(feedRangeTaskUnits -> { + Map taskConfigs = this.config.originalsStrings(); + taskConfigs.putAll( + CosmosSourceTaskConfig.getFeedRangeTaskUnitsConfigMap(feedRangeTaskUnits)); + allSourceTaskConfigs.add(taskConfigs); + }); + + // assign the metadata task to the last of the task config as it has least number of feedRange task units + allSourceTaskConfigs + .get(allSourceTaskConfigs.size() - 1) + .putAll(CosmosSourceTaskConfig.getMetadataTaskUnitConfigMap(taskUnits.getLeft())); + + return allSourceTaskConfigs; + } + + private Pair> getAllTaskUnits() { + List allContainers = this.monitorThread.getAllContainers().block(); + Map containerTopicMap = this.getContainersTopicMap(allContainers); + List allFeedRangeTaskUnits = new ArrayList<>(); + Map> updatedContainerToFeedRangesMap = new ConcurrentHashMap<>(); + + for (CosmosContainerProperties containerProperties : allContainers) { + Map effectiveFeedRangesContinuationMap = + this.getEffectiveFeedRangesContinuationMap( + this.config.getContainersConfig().getDatabaseName(), + containerProperties); + + updatedContainerToFeedRangesMap.put( + containerProperties.getResourceId(), + effectiveFeedRangesContinuationMap.keySet().stream().collect(Collectors.toList()) + ); + + // add feedRange task unit + for (FeedRange effectiveFeedRange : effectiveFeedRangesContinuationMap.keySet()) { + allFeedRangeTaskUnits.add( + new FeedRangeTaskUnit( + this.config.getContainersConfig().getDatabaseName(), + containerProperties.getId(), + containerProperties.getResourceId(), + effectiveFeedRange, + effectiveFeedRangesContinuationMap.get(effectiveFeedRange), + containerTopicMap.get(containerProperties.getId()) + ) + ); + } + } + + MetadataTaskUnit metadataTaskUnit = + new MetadataTaskUnit( + this.config.getContainersConfig().getDatabaseName(), + allContainers.stream().map(CosmosContainerProperties::getResourceId).collect(Collectors.toList()), + updatedContainerToFeedRangesMap, + this.config.getMetadataConfig().getMetadataTopicName()); + + 
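        // Left: the one-time metadata task unit; right: one FeedRangeTaskUnit per effective feed range
        // across all resolved containers.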
return Pair.of(metadataTaskUnit, allFeedRangeTaskUnits); + } + + private Map getEffectiveFeedRangesContinuationMap( + String databaseName, + CosmosContainerProperties containerProperties) { + // Return effective feed ranges to be used for copying data from container + // - If there is no existing offset, then use the result from container.getFeedRanges + // - If there is existing offset, then deciding the final range sets based on: + // -----If we can find offset by matching the feedRange, then use the feedRange + // -----If we can not find offset by matching the exact feedRange, + // then it means the feedRanges of the containers have changed either due to split or merge. + // If a merge is detected, we will use the matched feedRanges from the offsets, + // otherwise use the current feedRange, but constructing the continuationState based on the previous feedRange + + List containerFeedRanges = this.getFeedRanges(containerProperties); + + FeedRangesMetadataTopicOffset feedRangesMetadataTopicOffset = + this.offsetStorageReader.getFeedRangesMetadataOffset(databaseName, containerProperties.getResourceId()); + + Map effectiveFeedRangesContinuationMap = new LinkedHashMap<>(); + CosmosAsyncContainer container = this.cosmosClient.getDatabase(databaseName).getContainer(containerProperties.getId()); + + Flux.fromIterable(containerFeedRanges) + .flatMap(containerFeedRange -> { + if (feedRangesMetadataTopicOffset == null) { + return Mono.just( + Collections.singletonMap(containerFeedRange, (KafkaCosmosChangeFeedState) null)); + } else { + // there is existing offsets, need to find out effective feedRanges based on the offset + return this.getEffectiveContinuationMapForSingleFeedRange( + databaseName, + containerProperties.getResourceId(), + containerFeedRange, + container, + feedRangesMetadataTopicOffset.getFeedRanges()); + } + }) + .doOnNext(map -> { + effectiveFeedRangesContinuationMap.putAll(map); + }) + .blockLast(); + + return effectiveFeedRangesContinuationMap; + } + + private Mono> getEffectiveContinuationMapForSingleFeedRange( + String databaseName, + String containerRid, + FeedRange containerFeedRange, + CosmosAsyncContainer cosmosAsyncContainer, + List rangesFromMetadataTopicOffset) { + + //first try to find out whether there is exact feedRange matching + FeedRangeContinuationTopicOffset feedRangeContinuationTopicOffset = + this.offsetStorageReader.getFeedRangeContinuationOffset(databaseName, containerRid, containerFeedRange); + + Map effectiveContinuationMap = new LinkedHashMap<>(); + if (feedRangeContinuationTopicOffset != null) { + // we can find the continuation offset based on exact feedRange matching + effectiveContinuationMap.put( + containerFeedRange, + this.getContinuationStateFromOffset( + feedRangeContinuationTopicOffset, + containerFeedRange)); + + return Mono.just(effectiveContinuationMap); + } + + // we can not find the continuation offset based on the exact feed range matching + // it means the previous Partition key range could have gone due to container split/merge + // need to find out overlapped feedRanges from offset + return Flux.fromIterable(rangesFromMetadataTopicOffset) + .flatMap(rangeFromOffset -> { + return ImplementationBridgeHelpers + .CosmosAsyncContainerHelper + .getCosmosAsyncContainerAccessor() + .checkFeedRangeOverlapping(cosmosAsyncContainer, rangeFromOffset, containerFeedRange) + .flatMap(overlapped -> { + if (overlapped) { + return Mono.just(rangeFromOffset); + } else { + return Mono.empty(); + } + }); + }) + .collectList() + 
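            // overlappedFeedRangesFromOffset now holds the offset feed ranges overlapping the current
            // feed range: exactly one overlap indicates the previous range was split, more than one
            // indicates a merge, and zero should never happen.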
.flatMap(overlappedFeedRangesFromOffset -> { + if (overlappedFeedRangesFromOffset.size() == 1) { + // split - use the current containerFeedRange, but construct the continuationState based on the feedRange from offset + effectiveContinuationMap.put( + containerFeedRange, + this.getContinuationStateFromOffset( + this.offsetStorageReader.getFeedRangeContinuationOffset(databaseName, containerRid, overlappedFeedRangesFromOffset.get(0)), + containerFeedRange)); + return Mono.just(effectiveContinuationMap); + } + + if (overlappedFeedRangesFromOffset.size() > 1) { + // merge - use the feed ranges from the offset + for (FeedRange overlappedRangeFromOffset : overlappedFeedRangesFromOffset) { + effectiveContinuationMap.put( + overlappedRangeFromOffset, + this.getContinuationStateFromOffset( + this.offsetStorageReader.getFeedRangeContinuationOffset(databaseName, containerRid, overlappedRangeFromOffset), + overlappedRangeFromOffset)); + } + + return Mono.just(effectiveContinuationMap); + } + + // Can not find overlapped ranges from offset, this should never happen, fail + LOGGER.error("Can not find overlapped ranges for feedRange {}", containerFeedRange); + return Mono.error(new IllegalStateException("Can not find overlapped ranges for feedRange " + containerFeedRange)); + }); + } + + private KafkaCosmosChangeFeedState getContinuationStateFromOffset( + FeedRangeContinuationTopicOffset feedRangeContinuationTopicOffset, + FeedRange feedRange) { + + KafkaCosmosChangeFeedState changeFeedState = + new KafkaCosmosChangeFeedState( + feedRangeContinuationTopicOffset.getResponseContinuation(), + feedRange, + feedRangeContinuationTopicOffset.getItemLsn()); + + return changeFeedState; + } + + private List getFeedRanges(CosmosContainerProperties containerProperties) { + return this.cosmosClient + .getDatabase(this.config.getContainersConfig().getDatabaseName()) + .getContainer(containerProperties.getId()) + .getFeedRanges() + .onErrorMap(throwable -> + CosmosExceptionsHelper.convertToConnectException( + throwable, + "GetFeedRanges failed for container " + containerProperties.getId())) + .block(); + } + + private Map getContainersTopicMap(List allContainers) { + Map topicMapFromConfig = + this.config.getContainersConfig().getContainersTopicMap() + .stream() + .map(containerTopicMapString -> containerTopicMapString.split("#")) + .collect( + Collectors.toMap( + containerTopicMapArray -> containerTopicMapArray[1], + containerTopicMapArray -> containerTopicMapArray[0])); + + Map effectiveContainersTopicMap = new HashMap<>(); + allContainers.forEach(containerProperties -> { + // by default, we are using container id as the topic name as well unless customer override through containers.topicMap + if (topicMapFromConfig.containsKey(containerProperties.getId())) { + effectiveContainersTopicMap.put( + containerProperties.getId(), + topicMapFromConfig.get(containerProperties.getId())); + } else { + effectiveContainersTopicMap.put( + containerProperties.getId(), + containerProperties.getId()); + } + }); + + return effectiveContainersTopicMap; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosAccountConfig.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosAccountConfig.java new file mode 100644 index 000000000000..49e2f731a05a --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosAccountConfig.java @@ -0,0 +1,55 @@ +// 
Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation; + +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; + +import java.util.List; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; + +public class CosmosAccountConfig { + private final String endpoint; + private final String accountKey; + private final String applicationName; + private final boolean useGatewayMode; + private final List preferredRegionsList; + + public CosmosAccountConfig( + String endpoint, + String accountKey, + String applicationName, + boolean useGatewayMode, + List preferredRegionsList) { + + checkArgument(StringUtils.isNotEmpty(endpoint), "Argument 'endpoint' should not be null"); + checkArgument(StringUtils.isNotEmpty(accountKey), "Argument 'accountKey' should not be null"); + + this.endpoint = endpoint; + this.accountKey = accountKey; + this.applicationName = applicationName; + this.useGatewayMode = useGatewayMode; + this.preferredRegionsList = preferredRegionsList; + } + + public String getEndpoint() { + return endpoint; + } + + public String getAccountKey() { + return accountKey; + } + + public String getApplicationName() { + return applicationName; + } + + public boolean isUseGatewayMode() { + return useGatewayMode; + } + + public List getPreferredRegionsList() { + return preferredRegionsList; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosClientStore.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosClientStore.java new file mode 100644 index 000000000000..40812a54500c --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosClientStore.java @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
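// CosmosClientStore builds the CosmosAsyncClient used by the connector from the account config:
// throttled (429) requests are retried up to Integer.MAX_VALUE times with a max retry wait of
// (Integer.MAX_VALUE / 1000) - 1 seconds, gateway mode (max connection pool size 10000) is applied
// when useGatewayMode is enabled, and the user agent suffix carries the connector name/version plus
// any configured application name.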
+ +package com.azure.cosmos.kafka.connect.implementation; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.GatewayConnectionConfig; +import com.azure.cosmos.ThrottlingRetryOptions; +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; + +import java.time.Duration; + +public class CosmosClientStore { + public static CosmosAsyncClient getCosmosClient(CosmosAccountConfig accountConfig) { + if (accountConfig == null) { + return null; + } + + CosmosClientBuilder cosmosClientBuilder = new CosmosClientBuilder() + .endpoint(accountConfig.getEndpoint()) + .key(accountConfig.getAccountKey()) + .preferredRegions(accountConfig.getPreferredRegionsList()) + .throttlingRetryOptions( + new ThrottlingRetryOptions() + .setMaxRetryAttemptsOnThrottledRequests(Integer.MAX_VALUE) + .setMaxRetryWaitTime(Duration.ofSeconds((Integer.MAX_VALUE / 1000) - 1))) + .userAgentSuffix(getUserAgentSuffix(accountConfig)); + + if (accountConfig.isUseGatewayMode()) { + cosmosClientBuilder.gatewayMode(new GatewayConnectionConfig().setMaxConnectionPoolSize(10000)); + } + + return cosmosClientBuilder.buildAsyncClient(); + } + + private static String getUserAgentSuffix(CosmosAccountConfig accountConfig) { + if (StringUtils.isNotEmpty(accountConfig.getApplicationName())) { + return CosmosConstants.USER_AGENT_SUFFIX + "|" + accountConfig.getApplicationName(); + } + + return CosmosConstants.USER_AGENT_SUFFIX; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosConfig.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosConfig.java new file mode 100644 index 000000000000..4aecdff89c87 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosConfig.java @@ -0,0 +1,187 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation; + +import com.azure.cosmos.implementation.Strings; +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import org.apache.kafka.common.config.AbstractConfig; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.config.ConfigException; + +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Common Configuration for Cosmos DB Kafka source connector and sink connector. 
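 * Account-level settings (endpoint, key, gateway mode, preferred regions, application name) are
 * parsed and validated here; connector-specific settings are defined in subclasses such as
 * {@code CosmosSourceConfig}.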
+ */ +public class CosmosConfig extends AbstractConfig { + protected static final ConfigDef.Validator NON_EMPTY_STRING = new ConfigDef.NonEmptyString(); + private static final String CONFIG_PREFIX = "kafka.connect.cosmos."; + + // Account config + private static final String ACCOUNT_ENDPOINT_CONFIG = CONFIG_PREFIX + "accountEndpoint"; + private static final String ACCOUNT_ENDPOINT_CONFIG_DOC = "Cosmos DB Account Endpoint Uri."; + private static final String ACCOUNT_ENDPOINT_CONFIG_DISPLAY = "Cosmos DB Account Endpoint Uri."; + + private static final String ACCOUNT_KEY_CONFIG = CONFIG_PREFIX + "accountKey"; + private static final String ACCOUNT_KEY_CONFIG_DOC = "Cosmos DB Account Key."; + private static final String ACCOUNT_KEY_CONFIG_DISPLAY = "Cosmos DB Account Key."; + + private static final String USE_GATEWAY_MODE = CONFIG_PREFIX + "useGatewayMode"; + private static final String USE_GATEWAY_MODE_DOC = "Flag to indicate whether to use gateway mode. By default it is false."; + private static final String USE_GATEWAY_MODE_DISPLAY = "Use gateway mode."; + private static final boolean DEFAULT_USE_GATEWAY_MODE = false; + + private static final String PREFERRED_REGIONS_LIST = CONFIG_PREFIX + "preferredRegionsList"; + private static final String PREFERRED_REGIONS_LIST_DOC = "Preferred regions list to be used for a multi region Cosmos DB account. " + + "This is a comma separated value (e.g., `[East US, West US]` or `East US, West US`) provided preferred regions will be used as hint. " + + "You should use a collocated kafka cluster with your Cosmos DB account and pass the kafka cluster region as preferred region. " + + "See list of azure regions [here](https://docs.microsoft.com/dotnet/api/microsoft.azure.documents.locationnames?view=azure-dotnet&preserve-view=true)."; + private static final String PREFERRED_REGIONS_LIST_DISPLAY = "Preferred regions list."; + + private static final String APPLICATION_NAME = CONFIG_PREFIX + "applicationName"; + private static final String APPLICATION_NAME_DOC = "Application name. 
Will be added as the userAgent suffix."; + private static final String APPLICATION_NAME_DISPLAY = "Application name."; + + private final CosmosAccountConfig accountConfig; + + public CosmosConfig(ConfigDef config, Map parsedConfig) { + super(config, parsedConfig); + this.accountConfig = this.parseAccountConfig(); + } + + private CosmosAccountConfig parseAccountConfig() { + String endpoint = this.getString(ACCOUNT_ENDPOINT_CONFIG); + String accountKey = this.getPassword(ACCOUNT_KEY_CONFIG).value(); + String applicationName = this.getString(APPLICATION_NAME); + boolean useGatewayMode = this.getBoolean(USE_GATEWAY_MODE); + List preferredRegionList = this.getPreferredRegionList(); + + return new CosmosAccountConfig( + endpoint, + accountKey, + applicationName, + useGatewayMode, + preferredRegionList); + } + + private List getPreferredRegionList() { + return convertToList(this.getString(PREFERRED_REGIONS_LIST)); + } + + public static ConfigDef getConfigDef() { + ConfigDef configDef = new ConfigDef(); + + defineAccountConfig(configDef); + + return configDef; + } + + private static void defineAccountConfig(ConfigDef result) { + final String accountGroupName = "account"; + int accountGroupOrder = 0; + + // For optional config, need to provide a default value + result + .define( + ACCOUNT_ENDPOINT_CONFIG, + ConfigDef.Type.STRING, + ConfigDef.NO_DEFAULT_VALUE, + new AccountEndpointValidator(), + ConfigDef.Importance.HIGH, + ACCOUNT_ENDPOINT_CONFIG_DOC, + accountGroupName, + accountGroupOrder++, + ConfigDef.Width.LONG, + ACCOUNT_ENDPOINT_CONFIG_DISPLAY + ) + .define( + ACCOUNT_KEY_CONFIG, + ConfigDef.Type.PASSWORD, + ConfigDef.NO_DEFAULT_VALUE, + ConfigDef.Importance.HIGH, + ACCOUNT_KEY_CONFIG_DOC, + accountGroupName, + accountGroupOrder++, + ConfigDef.Width.LONG, + ACCOUNT_KEY_CONFIG_DISPLAY + ) + .define( + APPLICATION_NAME, + ConfigDef.Type.STRING, + Strings.Emtpy, + ConfigDef.Importance.MEDIUM, + APPLICATION_NAME_DOC, + accountGroupName, + accountGroupOrder++, + ConfigDef.Width.LONG, + APPLICATION_NAME_DISPLAY + ) + .define( + USE_GATEWAY_MODE, + ConfigDef.Type.BOOLEAN, + DEFAULT_USE_GATEWAY_MODE, + ConfigDef.Importance.LOW, + USE_GATEWAY_MODE_DOC, + accountGroupName, + accountGroupOrder++, + ConfigDef.Width.MEDIUM, + USE_GATEWAY_MODE_DISPLAY + ) + .define( + PREFERRED_REGIONS_LIST, + ConfigDef.Type.STRING, + Strings.Emtpy, + ConfigDef.Importance.HIGH, + PREFERRED_REGIONS_LIST_DOC, + accountGroupName, + accountGroupOrder++, + ConfigDef.Width.LONG, + PREFERRED_REGIONS_LIST_DISPLAY + ); + } + + public CosmosAccountConfig getAccountConfig() { + return accountConfig; + } + + public static class AccountEndpointValidator implements ConfigDef.Validator { + @Override + @SuppressWarnings("unchecked") + public void ensureValid(String name, Object o) { + String accountEndpointUriString = (String) o; + if (StringUtils.isEmpty(accountEndpointUriString)) { + throw new ConfigException(name, o, "Account endpoint can not be empty"); + } + + try { + new URL(accountEndpointUriString); + } catch (MalformedURLException e) { + throw new ConfigException(name, o, "Invalid account endpoint."); + } + } + + @Override + public String toString() { + return "Account endpoint"; + } + } + + protected static List convertToList(String configValue) { + if (StringUtils.isNotEmpty(configValue)) { + if (configValue.startsWith("[") && configValue.endsWith("]")) { + configValue = configValue.substring(1, configValue.length() - 1); + } + + return Arrays.stream(configValue.split(",")).map(String::trim).collect(Collectors.toList()); + } + 
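        // For example, "[East US, West US]" and "East US, West US" both become ["East US", "West US"];
        // an empty or null value falls through to the empty list below.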
+ return new ArrayList<>(); + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosConstants.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosConstants.java new file mode 100644 index 000000000000..afeac46866b9 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosConstants.java @@ -0,0 +1,13 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation; + +import com.azure.core.util.CoreUtils; + +public class CosmosConstants { + public static final String PROPERTIES_FILE_NAME = "azure-cosmos-kafka-connect.properties"; + public static final String CURRENT_VERSION = CoreUtils.getProperties(PROPERTIES_FILE_NAME).get("version"); + public static final String CURRENT_NAME = CoreUtils.getProperties(PROPERTIES_FILE_NAME).get("name"); + public static final String USER_AGENT_SUFFIX = String.format("KafkaConnect/%s/%s", CURRENT_NAME, CURRENT_VERSION); +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosExceptionsHelper.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosExceptionsHelper.java new file mode 100644 index 000000000000..90af873d622c --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/CosmosExceptionsHelper.java @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation; + +import com.azure.cosmos.CosmosException; +import org.apache.kafka.connect.errors.ConnectException; +import org.apache.kafka.connect.errors.RetriableException; + +public class CosmosExceptionsHelper { + public static boolean isTransientFailure(int statusCode, int substatusCode) { + return statusCode == KafkaCosmosConstants.StatusCodes.GONE + || statusCode == KafkaCosmosConstants.StatusCodes.SERVICE_UNAVAILABLE + || statusCode == KafkaCosmosConstants.StatusCodes.INTERNAL_SERVER_ERROR + || statusCode == KafkaCosmosConstants.StatusCodes.REQUEST_TIMEOUT + || (statusCode == KafkaCosmosConstants.StatusCodes.NOTFOUND && substatusCode == KafkaCosmosConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE); + + } + + public static boolean isTransientFailure(Throwable e) { + if (e instanceof CosmosException) { + return isTransientFailure(((CosmosException) e).getStatusCode(), ((CosmosException) e).getSubStatusCode()); + } + + return false; + } + + public static boolean isFeedRangeGoneException(Throwable throwable) { + if (throwable instanceof CosmosException) { + return isFeedRangeGoneException( + ((CosmosException) throwable).getStatusCode(), + ((CosmosException) throwable).getSubStatusCode()); + } + + return false; + } + + public static boolean isFeedRangeGoneException(int statusCode, int substatusCode) { + return statusCode == KafkaCosmosConstants.StatusCodes.GONE + && (substatusCode == KafkaCosmosConstants.SubStatusCodes.PARTITION_KEY_RANGE_GONE + || substatusCode == KafkaCosmosConstants.SubStatusCodes.COMPLETING_SPLIT_OR_MERGE); + } + + public static ConnectException convertToConnectException(Throwable throwable, String message) { + if (CosmosExceptionsHelper.isTransientFailure(throwable)) { + return new RetriableException(message, 
throwable); + } + + return new ConnectException(message, throwable); + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/KafkaCosmosConstants.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/KafkaCosmosConstants.java new file mode 100644 index 000000000000..e2d80b719e4e --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/KafkaCosmosConstants.java @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation; + +public class KafkaCosmosConstants { + + public static class StatusCodes { + public static final int NOTFOUND = 404; + public static final int REQUEST_TIMEOUT = 408; + public static final int GONE = 410; + + public static final int SERVICE_UNAVAILABLE = 503; + public static final int INTERNAL_SERVER_ERROR = 500; + } + + public static class SubStatusCodes { + public static final int READ_SESSION_NOT_AVAILABLE = 1002; + public static final int PARTITION_KEY_RANGE_GONE = 1002; + public static final int COMPLETING_SPLIT_OR_MERGE = 1007; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/ContainersMetadataTopicOffset.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/ContainersMetadataTopicOffset.java new file mode 100644 index 000000000000..10d7885bbde2 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/ContainersMetadataTopicOffset.java @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.Utils; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; + +/** + * Containers metadata topic offset. 
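 * The container resource ids are serialized to (and parsed from) a JSON array string stored under
 * the {@code cosmos.source.metadata.containerRids} offset key.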
+ */ +public class ContainersMetadataTopicOffset { + public static final String CONTAINERS_RESOURCE_IDS_NAME_KEY = "cosmos.source.metadata.containerRids"; + public static final ObjectMapper OBJECT_MAPPER = Utils.getSimpleObjectMapper(); + + private final List containerRids; + public ContainersMetadataTopicOffset(List containerRids) { + checkNotNull(containerRids, "Argument 'containerRids' can not be null"); + this.containerRids = containerRids; + } + + public List getContainerRids() { + return containerRids; + } + + public static Map toMap(ContainersMetadataTopicOffset offset) { + Map map = new HashMap<>(); + try { + map.put( + CONTAINERS_RESOURCE_IDS_NAME_KEY, + OBJECT_MAPPER.writeValueAsString(offset.getContainerRids())); + return map; + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } + + @SuppressWarnings("unchecked") + public static ContainersMetadataTopicOffset fromMap(Map offsetMap) { + if (offsetMap == null) { + return null; + } + + try { + List containerRids = + OBJECT_MAPPER + .readValue(offsetMap.get(CONTAINERS_RESOURCE_IDS_NAME_KEY).toString(), new TypeReference>() {}); + return new ContainersMetadataTopicOffset(containerRids); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/ContainersMetadataTopicPartition.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/ContainersMetadataTopicPartition.java new file mode 100644 index 000000000000..b2ae0e6de93e --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/ContainersMetadataTopicPartition.java @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; + +import java.util.HashMap; +import java.util.Map; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; + +public class ContainersMetadataTopicPartition { + public static final String DATABASE_NAME_KEY = "cosmos.source.metadata.database.name"; + + private final String databaseName; + + public ContainersMetadataTopicPartition(String databaseName) { + checkArgument(StringUtils.isNotEmpty(databaseName), "Argument 'databaseName' can not be null"); + + this.databaseName = databaseName; + } + + public String getDatabaseName() { + return databaseName; + } + + public static Map toMap(ContainersMetadataTopicPartition topicPartition) { + Map map = new HashMap<>(); + map.put(DATABASE_NAME_KEY, topicPartition.getDatabaseName()); + return map; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosChangeFeedModes.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosChangeFeedModes.java new file mode 100644 index 000000000000..3753001f3e44 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosChangeFeedModes.java @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
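// Maps the user-facing kafka.connect.cosmos.source.changeFeed.mode values ("LatestVersion" and
// "AllVersionsAndDeletes") to enum constants; fromName returns null for an unrecognized value.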
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +public enum CosmosChangeFeedModes { + LATEST_VERSION("LatestVersion"), + ALL_VERSION_AND_DELETES("AllVersionsAndDeletes"); + + private final String name; + CosmosChangeFeedModes(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + public static CosmosChangeFeedModes fromName(String name) { + for (CosmosChangeFeedModes mode : CosmosChangeFeedModes.values()) { + if (mode.getName().equals(name)) { + return mode; + } + } + return null; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosChangeFeedStartFromModes.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosChangeFeedStartFromModes.java new file mode 100644 index 000000000000..439df9f826a5 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosChangeFeedStartFromModes.java @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation.source; + +public enum CosmosChangeFeedStartFromModes { + BEGINNING("Beginning"), + NOW("Now"), + POINT_IN_TIME("PointInTime"); + + private final String name; + CosmosChangeFeedStartFromModes(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + public static CosmosChangeFeedStartFromModes fromName(String name) { + for (CosmosChangeFeedStartFromModes startFromModes : CosmosChangeFeedStartFromModes.values()) { + if (startFromModes.getName().equals(name)) { + return startFromModes; + } + } + return null; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosMetadataConfig.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosMetadataConfig.java new file mode 100644 index 000000000000..91f54088cc31 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosMetadataConfig.java @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
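// Holds the metadata polling settings: the poll delay must be greater than 0 ms and the metadata
// topic name must be non-empty, otherwise construction fails fast via checkArgument.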
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; + +public class CosmosMetadataConfig { + private final int metadataPollDelayInMs; + private final String metadataTopicName; + + public CosmosMetadataConfig(int metadataPollDelayInMs, String metadataTopicName) { + checkArgument(StringUtils.isNotEmpty(metadataTopicName), "Argument 'metadataTopicName' can not be null"); + checkArgument(metadataPollDelayInMs > 0, "Argument 'metadataPollDelayInMs' should be larger than 0"); + + this.metadataPollDelayInMs = metadataPollDelayInMs; + this.metadataTopicName = metadataTopicName; + } + + public int getMetadataPollDelayInMs() { + return metadataPollDelayInMs; + } + + public String getMetadataTopicName() { + return metadataTopicName; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceChangeFeedConfig.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceChangeFeedConfig.java new file mode 100644 index 000000000000..cff4305fcaa2 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceChangeFeedConfig.java @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation.source; + +import java.time.Instant; + +public class CosmosSourceChangeFeedConfig { + private final CosmosChangeFeedModes changeFeedModes; + private final CosmosChangeFeedStartFromModes changeFeedStartFromModes; + private final Instant startFrom; + private final int maxItemCountHint; + + public CosmosSourceChangeFeedConfig( + CosmosChangeFeedModes changeFeedModes, + CosmosChangeFeedStartFromModes changeFeedStartFromModes, + Instant startFrom, + int maxItemCountHint) { + this.changeFeedModes = changeFeedModes; + this.changeFeedStartFromModes = changeFeedStartFromModes; + this.startFrom = startFrom; + this.maxItemCountHint = maxItemCountHint; + } + + public CosmosChangeFeedModes getChangeFeedModes() { + return changeFeedModes; + } + + public CosmosChangeFeedStartFromModes getChangeFeedStartFromModes() { + return changeFeedStartFromModes; + } + + public Instant getStartFrom() { + return startFrom; + } + + public int getMaxItemCountHint() { + return maxItemCountHint; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceConfig.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceConfig.java new file mode 100644 index 000000000000..1ede731b5498 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceConfig.java @@ -0,0 +1,472 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
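
Editor's note: CosmosMetadataConfig and CosmosSourceChangeFeedConfig above are immutable holders built once from the parsed connector configuration; the metadata holder also enforces its invariants eagerly. A construction sketch with illustrative values:

package com.azure.cosmos.kafka.connect.implementation.source;

import java.time.Instant;

public class SourceConfigHoldersSketch {
    public static void main(String[] args) {
        // 5 minute poll delay and the default metadata topic name.
        CosmosMetadataConfig metadataConfig =
            new CosmosMetadataConfig(5 * 60 * 1000, "_cosmos.metadata.topic");

        // A point-in-time start carries an explicit Instant; Beginning/Now leave it null.
        CosmosSourceChangeFeedConfig changeFeedConfig =
            new CosmosSourceChangeFeedConfig(
                CosmosChangeFeedModes.LATEST_VERSION,
                CosmosChangeFeedStartFromModes.POINT_IN_TIME,
                Instant.parse("2020-02-10T14:15:03Z"),
                1000);

        System.out.println(metadataConfig.getMetadataTopicName());
        System.out.println(changeFeedConfig.getStartFrom());

        // A non-positive poll delay is rejected at construction time:
        // new CosmosMetadataConfig(0, "_cosmos.metadata.topic") throws IllegalArgumentException.
    }
}
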
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.Strings; +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.kafka.connect.implementation.CosmosConfig; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.config.ConfigException; + +import java.time.Instant; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.List; +import java.util.Map; + +/** + * Common Configuration for Cosmos DB Kafka source connector. + */ +public class CosmosSourceConfig extends CosmosConfig { + + // configuration only targets to source connector + private static final String SOURCE_CONFIG_PREFIX = "kafka.connect.cosmos.source."; + + // database name + private static final String DATABASE_NAME_CONF = SOURCE_CONFIG_PREFIX + "database.name"; + private static final String DATABASE_NAME_CONF_DOC = "Cosmos DB database name."; + private static final String DATABASE_NAME_CONF_DISPLAY = "Cosmos DB database name."; + + // Source containers config + private static final String CONTAINERS_INCLUDE_ALL_CONFIG = SOURCE_CONFIG_PREFIX + "containers.includeAll"; + private static final String CONTAINERS_INCLUDE_ALL_CONFIG_DOC = "Flag to indicate whether reading from all containers."; + private static final String CONTAINERS_INCLUDE_ALL_CONFIG_DISPLAY = "Include all containers."; + private static final boolean DEFAULT_CONTAINERS_INCLUDE_ALL = false; + + private static final String CONTAINERS_INCLUDED_LIST_CONFIG = SOURCE_CONFIG_PREFIX + "containers.includedList"; + private static final String CONTAINERS_INCLUDED_LIST_CONFIG_DOC = + "Containers included. This config will be ignored if kafka.connect.cosmos.source.includeAllContainers is true."; + private static final String CONTAINERS_INCLUDED_LIST_CONFIG_DISPLAY = "Containers included."; + + private static final String CONTAINERS_TOPIC_MAP_CONFIG = SOURCE_CONFIG_PREFIX + "containers.topicMap"; + private static final String CONTAINERS_TOPIC_MAP_CONFIG_DOC = + "A comma delimited list of Kafka topics mapped to Cosmos containers. For example: topic1#con1,topic2#con2. " + + "By default, container name is used as the name of the kafka topic to publish data to, " + + "can use this property to override the default config "; + private static final String CONTAINERS_TOPIC_MAP_CONFIG_DISPLAY = "Cosmos container topic map."; + + // changeFeed config + private static final String CHANGE_FEED_START_FROM_CONFIG = SOURCE_CONFIG_PREFIX + "changeFeed.startFrom"; + private static final String CHANGE_FEED_START_FROM_CONFIG_DOC = "ChangeFeed Start from settings (Now, Beginning " + + "or a certain point in time (UTC) for example 2020-02-10T14:15:03) - the default value is 'Beginning'. 
"; + private static final String CHANGE_FEED_START_FROM_CONFIG_DISPLAY = "Change feed start from."; + private static final String DEFAULT_CHANGE_FEED_START_FROM = CosmosChangeFeedStartFromModes.BEGINNING.getName(); + + private static final String CHANGE_FEED_MODE_CONFIG = SOURCE_CONFIG_PREFIX + "changeFeed.mode"; + private static final String CHANGE_FEED_MODE_CONFIG_DOC = "ChangeFeed mode (LatestVersion or AllVersionsAndDeletes)"; + private static final String CHANGE_FEED_MODE_CONFIG_DISPLAY = "ChangeFeed mode (LatestVersion or AllVersionsAndDeletes)"; + private static final String DEFAULT_CHANGE_FEED_MODE = CosmosChangeFeedModes.LATEST_VERSION.getName(); + + private static final String CHANGE_FEED_MAX_ITEM_COUNT_CONFIG = SOURCE_CONFIG_PREFIX + "changeFeed.maxItemCountHint"; + private static final String CHANGE_FEED_MAX_ITEM_COUNT_CONFIG_DOC = + "The maximum number of documents returned in a single change feed request." + + " But the number of items received might be higher than the specified value if multiple items are changed by the same transaction." + + " The default is 1000."; + private static final String CHANGE_FEED_MAX_ITEM_COUNT_CONFIG_DISPLAY = "The maximum number hint of documents returned in a single request. "; + private static final int DEFAULT_CHANGE_FEED_MAX_ITEM_COUNT = 1000; + + // Metadata config + private static final String METADATA_POLL_DELAY_MS_CONFIG = SOURCE_CONFIG_PREFIX + "metadata.poll.delay.ms"; + private static final String METADATA_POLL_DELAY_MS_CONFIG_DOC = + "Indicates how often to check the metadata changes (including container split/merge, adding/removing/recreated containers). " + + "When changes are detected, it will reconfigure the tasks. Default is 5 minutes."; + private static final String METADATA_POLL_DELAY_MS_CONFIG_DISPLAY = "Metadata polling delay in ms."; + private static final int DEFAULT_METADATA_POLL_DELAY_MS = 5 * 60 * 1000; // default is every 5 minutes + + private static final String METADATA_STORAGE_TOPIC_CONFIG = SOURCE_CONFIG_PREFIX + "metadata.storage.topic"; + private static final String METADATA_STORAGE_TOPIC_CONFIG_DOC = "The name of the topic where the metadata are stored. " + + "The metadata topic will be created if it does not already exist, else it will use the pre-created topic."; + private static final String METADATA_STORAGE_TOPIC_CONFIG_DISPLAY = "Metadata storage topic."; + private static final String DEFAULT_METADATA_STORAGE_TOPIC = "_cosmos.metadata.topic"; + + // messageKey + private static final String MESSAGE_KEY_ENABLED_CONF = SOURCE_CONFIG_PREFIX + "messageKey.enabled"; + private static final String MESSAGE_KEY_ENABLED_CONF_DOC = "Whether to set the kafka record message key."; + private static final String MESSAGE_KEY_ENABLED_CONF_DISPLAY = "Kafka record message key enabled."; + private static final boolean DEFAULT_MESSAGE_KEY_ENABLED = true; + + private static final String MESSAGE_KEY_FIELD_CONFIG = SOURCE_CONFIG_PREFIX + "messageKey.field"; + private static final String MESSAGE_KEY_FIELD_CONFIG_DOC = "The field to use as the message key."; + private static final String MESSAGE_KEY_FIELD_CONFIG_DISPLAY = "Kafka message key field."; + private static final String DEFAULT_MESSAGE_KEY_FIELD = "id"; // TODO: should we use pk instead? 
+ + private final CosmosSourceContainersConfig containersConfig; + private final CosmosMetadataConfig metadataConfig; + private final CosmosSourceChangeFeedConfig changeFeedConfig; + private final CosmosSourceMessageKeyConfig messageKeyConfig; + + public CosmosSourceConfig(Map parsedConfigs) { + this(getConfigDef(), parsedConfigs); + } + + public CosmosSourceConfig(ConfigDef configDef, Map parsedConfigs) { + super(configDef, parsedConfigs); + this.containersConfig = this.parseContainersConfig(); + this.metadataConfig = this.parseMetadataConfig(); + this.changeFeedConfig = this.parseChangeFeedConfig(); + this.messageKeyConfig = this.parseMessageKeyConfig(); + } + + public static ConfigDef getConfigDef() { + ConfigDef configDef = CosmosConfig.getConfigDef(); + + defineContainersConfig(configDef); + defineMetadataConfig(configDef); + defineChangeFeedConfig(configDef); + defineMessageKeyConfig(configDef); + + return configDef; + } + + private static void defineContainersConfig(ConfigDef result) { + final String containersGroupName = "Containers"; + int containersGroupOrder = 0; + + result + .define( + DATABASE_NAME_CONF, + ConfigDef.Type.STRING, + ConfigDef.NO_DEFAULT_VALUE, + NON_EMPTY_STRING, + ConfigDef.Importance.HIGH, + DATABASE_NAME_CONF_DOC, + containersGroupName, + containersGroupOrder++, + ConfigDef.Width.LONG, + DATABASE_NAME_CONF_DISPLAY + ) + .define( + CONTAINERS_INCLUDE_ALL_CONFIG, + ConfigDef.Type.BOOLEAN, + DEFAULT_CONTAINERS_INCLUDE_ALL, + ConfigDef.Importance.HIGH, + CONTAINERS_INCLUDE_ALL_CONFIG_DOC, + containersGroupName, + containersGroupOrder++, + ConfigDef.Width.MEDIUM, + CONTAINERS_INCLUDE_ALL_CONFIG_DISPLAY + ) + .define( + CONTAINERS_INCLUDED_LIST_CONFIG, + ConfigDef.Type.STRING, + Strings.Emtpy, + ConfigDef.Importance.MEDIUM, + CONTAINERS_INCLUDED_LIST_CONFIG_DOC, + containersGroupName, + containersGroupOrder++, + ConfigDef.Width.LONG, + CONTAINERS_INCLUDED_LIST_CONFIG_DISPLAY + ) + .define( + CONTAINERS_TOPIC_MAP_CONFIG, + ConfigDef.Type.STRING, + Strings.Emtpy, + new ContainersTopicMapValidator(), + ConfigDef.Importance.MEDIUM, + CONTAINERS_TOPIC_MAP_CONFIG_DOC, + containersGroupName, + containersGroupOrder++, + ConfigDef.Width.LONG, + CONTAINERS_TOPIC_MAP_CONFIG_DISPLAY + ); + } + + private static void defineMetadataConfig(ConfigDef result) { + final String metadataGroupName = "Metadata"; + int metadataGroupOrder = 0; + + result + .define( + METADATA_POLL_DELAY_MS_CONFIG, + ConfigDef.Type.INT, + DEFAULT_METADATA_POLL_DELAY_MS, + new PositiveValueValidator(), + ConfigDef.Importance.MEDIUM, + METADATA_POLL_DELAY_MS_CONFIG_DOC, + metadataGroupName, + metadataGroupOrder++, + ConfigDef.Width.MEDIUM, + METADATA_POLL_DELAY_MS_CONFIG_DISPLAY + ) + .define( + METADATA_STORAGE_TOPIC_CONFIG, + ConfigDef.Type.STRING, + DEFAULT_METADATA_STORAGE_TOPIC, + NON_EMPTY_STRING, + ConfigDef.Importance.HIGH, + METADATA_STORAGE_TOPIC_CONFIG_DOC, + metadataGroupName, + metadataGroupOrder++, + ConfigDef.Width.LONG, + METADATA_STORAGE_TOPIC_CONFIG_DISPLAY + ); + } + + private static void defineChangeFeedConfig(ConfigDef result) { + final String changeFeedGroupName = "ChangeFeed"; + int changeFeedGroupOrder = 0; + + result + .define( + CHANGE_FEED_MODE_CONFIG, + ConfigDef.Type.STRING, + DEFAULT_CHANGE_FEED_MODE, + new ChangeFeedModeValidator(), + ConfigDef.Importance.HIGH, + CHANGE_FEED_MODE_CONFIG_DOC, + changeFeedGroupName, + changeFeedGroupOrder++, + ConfigDef.Width.MEDIUM, + CHANGE_FEED_MODE_CONFIG_DISPLAY + ) + .define( + CHANGE_FEED_START_FROM_CONFIG, + ConfigDef.Type.STRING, + 
DEFAULT_CHANGE_FEED_START_FROM, + new ChangeFeedStartFromValidator(), + ConfigDef.Importance.HIGH, + CHANGE_FEED_START_FROM_CONFIG_DOC, + changeFeedGroupName, + changeFeedGroupOrder++, + ConfigDef.Width.MEDIUM, + CHANGE_FEED_START_FROM_CONFIG_DISPLAY + ) + .define( + CHANGE_FEED_MAX_ITEM_COUNT_CONFIG, + ConfigDef.Type.INT, + DEFAULT_CHANGE_FEED_MAX_ITEM_COUNT, + new PositiveValueValidator(), + ConfigDef.Importance.MEDIUM, + CHANGE_FEED_MAX_ITEM_COUNT_CONFIG_DOC, + changeFeedGroupName, + changeFeedGroupOrder++, + ConfigDef.Width.MEDIUM, + CHANGE_FEED_MAX_ITEM_COUNT_CONFIG_DISPLAY + ); + } + + private static void defineMessageKeyConfig(ConfigDef result) { + final String messageGroupName = "Message Key"; + int messageGroupOrder = 0; + + result + .define( + MESSAGE_KEY_ENABLED_CONF, + ConfigDef.Type.BOOLEAN, + DEFAULT_MESSAGE_KEY_ENABLED, + ConfigDef.Importance.MEDIUM, + MESSAGE_KEY_ENABLED_CONF_DOC, + messageGroupName, + messageGroupOrder++, + ConfigDef.Width.SHORT, + MESSAGE_KEY_ENABLED_CONF_DISPLAY + ) + .define( + MESSAGE_KEY_FIELD_CONFIG, + ConfigDef.Type.STRING, + DEFAULT_MESSAGE_KEY_FIELD, + ConfigDef.Importance.HIGH, + MESSAGE_KEY_FIELD_CONFIG_DOC, + messageGroupName, + messageGroupOrder++, + ConfigDef.Width.MEDIUM, + MESSAGE_KEY_FIELD_CONFIG_DISPLAY + ); + } + + private CosmosSourceContainersConfig parseContainersConfig() { + String databaseName = this.getString(DATABASE_NAME_CONF); + boolean includeAllContainers = this.getBoolean(CONTAINERS_INCLUDE_ALL_CONFIG); + List containersIncludedList = this.getContainersIncludedList(); + List containersTopicMap = this.getContainersTopicMap(); + + return new CosmosSourceContainersConfig( + databaseName, + includeAllContainers, + containersIncludedList, + containersTopicMap + ); + } + + private List getContainersIncludedList() { + return convertToList(this.getString(CONTAINERS_INCLUDED_LIST_CONFIG)); + } + + private List getContainersTopicMap() { + return convertToList(this.getString(CONTAINERS_TOPIC_MAP_CONFIG)); + } + + private CosmosMetadataConfig parseMetadataConfig() { + int metadataPollDelayInMs = this.getInt(METADATA_POLL_DELAY_MS_CONFIG); + String metadataTopicName = this.getString(METADATA_STORAGE_TOPIC_CONFIG); + + return new CosmosMetadataConfig(metadataPollDelayInMs, metadataTopicName); + } + + private CosmosSourceChangeFeedConfig parseChangeFeedConfig() { + CosmosChangeFeedModes changeFeedModes = this.parseChangeFeedMode(); + CosmosChangeFeedStartFromModes changeFeedStartFromMode = this.parseChangeFeedStartFromMode(); + Instant changeFeedStartFrom = this.parseChangeFeedStartFrom(changeFeedStartFromMode); + Integer changeFeedMaxItemCountHint = this.getInt(CHANGE_FEED_MAX_ITEM_COUNT_CONFIG); + + return new CosmosSourceChangeFeedConfig( + changeFeedModes, + changeFeedStartFromMode, + changeFeedStartFrom, + changeFeedMaxItemCountHint); + } + + private CosmosSourceMessageKeyConfig parseMessageKeyConfig() { + boolean messageKeyEnabled = this.getBoolean(MESSAGE_KEY_ENABLED_CONF); + String messageKeyField = this.getString(MESSAGE_KEY_FIELD_CONFIG); + + return new CosmosSourceMessageKeyConfig(messageKeyEnabled, messageKeyField); + } + private CosmosChangeFeedStartFromModes parseChangeFeedStartFromMode() { + String changeFeedStartFrom = this.getString(CHANGE_FEED_START_FROM_CONFIG); + if (changeFeedStartFrom.equalsIgnoreCase(CosmosChangeFeedStartFromModes.BEGINNING.getName())) { + return CosmosChangeFeedStartFromModes.BEGINNING; + } + + if (changeFeedStartFrom.equalsIgnoreCase(CosmosChangeFeedStartFromModes.NOW.getName())) { + return 
CosmosChangeFeedStartFromModes.NOW; + } + + return CosmosChangeFeedStartFromModes.POINT_IN_TIME; + } + + private Instant parseChangeFeedStartFrom(CosmosChangeFeedStartFromModes startFromMode) { + if (startFromMode == CosmosChangeFeedStartFromModes.POINT_IN_TIME) { + String changeFeedStartFrom = this.getString(CHANGE_FEED_START_FROM_CONFIG); + return Instant.from(DateTimeFormatter.ISO_INSTANT.parse(changeFeedStartFrom.trim())); + } + + return null; + } + + private CosmosChangeFeedModes parseChangeFeedMode() { + String changeFeedMode = this.getString(CHANGE_FEED_MODE_CONFIG); + return CosmosChangeFeedModes.fromName(changeFeedMode); + } + + public CosmosSourceContainersConfig getContainersConfig() { + return containersConfig; + } + + public CosmosMetadataConfig getMetadataConfig() { + return metadataConfig; + } + + public CosmosSourceChangeFeedConfig getChangeFeedConfig() { + return changeFeedConfig; + } + + public CosmosSourceMessageKeyConfig getMessageKeyConfig() { + return messageKeyConfig; + } + + public static class ContainersTopicMapValidator implements ConfigDef.Validator { + private static final String INVALID_TOPIC_MAP_FORMAT = + "Invalid entry for topic-container map. The topic-container map should be a comma-delimited " + + "list of Kafka topic to Cosmos containers. Each mapping should be a pair of Kafka " + + "topic and Cosmos container separated by '#'. For example: topic1#con1,topic2#con2."; + + @Override + @SuppressWarnings("unchecked") + public void ensureValid(String name, Object o) { + String configValue = (String) o; + if (StringUtils.isEmpty(configValue)) { + return; + } + + List containerTopicMapList = convertToList(configValue); + + // validate each item should be in topic#container format + boolean invalidFormatExists = + containerTopicMapList + .stream() + .anyMatch(containerTopicMap -> + containerTopicMap + .split(CosmosSourceContainersConfig.CONTAINER_TOPIC_MAP_SEPARATOR) + .length != 2); + + if (invalidFormatExists) { + throw new ConfigException(name, o, INVALID_TOPIC_MAP_FORMAT); + } + } + + @Override + public String toString() { + return "Containers topic map"; + } + } + + public static class ChangeFeedModeValidator implements ConfigDef.Validator { + @Override + @SuppressWarnings("unchecked") + public void ensureValid(String name, Object o) { + String changeFeedModeString = (String) o; + if (StringUtils.isEmpty(changeFeedModeString)) { + throw new ConfigException(name, o, "ChangeFeedMode can not be empty or null"); + } + + CosmosChangeFeedModes changeFeedMode = CosmosChangeFeedModes.fromName(changeFeedModeString); + if (changeFeedMode == null) { + throw new ConfigException(name, o, "Invalid ChangeFeedMode, only allow LatestVersion or AllVersionsAndDeletes"); + } + } + + @Override + public String toString() { + return "ChangeFeedMode. 
Only allow " + CosmosChangeFeedModes.values(); + } + } + + public static class ChangeFeedStartFromValidator implements ConfigDef.Validator { + @Override + @SuppressWarnings("unchecked") + public void ensureValid(String name, Object o) { + String changeFeedStartFromString = (String) o; + if (StringUtils.isEmpty(changeFeedStartFromString)) { + throw new ConfigException(name, o, "ChangeFeedStartFrom can not be empty or null"); + } + + CosmosChangeFeedStartFromModes changeFeedStartFromModes = + CosmosChangeFeedStartFromModes.fromName(changeFeedStartFromString); + if (changeFeedStartFromModes == null) { + try { + Instant.parse(changeFeedStartFromString); + } catch (DateTimeParseException dateTimeParseException) { + throw new ConfigException( + name, + o, + "Invalid changeFeedStartFrom." + + " only allow Now, Beginning or a certain point in time (UTC) for example 2020-02-10T14:15:03 "); + } + } + } + + @Override + public String toString() { + return "ChangeFeedStartFrom. Only allow Now, Beginning or a certain point in time (UTC) for example 2020-02-10T14:15:03"; + } + } + + public static class PositiveValueValidator implements ConfigDef.Validator { + @Override + @SuppressWarnings("unchecked") + public void ensureValid(String name, Object o) { + int value = Integer.parseInt(o.toString()); + + if (value <= 0) { + throw new ConfigException(name, o, "Invalid value, need to be >= 0"); + } + } + + @Override + public String toString() { + return "Value need to be >= 0"; + } + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceContainersConfig.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceContainersConfig.java new file mode 100644 index 000000000000..2d61b49fedb0 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceContainersConfig.java @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; + +import java.util.List; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; + +public class CosmosSourceContainersConfig { + public static final String CONTAINER_TOPIC_MAP_SEPARATOR = "#"; + + private final String databaseName; + private final boolean includeAllContainers; + private final List includedContainers; + private final List containersTopicMap; + + public CosmosSourceContainersConfig( + String databaseName, + boolean includeAllContainers, + List includedContainers, + List containersTopicMap) { + + checkArgument(StringUtils.isNotEmpty(databaseName), "Argument 'databaseName' can not be null"); + checkNotNull(includedContainers, "Argument 'includedContainers' can not be null"); + + this.databaseName = databaseName; + this.includeAllContainers = includeAllContainers; + this.includedContainers = includedContainers; + this.containersTopicMap = containersTopicMap; + } + + public String getDatabaseName() { + return databaseName; + } + + public boolean isIncludeAllContainers() { + return includeAllContainers; + } + + public List getIncludedContainers() { + return includedContainers; + } + + public List getContainersTopicMap() { + return containersTopicMap; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceMessageKeyConfig.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceMessageKeyConfig.java new file mode 100644 index 000000000000..82c59531d059 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceMessageKeyConfig.java @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation.source; + +public class CosmosSourceMessageKeyConfig { + private final boolean messageKeyEnabled; + private final String messageKeyField; + + public CosmosSourceMessageKeyConfig(boolean messageKeyEnabled, String messageKeyField) { + this.messageKeyEnabled = messageKeyEnabled; + this.messageKeyField = messageKeyField; + } + + public boolean isMessageKeyEnabled() { + return messageKeyEnabled; + } + + public String getMessageKeyField() { + return messageKeyField; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceOffsetStorageReader.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceOffsetStorageReader.java new file mode 100644 index 000000000000..073c9edc30d7 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceOffsetStorageReader.java @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
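
Editor's note: CosmosSourceContainersConfig above keeps the raw topic#container pairs; mapping a container to its topic is left to the callers, defaulting to the container name when no entry matches. A sketch of that lookup, assuming the list elements are Strings (the generics are dropped in the diff rendering); the resolveTopic helper is illustrative, not part of this PR:

package com.azure.cosmos.kafka.connect.implementation.source;

import java.util.Arrays;

public class ContainerTopicResolutionSketch {
    // Hypothetical helper following the topic#container convention used by the containers.topicMap config.
    static String resolveTopic(CosmosSourceContainersConfig config, String containerName) {
        for (String entry : config.getContainersTopicMap()) {
            String[] parts = entry.split(CosmosSourceContainersConfig.CONTAINER_TOPIC_MAP_SEPARATOR);
            if (parts.length == 2 && parts[1].equals(containerName)) {
                return parts[0];
            }
        }
        return containerName; // default: publish to a topic named after the container
    }

    public static void main(String[] args) {
        CosmosSourceContainersConfig config = new CosmosSourceContainersConfig(
            "SampleDatabase",
            false,
            Arrays.asList("orders", "customers"),
            Arrays.asList("orders-topic#orders"));

        System.out.println(resolveTopic(config, "orders"));    // orders-topic (explicit mapping)
        System.out.println(resolveTopic(config, "customers")); // customers (falls back to container name)
    }
}
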
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.models.FeedRange; +import org.apache.kafka.connect.storage.OffsetStorageReader; + +import java.util.Map; + +public class CosmosSourceOffsetStorageReader { + private final OffsetStorageReader offsetStorageReader; + + public CosmosSourceOffsetStorageReader(OffsetStorageReader offsetStorageReader) { + this.offsetStorageReader = offsetStorageReader; + } + + public FeedRangesMetadataTopicOffset getFeedRangesMetadataOffset(String databaseName, String containerRid) { + Map topicOffsetMap = + this.offsetStorageReader + .offset( + FeedRangesMetadataTopicPartition.toMap( + new FeedRangesMetadataTopicPartition(databaseName, containerRid))); + + return FeedRangesMetadataTopicOffset.fromMap(topicOffsetMap); + } + + public ContainersMetadataTopicOffset getContainersMetadataOffset(String databaseName) { + Map topicOffsetMap = + this.offsetStorageReader + .offset( + ContainersMetadataTopicPartition.toMap( + new ContainersMetadataTopicPartition(databaseName))); + + return ContainersMetadataTopicOffset.fromMap(topicOffsetMap); + } + + public FeedRangeContinuationTopicOffset getFeedRangeContinuationOffset( + String databaseName, + String collectionRid, + FeedRange feedRange) { + + Map topicOffsetMap = + this.offsetStorageReader + .offset( + FeedRangeContinuationTopicPartition.toMap( + new FeedRangeContinuationTopicPartition(databaseName, collectionRid, feedRange))); + + return FeedRangeContinuationTopicOffset.fromMap(topicOffsetMap); + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceTask.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceTask.java new file mode 100644 index 000000000000..7fc6eeef822b --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceTask.java @@ -0,0 +1,334 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
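
Editor's note: CosmosSourceOffsetStorageReader above is a typed wrapper over Connect's OffsetStorageReader. It rebuilds the exact partition map that was attached to the emitted records and decodes the stored offset map back into a typed object. A sketch of the lookup path, assuming it runs where a SourceTaskContext is available:

package com.azure.cosmos.kafka.connect.implementation.source;

import org.apache.kafka.connect.source.SourceTaskContext;

import java.util.List;

public class OffsetLookupSketch {
    // Illustrative only: the path a task or connector would follow when resuming.
    static List<String> knownContainerRids(SourceTaskContext context, String databaseName) {
        CosmosSourceOffsetStorageReader offsetReader =
            new CosmosSourceOffsetStorageReader(context.offsetStorageReader());

        ContainersMetadataTopicOffset containersOffset =
            offsetReader.getContainersMetadataOffset(databaseName);

        // null means no metadata record has been committed yet for this database (first run).
        return containersOffset == null ? null : containersOffset.getContainerRids();
    }
}
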
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; +import com.azure.cosmos.implementation.guava25.base.Stopwatch; +import com.azure.cosmos.kafka.connect.implementation.CosmosClientStore; +import com.azure.cosmos.kafka.connect.implementation.CosmosConstants; +import com.azure.cosmos.kafka.connect.implementation.CosmosExceptionsHelper; +import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; +import com.azure.cosmos.models.FeedRange; +import com.azure.cosmos.models.FeedResponse; +import com.azure.cosmos.models.ModelBridgeInternal; +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaAndValue; +import org.apache.kafka.connect.source.SourceRecord; +import org.apache.kafka.connect.source.SourceTask; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import reactor.core.publisher.Mono; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.stream.Collectors; + +public class CosmosSourceTask extends SourceTask { + private static final Logger LOGGER = LoggerFactory.getLogger(CosmosSourceTask.class); + private static final String LSN_ATTRIBUTE_NAME = "_lsn"; + + private CosmosSourceTaskConfig taskConfig; + private CosmosAsyncClient cosmosClient; + private Queue taskUnitsQueue = new LinkedList<>(); + + @Override + public String version() { + return CosmosConstants.CURRENT_VERSION; + } + + @Override + public void start(Map map) { + LOGGER.info("Starting the kafka cosmos source task..."); + + this.taskConfig = new CosmosSourceTaskConfig(map); + if (this.taskConfig.getMetadataTaskUnit() != null) { + // adding metadata task units into the head of the queue + this.taskUnitsQueue.add(this.taskConfig.getMetadataTaskUnit()); + } + + this.taskUnitsQueue.addAll(this.taskConfig.getFeedRangeTaskUnits()); + LOGGER.info("Creating the cosmos client"); + + // TODO[GA]: optimize the client creation, client metadata cache? 
+ this.cosmosClient = CosmosClientStore.getCosmosClient(this.taskConfig.getAccountConfig()); + } + + @Override + public List poll() { + // do not poll it from the queue yet + // we need to make sure not losing tasks for failure cases + ITaskUnit taskUnit = this.taskUnitsQueue.poll(); + try { + if (taskUnit == null) { + // there is no task to do + return new ArrayList<>(); + } + + List results = new ArrayList<>(); + if (taskUnit instanceof MetadataTaskUnit) { + results.addAll(executeMetadataTask((MetadataTaskUnit) taskUnit)); + LOGGER.info( + "Return {} metadata records, databaseName {}", results.size(), ((MetadataTaskUnit) taskUnit).getDatabaseName()); + + } else { + Stopwatch stopwatch = Stopwatch.createStarted(); + + LOGGER.trace("Polling for task {}", taskUnit); + Pair, Boolean> feedRangeTaskResults = executeFeedRangeTask((FeedRangeTaskUnit) taskUnit); + results.addAll(feedRangeTaskResults.getLeft()); + + // for split, new feedRangeTaskUnit will be created, so we do not need to add the original taskUnit back to the queue + if (!feedRangeTaskResults.getRight()) { + LOGGER.trace("Adding task {} back to queue", taskUnit); + this.taskUnitsQueue.add(taskUnit); + } + + stopwatch.stop(); + LOGGER.debug( + "Return {} records, databaseName {}, containerName {}, containerRid {}, feedRange {}, durationInMs {}", + results.size(), + ((FeedRangeTaskUnit) taskUnit).getDatabaseName(), + ((FeedRangeTaskUnit) taskUnit).getContainerName(), + ((FeedRangeTaskUnit) taskUnit).getContainerRid(), + ((FeedRangeTaskUnit) taskUnit).getFeedRange(), + stopwatch.elapsed().toMillis() + ); + } + return results; + } catch (Exception e) { + // for error cases, we should always the task back to the queue + this.taskUnitsQueue.add(taskUnit); + + // TODO[Public Preview]: add checking for max retries checking + throw CosmosExceptionsHelper.convertToConnectException(e, "PollTask failed"); + } + } + + private List executeMetadataTask(MetadataTaskUnit taskUnit) { + List sourceRecords = new ArrayList<>(); + + // add the containers metadata record - it track the databaseName -> List[containerRid] mapping + ContainersMetadataTopicPartition metadataTopicPartition = + new ContainersMetadataTopicPartition(taskUnit.getDatabaseName()); + ContainersMetadataTopicOffset metadataTopicOffset = + new ContainersMetadataTopicOffset(taskUnit.getContainerRids()); + + sourceRecords.add( + new SourceRecord( + ContainersMetadataTopicPartition.toMap(metadataTopicPartition), + ContainersMetadataTopicOffset.toMap(metadataTopicOffset), + taskUnit.getTopic(), + SchemaAndValue.NULL.schema(), + SchemaAndValue.NULL.value())); + + // add the container feedRanges metadata record - it tracks the containerRid -> List[FeedRange] mapping + for (String containerRid : taskUnit.getContainersEffectiveRangesMap().keySet()) { + FeedRangesMetadataTopicPartition feedRangesMetadataTopicPartition = + new FeedRangesMetadataTopicPartition(taskUnit.getDatabaseName(), containerRid); + FeedRangesMetadataTopicOffset feedRangesMetadataTopicOffset = + new FeedRangesMetadataTopicOffset(taskUnit.getContainersEffectiveRangesMap().get(containerRid)); + + sourceRecords.add( + new SourceRecord( + FeedRangesMetadataTopicPartition.toMap(feedRangesMetadataTopicPartition), + FeedRangesMetadataTopicOffset.toMap(feedRangesMetadataTopicOffset), + taskUnit.getTopic(), + SchemaAndValue.NULL.schema(), + SchemaAndValue.NULL.value())); + } + + LOGGER.info("There are {} metadata records being created/updated", sourceRecords.size()); + return sourceRecords; + } + + private Pair, Boolean> 
executeFeedRangeTask(FeedRangeTaskUnit feedRangeTaskUnit) { + // each time we will only pull one page + CosmosChangeFeedRequestOptions changeFeedRequestOptions = + this.getChangeFeedRequestOptions(feedRangeTaskUnit); + + // split/merge will be handled in source task + ModelBridgeInternal.getChangeFeedIsSplitHandlingDisabled(changeFeedRequestOptions); + + CosmosAsyncContainer container = + this.cosmosClient + .getDatabase(feedRangeTaskUnit.getDatabaseName()) + .getContainer(feedRangeTaskUnit.getContainerName()); + + return container.queryChangeFeed(changeFeedRequestOptions, JsonNode.class) + .byPage(this.taskConfig.getChangeFeedConfig().getMaxItemCountHint()) + .next() + .map(feedResponse -> { + List records = handleSuccessfulResponse(feedResponse, feedRangeTaskUnit); + return Pair.of(records, false); + }) + .onErrorResume(throwable -> { + if (CosmosExceptionsHelper.isFeedRangeGoneException(throwable)) { + return this.handleFeedRangeGone(feedRangeTaskUnit) + .map(shouldRemoveOriginalTaskUnit -> Pair.of(new ArrayList<>(), shouldRemoveOriginalTaskUnit)); + } + + return Mono.error(throwable); + }) + .block(); + } + + private List handleSuccessfulResponse( + FeedResponse feedResponse, + FeedRangeTaskUnit feedRangeTaskUnit) { + + List sourceRecords = new ArrayList<>(); + for (JsonNode item : feedResponse.getResults()) { + FeedRangeContinuationTopicPartition feedRangeContinuationTopicPartition = + new FeedRangeContinuationTopicPartition( + feedRangeTaskUnit.getDatabaseName(), + feedRangeTaskUnit.getContainerRid(), + feedRangeTaskUnit.getFeedRange()); + FeedRangeContinuationTopicOffset feedRangeContinuationTopicOffset = + new FeedRangeContinuationTopicOffset( + feedResponse.getContinuationToken(), + getItemLsn(item)); + + // Set the Kafka message key if option is enabled and field is configured in document + String messageKey = this.getMessageKey(item); + + // Convert JSON to Kafka Connect struct and JSON schema + SchemaAndValue schemaAndValue = JsonToStruct.recordToSchemaAndValue(item); + + sourceRecords.add( + new SourceRecord( + FeedRangeContinuationTopicPartition.toMap(feedRangeContinuationTopicPartition), + FeedRangeContinuationTopicOffset.toMap(feedRangeContinuationTopicOffset), + feedRangeTaskUnit.getTopic(), + Schema.STRING_SCHEMA, + messageKey, + schemaAndValue.schema(), + schemaAndValue.value())); + } + + // Important: track the continuationToken + feedRangeTaskUnit.setContinuationState( + new KafkaCosmosChangeFeedState(feedResponse.getContinuationToken(), feedRangeTaskUnit.getFeedRange())); + return sourceRecords; + } + + private Mono handleFeedRangeGone(FeedRangeTaskUnit feedRangeTaskUnit) { + // need to find out whether it is split or merge + CosmosAsyncContainer container = + this.cosmosClient + .getDatabase(feedRangeTaskUnit.getDatabaseName()) + .getContainer(feedRangeTaskUnit.getContainerName()); + + return ImplementationBridgeHelpers + .CosmosAsyncContainerHelper + .getCosmosAsyncContainerAccessor() + .getOverlappingFeedRanges(container, feedRangeTaskUnit.getFeedRange()) + .flatMap(overlappedRanges -> { + + if (overlappedRanges.size() == 1) { + // merge happens + LOGGER.info( + "FeedRange {} is merged into {}, but we will continue polling data from feedRange {}", + feedRangeTaskUnit.getFeedRange(), + overlappedRanges.get(0).toString(), + feedRangeTaskUnit.getFeedRange()); + + // Continue using polling data from the current task unit feedRange + return Mono.just(false); + } else { + LOGGER.info( + "FeedRange {} is split into {}. Will create new task units. 
", + feedRangeTaskUnit.getFeedRange(), + overlappedRanges.stream().map(FeedRange::toString).collect(Collectors.toList()) + ); + + for (FeedRange pkRange : overlappedRanges) { + FeedRangeTaskUnit childTaskUnit = + new FeedRangeTaskUnit( + feedRangeTaskUnit.getDatabaseName(), + feedRangeTaskUnit.getContainerName(), + feedRangeTaskUnit.getContainerRid(), + pkRange, + feedRangeTaskUnit.getContinuationState(), + feedRangeTaskUnit.getTopic()); + this.taskUnitsQueue.add(childTaskUnit); + } + + // remove the current task unit from the queue + return Mono.just(true); + } + }); + } + + private String getItemLsn(JsonNode item) { + return item.get(LSN_ATTRIBUTE_NAME).asText(); + } + + private String getMessageKey(JsonNode item) { + String messageKey = ""; + if (this.taskConfig.getMessageKeyConfig().isMessageKeyEnabled()) { + JsonNode messageKeyFieldNode = item.get(this.taskConfig.getMessageKeyConfig().getMessageKeyField()); + if (messageKeyFieldNode != null) { + messageKey = messageKeyFieldNode.asText(); + } + } + + return messageKey; + } + + private CosmosChangeFeedRequestOptions getChangeFeedRequestOptions(FeedRangeTaskUnit feedRangeTaskUnit) { + CosmosChangeFeedRequestOptions changeFeedRequestOptions = null; + FeedRange changeFeedRange = feedRangeTaskUnit.getFeedRange(); + if (feedRangeTaskUnit.getContinuationState() == null) { + switch (this.taskConfig.getChangeFeedConfig().getChangeFeedStartFromModes()) { + case BEGINNING: + changeFeedRequestOptions = + CosmosChangeFeedRequestOptions.createForProcessingFromBeginning(changeFeedRange); + break; + case NOW: + changeFeedRequestOptions = + CosmosChangeFeedRequestOptions.createForProcessingFromNow(changeFeedRange); + break; + case POINT_IN_TIME: + changeFeedRequestOptions = + CosmosChangeFeedRequestOptions + .createForProcessingFromPointInTime( + this.taskConfig.getChangeFeedConfig().getStartFrom(), + changeFeedRange); + break; + default: + throw new IllegalArgumentException(feedRangeTaskUnit.getContinuationState() + " is not supported"); + } + + if (this.taskConfig.getChangeFeedConfig().getChangeFeedModes() == CosmosChangeFeedModes.ALL_VERSION_AND_DELETES) { + changeFeedRequestOptions.allVersionsAndDeletes(); + } + } else { + KafkaCosmosChangeFeedState kafkaCosmosChangeFeedState = feedRangeTaskUnit.getContinuationState(); + + changeFeedRequestOptions = + ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper + .getCosmosChangeFeedRequestOptionsAccessor() + .createForProcessingFromContinuation( + kafkaCosmosChangeFeedState.getResponseContinuation(), + kafkaCosmosChangeFeedState.getTargetRange(), + kafkaCosmosChangeFeedState.getItemLsn()); + } + + return changeFeedRequestOptions; + } + + @Override + public void stop() { + if (this.cosmosClient != null) { + this.cosmosClient.close(); + } + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceTaskConfig.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceTaskConfig.java new file mode 100644 index 000000000000..147bb61814e9 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceTaskConfig.java @@ -0,0 +1,139 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.Utils; +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.kafka.common.config.ConfigDef; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class CosmosSourceTaskConfig extends CosmosSourceConfig { + private static final ObjectMapper OBJECT_MAPPER = Utils.getSimpleObjectMapper(); + private static final String SOURCE_TASK_CONFIG_PREFIX = "kafka.connect.cosmos.source.task."; + + public static final String SOURCE_METADATA_TASK_UNIT = SOURCE_TASK_CONFIG_PREFIX + "metadataTaskUnit"; + public static final String SOURCE_FEED_RANGE_TASK_UNITS = SOURCE_TASK_CONFIG_PREFIX + "feedRangeTaskUnits"; + + private final List feedRangeTaskUnits; + private MetadataTaskUnit metadataTaskUnit; + + public CosmosSourceTaskConfig(Map parsedConfigs) { + super(getConfigDef(), parsedConfigs); + + this.feedRangeTaskUnits = this.parseFeedRangeTaskUnits(); + this.metadataTaskUnit = this.parseMetadataTaskUnit(); + } + + public static ConfigDef getConfigDef() { + ConfigDef configDef = CosmosSourceConfig.getConfigDef(); + defineTaskUnitsConfig(configDef); + + return configDef; + } + + private static void defineTaskUnitsConfig(ConfigDef result) { + result + .defineInternal( + SOURCE_FEED_RANGE_TASK_UNITS, + ConfigDef.Type.STRING, + ConfigDef.NO_DEFAULT_VALUE, + ConfigDef.Importance.HIGH + ) + .defineInternal( + SOURCE_METADATA_TASK_UNIT, + ConfigDef.Type.STRING, + null, + ConfigDef.Importance.HIGH + ); + } + + private List parseFeedRangeTaskUnits() { + String feedRangesTaskUnitsConfig = this.getString(SOURCE_FEED_RANGE_TASK_UNITS); + + try { + if (!StringUtils.isEmpty(feedRangesTaskUnitsConfig)) { + return OBJECT_MAPPER + .readValue(feedRangesTaskUnitsConfig, new TypeReference>() {}) + .stream() + .map(taskUnitConfigJson -> { + try { + return OBJECT_MAPPER.readValue(taskUnitConfigJson, FeedRangeTaskUnit.class); + } catch (JsonProcessingException e) { + throw new IllegalArgumentException("Failed to parseFeedRangeTaskUnit[" + taskUnitConfigJson + "]", e); + } + }) + .collect(Collectors.toList()); + } + + return new ArrayList<>(); + } catch (JsonProcessingException e) { + throw new IllegalArgumentException("Failed to parseFeedRangeTaskUnits[" + feedRangesTaskUnitsConfig + "]", e); + } + + } + + private MetadataTaskUnit parseMetadataTaskUnit() { + String metadataTaskUnitConfig = this.getString(SOURCE_METADATA_TASK_UNIT); + if (!StringUtils.isEmpty(metadataTaskUnitConfig)) { + try { + return OBJECT_MAPPER.readValue(metadataTaskUnitConfig, MetadataTaskUnit.class); + } catch (JsonProcessingException e) { + throw new IllegalArgumentException("Failed to parseMetadataTaskUnit[" + metadataTaskUnitConfig + "]", e); + } + } + + return null; + } + + public static Map getFeedRangeTaskUnitsConfigMap(List feedRangeTaskUnits) { + try { + Map taskConfigMap = new HashMap<>(); + taskConfigMap.put( + SOURCE_FEED_RANGE_TASK_UNITS, + OBJECT_MAPPER.writeValueAsString( + feedRangeTaskUnits + .stream() + .map(taskUnit -> { + try { + return OBJECT_MAPPER.writeValueAsString(taskUnit); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }) + .collect(Collectors.toList()) + )); + return taskConfigMap; + } catch 
(JsonProcessingException e) { + throw new RuntimeException(e); + } + } + + public static Map getMetadataTaskUnitConfigMap(MetadataTaskUnit metadataTaskUnit) { + try { + Map taskConfigMap = new HashMap<>(); + if (metadataTaskUnit != null) { + taskConfigMap.put(SOURCE_METADATA_TASK_UNIT, OBJECT_MAPPER.writeValueAsString(metadataTaskUnit)); + } + return taskConfigMap; + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } + + public List getFeedRangeTaskUnits() { + return feedRangeTaskUnits; + } + + public MetadataTaskUnit getMetadataTaskUnit() { + return metadataTaskUnit; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangeContinuationTopicOffset.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangeContinuationTopicOffset.java new file mode 100644 index 000000000000..6f28fd42128c --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangeContinuationTopicOffset.java @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; + +import java.util.HashMap; +import java.util.Map; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; + +public class FeedRangeContinuationTopicOffset { + private static final String ITEM_LSN_KEY = "cosmos.source.feedRange.item.lsn"; + private static final String CONTINUATION_KEY = "cosmos.source.feedRange.responseContinuation"; + + private final String responseContinuation; + private final String itemLsn; + + public FeedRangeContinuationTopicOffset( + String responseContinuation, + String itemLsn) { + checkArgument(StringUtils.isNotEmpty(responseContinuation), "Argument 'responseContinuation' should not be null"); + checkArgument(StringUtils.isNotEmpty(itemLsn), "Argument 'itemLsn' should not be null"); + + this.itemLsn = itemLsn; + this.responseContinuation = responseContinuation; + } + + public String getResponseContinuation() { + return responseContinuation; + } + + public String getItemLsn() { + return itemLsn; + } + + public static Map toMap(FeedRangeContinuationTopicOffset offset) { + Map map = new HashMap<>(); + map.put(CONTINUATION_KEY, offset.getResponseContinuation()); + map.put(ITEM_LSN_KEY, offset.getItemLsn()); + + return map; + } + + public static FeedRangeContinuationTopicOffset fromMap(Map offsetMap) { + if (offsetMap == null) { + return null; + } + + String continuationState = offsetMap.get(CONTINUATION_KEY).toString(); + String itemLsn = offsetMap.get(ITEM_LSN_KEY).toString(); + return new FeedRangeContinuationTopicOffset(continuationState, itemLsn); + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangeContinuationTopicPartition.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangeContinuationTopicPartition.java new file mode 100644 index 000000000000..9734eac7d732 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangeContinuationTopicPartition.java @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.models.FeedRange; + +import java.util.HashMap; +import java.util.Map; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; + +public class FeedRangeContinuationTopicPartition { + private static final String DATABASE_NAME_KEY = "cosmos.source.database.name"; + public static final String CONTAINER_RESOURCE_ID_KEY = "cosmos.source.container.resourceId"; + private static final String CONTAINER_FEED_RANGE_KEY = "cosmos.source.feedRange"; + + private final String databaseName; + private final String containerRid; + private final FeedRange feedRange; + + public FeedRangeContinuationTopicPartition( + String databaseName, + String containerRid, + FeedRange feedRange) { + checkArgument(StringUtils.isNotEmpty(databaseName), "Argument 'databaseName' should not be null"); + checkArgument(StringUtils.isNotEmpty(containerRid), "Argument 'containerRid' should not be null"); + checkNotNull(feedRange, "Argument 'feedRange' can not be null"); + + this.databaseName = databaseName; + this.containerRid = containerRid; + this.feedRange = feedRange; + } + + public String getDatabaseName() { + return databaseName; + } + + public String getContainerRid() { + return containerRid; + } + + public FeedRange getFeedRange() { + return feedRange; + } + + public static Map toMap(FeedRangeContinuationTopicPartition partition) { + Map map = new HashMap<>(); + map.put(DATABASE_NAME_KEY, partition.getDatabaseName()); + map.put(CONTAINER_RESOURCE_ID_KEY, partition.getContainerRid()); + map.put(CONTAINER_FEED_RANGE_KEY, partition.getFeedRange().toString()); + + return map; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangeTaskUnit.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangeTaskUnit.java new file mode 100644 index 000000000000..1a7fdcd9d1db --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangeTaskUnit.java @@ -0,0 +1,167 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
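
Editor's note: FeedRangeContinuationTopicPartition above, together with FeedRangeContinuationTopicOffset from the previous file, is what gets attached to every change-feed SourceRecord so the task can later resume the same feed range at the same continuation. A small pairing sketch, with FeedRange.forFullRange() standing in for a real feed range and map value types assumed to be Object:

package com.azure.cosmos.kafka.connect.implementation.source;

import com.azure.cosmos.models.FeedRange;

import java.util.Map;

public class FeedRangeContinuationSketch {
    public static void main(String[] args) {
        FeedRangeContinuationTopicPartition partition = new FeedRangeContinuationTopicPartition(
            "SampleDatabase", "sampleContainerRid", FeedRange.forFullRange());
        FeedRangeContinuationTopicOffset offset =
            new FeedRangeContinuationTopicOffset("sampleContinuationToken", "42");

        Map<String, Object> sourcePartition = FeedRangeContinuationTopicPartition.toMap(partition);
        Map<String, Object> sourceOffset = FeedRangeContinuationTopicOffset.toMap(offset);
        System.out.println(sourcePartition);
        System.out.println(sourceOffset);

        // Round-tripping the offset restores the continuation token and the last item lsn.
        FeedRangeContinuationTopicOffset restored = FeedRangeContinuationTopicOffset.fromMap(sourceOffset);
        System.out.println(restored.getResponseContinuation() + " / " + restored.getItemLsn());
    }
}
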
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.Utils; +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.models.FeedRange; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; + +import java.io.IOException; +import java.util.Objects; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; + +@JsonSerialize(using = FeedRangeTaskUnit.FeedRangeTaskUnitSerializer.class) +@JsonDeserialize(using = FeedRangeTaskUnit.FeedRangeTaskUnitDeserializer.class) +public class FeedRangeTaskUnit implements ITaskUnit { + private String databaseName; + private String containerName; + private String containerRid; + private FeedRange feedRange; + private KafkaCosmosChangeFeedState continuationState; + private String topic; + + public FeedRangeTaskUnit( + String databaseName, + String containerName, + String containerRid, + FeedRange feedRange, + KafkaCosmosChangeFeedState continuationState, + String topic) { + + checkArgument(StringUtils.isNotEmpty(databaseName), "Argument 'databaseName' should not be null"); + checkArgument(StringUtils.isNotEmpty(containerName), "Argument 'containerName' should not be null"); + checkArgument(StringUtils.isNotEmpty(containerRid), "Argument 'containerRid' should not be null"); + checkNotNull(feedRange, "Argument 'feedRange' can not be null"); + checkArgument(StringUtils.isNotEmpty(topic), "Argument 'topic' should not be null"); + + this.databaseName = databaseName; + this.containerName = containerName; + this.containerRid = containerRid; + this.feedRange = feedRange; + this.continuationState = continuationState; + this.topic = topic; + } + + public String getDatabaseName() { + return databaseName; + } + + public String getContainerName() { + return containerName; + } + + public String getContainerRid() { + return containerRid; + } + + public FeedRange getFeedRange() { + return feedRange; + } + + public KafkaCosmosChangeFeedState getContinuationState() { + return continuationState; + } + + public void setContinuationState(KafkaCosmosChangeFeedState continuationState) { + this.continuationState = continuationState; + } + + public String getTopic() { + return topic; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()) { + return false; + } + + FeedRangeTaskUnit that = (FeedRangeTaskUnit) o; + return Objects.equals(databaseName, that.databaseName) + && Objects.equals(containerName, that.containerName) + && Objects.equals(containerRid, that.containerRid) + && Objects.equals(feedRange, that.feedRange) + && Objects.equals(continuationState, that.continuationState) + && Objects.equals(topic, that.topic); + } + + @Override + public int hashCode() { + return Objects.hash(databaseName, containerName, containerRid, feedRange, continuationState, topic); + } + + @Override + public String toString() { + return "FeedRangeTaskUnit{" + + "databaseName='" + databaseName + '\'' + + 
", containerName='" + containerName + '\'' + + ", containerRid='" + containerRid + '\'' + + ", feedRange=" + feedRange + + ", continuationState=" + continuationState + + ", topic='" + topic + '\'' + + '}'; + } + + public static class FeedRangeTaskUnitSerializer extends com.fasterxml.jackson.databind.JsonSerializer { + @Override + public void serialize(FeedRangeTaskUnit feedRangeTaskUnit, + JsonGenerator writer, + SerializerProvider serializerProvider) throws IOException { + writer.writeStartObject(); + writer.writeStringField("databaseName", feedRangeTaskUnit.getDatabaseName()); + writer.writeStringField("containerName", feedRangeTaskUnit.getContainerName()); + writer.writeStringField("containerRid", feedRangeTaskUnit.getContainerRid()); + writer.writeStringField("feedRange", feedRangeTaskUnit.getFeedRange().toString()); + if (feedRangeTaskUnit.getContinuationState() != null) { + writer.writeStringField( + "continuationState", + Utils.getSimpleObjectMapper().writeValueAsString(feedRangeTaskUnit.getContinuationState())); + } + writer.writeStringField("topic", feedRangeTaskUnit.getTopic()); + writer.writeEndObject(); + } + } + + static class FeedRangeTaskUnitDeserializer extends StdDeserializer { + FeedRangeTaskUnitDeserializer() { + super(FeedRangeTaskUnit.class); + } + + @Override + public FeedRangeTaskUnit deserialize( + JsonParser jsonParser, + DeserializationContext deserializationContext) throws IOException { + + final JsonNode rootNode = jsonParser.getCodec().readTree(jsonParser); + String databaseName = rootNode.get("databaseName").asText(); + String containerName = rootNode.get("containerName").asText(); + String containerRid = rootNode.get("containerRid").asText(); + FeedRange feedRange = FeedRange.fromString(rootNode.get("feedRange").asText()); + KafkaCosmosChangeFeedState continuationState = null; + if (rootNode.has("continuationState")) { + continuationState = + Utils.getSimpleObjectMapper() + .readValue(rootNode.get("continuationState").asText(), KafkaCosmosChangeFeedState.class); + } + + String topic = rootNode.get("topic").asText(); + + return new FeedRangeTaskUnit(databaseName, containerName, containerRid, feedRange, continuationState, topic); + } + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangesMetadataTopicOffset.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangesMetadataTopicOffset.java new file mode 100644 index 000000000000..2ee85864335f --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangesMetadataTopicOffset.java @@ -0,0 +1,71 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.Utils; +import com.azure.cosmos.models.FeedRange; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; + +public class FeedRangesMetadataTopicOffset { + public static final String CONTAINER_FEED_RANGES_KEY = "cosmos.source.metadata.container.feedRanges"; + public static final ObjectMapper OBJECT_MAPPER = Utils.getSimpleObjectMapper(); + + private final List feedRanges; + + public FeedRangesMetadataTopicOffset(List feedRanges) { + checkNotNull(feedRanges, "Argument 'feedRanges' can not be null"); + + this.feedRanges = feedRanges; + } + + public List getFeedRanges() { + return feedRanges; + } + + public static Map toMap(FeedRangesMetadataTopicOffset offset) { + try { + Map map = new HashMap<>(); + + // offset can only contain primitive types + map.put( + CONTAINER_FEED_RANGES_KEY, + OBJECT_MAPPER + .writeValueAsString( + offset.getFeedRanges().stream().map(range -> range.toString()).collect(Collectors.toList()))); + + return map; + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } + + public static FeedRangesMetadataTopicOffset fromMap(Map offsetMap) { + if (offsetMap == null) { + return null; + } + + String feedRangesValue = offsetMap.get(CONTAINER_FEED_RANGES_KEY).toString(); + try { + List feedRanges = + OBJECT_MAPPER + .readValue(feedRangesValue, new TypeReference>() {}) + .stream() + .map(rangeJson -> FeedRange.fromString(rangeJson)) + .collect(Collectors.toList()); + + return new FeedRangesMetadataTopicOffset(feedRanges); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangesMetadataTopicPartition.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangesMetadataTopicPartition.java new file mode 100644 index 000000000000..03c0c50e8c7e --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/FeedRangesMetadataTopicPartition.java @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
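
Editor's note: because Connect offsets may only hold primitive values, FeedRangesMetadataTopicOffset above flattens the feed ranges into one JSON array string on write and parses them back on read. A round-trip sketch, again with the full range as a placeholder and the map value type assumed to be Object:

package com.azure.cosmos.kafka.connect.implementation.source;

import com.azure.cosmos.models.FeedRange;

import java.util.Collections;
import java.util.Map;

public class FeedRangesMetadataOffsetSketch {
    public static void main(String[] args) {
        FeedRangesMetadataTopicOffset offset =
            new FeedRangesMetadataTopicOffset(Collections.singletonList(FeedRange.forFullRange()));

        // A single entry holding a JSON array of feed-range strings.
        Map<String, Object> offsetMap = FeedRangesMetadataTopicOffset.toMap(offset);
        System.out.println(offsetMap);

        FeedRangesMetadataTopicOffset restored = FeedRangesMetadataTopicOffset.fromMap(offsetMap);
        System.out.println(restored.getFeedRanges());
    }
}
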
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; + +import java.util.HashMap; +import java.util.Map; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; + +public class FeedRangesMetadataTopicPartition { + public static final String DATABASE_NAME_KEY = "cosmos.source.metadata.database.name"; + public static final String CONTAINER_RESOURCE_ID_KEY = "cosmos.source.metadata.container.resourceId"; + private final String databaseName; + private final String containerRid; + + public FeedRangesMetadataTopicPartition(String databaseName, String containerRid) { + checkArgument(StringUtils.isNotEmpty(databaseName), "Argument 'databaseName' should not be null"); + checkArgument(StringUtils.isNotEmpty(containerRid), "Argument 'containerRid' should not be null"); + + this.databaseName = databaseName; + this.containerRid = containerRid; + } + + public String getDatabaseName() { + return databaseName; + } + + public String getContainerRid() { + return containerRid; + } + + public static Map toMap(FeedRangesMetadataTopicPartition partition) { + Map map = new HashMap<>(); + map.put(DATABASE_NAME_KEY, partition.getDatabaseName()); + map.put(CONTAINER_RESOURCE_ID_KEY, partition.getContainerRid()); + return map; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/ITaskUnit.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/ITaskUnit.java new file mode 100644 index 000000000000..2c7f6efdff16 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/ITaskUnit.java @@ -0,0 +1,7 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation.source; + +public interface ITaskUnit { +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/JsonToStruct.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/JsonToStruct.java new file mode 100644 index 000000000000..c73af26327b5 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/JsonToStruct.java @@ -0,0 +1,207 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
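/*
 * Illustrative sketch (not part of this patch): the partition map produced by
 * FeedRangesMetadataTopicPartition.toMap above is what keys the feed-ranges metadata in
 * Connect's offset storage; paired with FeedRangesMetadataTopicOffset.toMap it records
 * which container's feed ranges a persisted offset belongs to. The names and the
 * Map<String, Object> signatures are assumptions for illustration only.
 */
Map<String, Object> metadataSourcePartition = FeedRangesMetadataTopicPartition.toMap(
    new FeedRangesMetadataTopicPartition("sampleDatabase", "sampleContainerRid"));
Map<String, Object> metadataSourceOffset = FeedRangesMetadataTopicOffset.toMap(
    new FeedRangesMetadataTopicOffset(Arrays.asList(FeedRange.forFullRange())));
// reading the offset back later with the same (database, containerRid) partition map
// should return the feed ranges captured in the offset map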
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaAndValue; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.apache.kafka.connect.data.Struct; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static java.lang.String.format; +import static org.apache.kafka.connect.data.Values.convertToByte; +import static org.apache.kafka.connect.data.Values.convertToDouble; +import static org.apache.kafka.connect.data.Values.convertToFloat; +import static org.apache.kafka.connect.data.Values.convertToInteger; +import static org.apache.kafka.connect.data.Values.convertToLong; +import static org.apache.kafka.connect.data.Values.convertToShort; + +public class JsonToStruct { + private static final Logger LOGGER = LoggerFactory.getLogger(JsonToStruct.class); + private static final String SCHEMA_NAME_TEMPLATE = "inferred_name_%s"; + + public static SchemaAndValue recordToSchemaAndValue(final JsonNode node) { + Schema nodeSchema = inferSchema(node); + Struct struct = new Struct(nodeSchema); + + if (nodeSchema != null) { + nodeSchema.fields().forEach(field -> { + JsonNode fieldValue = node.get(field.name()); + if (fieldValue != null) { + SchemaAndValue schemaAndValue = toSchemaAndValue(field.schema(), fieldValue); + struct.put(field, schemaAndValue.value()); + } else { + boolean optionalField = field.schema().isOptional(); + Object defaultValue = field.schema().defaultValue(); + if (optionalField || defaultValue != null) { + struct.put(field, defaultValue); + } else { + LOGGER.error("Missing value for field {}", field.name()); + } + } + }); + } + return new SchemaAndValue(nodeSchema, struct); + } + + private static Schema inferSchema(JsonNode jsonNode) { + switch (jsonNode.getNodeType()) { + case NULL: + return Schema.OPTIONAL_STRING_SCHEMA; + case BOOLEAN: + return Schema.BOOLEAN_SCHEMA; + case NUMBER: + if (jsonNode.isIntegralNumber()) { + return Schema.INT64_SCHEMA; + } else { + return Schema.FLOAT64_SCHEMA; + } + case ARRAY: + List jsonValues = new ArrayList<>(); + SchemaBuilder arrayBuilder; + jsonNode.forEach(jn -> jsonValues.add(jn)); + + Schema firstItemSchema = jsonValues.isEmpty() ? Schema.OPTIONAL_STRING_SCHEMA + : inferSchema(jsonValues.get(0)); + if (jsonValues.isEmpty() || jsonValues.stream() + .anyMatch(jv -> !Objects.equals(inferSchema(jv), firstItemSchema))) { + // If array is emtpy or it contains elements with different schema types + arrayBuilder = SchemaBuilder.array(Schema.OPTIONAL_STRING_SCHEMA); + arrayBuilder.name(generateName(arrayBuilder)); + return arrayBuilder.optional().build(); + } + arrayBuilder = SchemaBuilder.array(inferSchema(jsonValues.get(0))); + arrayBuilder.name(generateName(arrayBuilder)); + return arrayBuilder.optional().build(); + case OBJECT: + SchemaBuilder structBuilder = SchemaBuilder.struct(); + Iterator> it = jsonNode.fields(); + while (it.hasNext()) { + Map.Entry entry = it.next(); + structBuilder.field(entry.getKey(), inferSchema(entry.getValue())); + } + structBuilder.name(generateName(structBuilder)); + return structBuilder.build(); + case STRING: + return Schema.STRING_SCHEMA; + // TODO[GA]: do we need to support binary/pojo? 
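+            // BINARY, MISSING and POJO node types have no Connect schema mapping here; they fall through to the default branch and yield a null schema.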
+ case BINARY: + case MISSING: + case POJO: + default: + return null; + } + } + + // Generate Unique Schema Name + private static String generateName(final SchemaBuilder builder) { + return format(SCHEMA_NAME_TEMPLATE, Objects.hashCode(builder.build())).replace("-", "_"); + } + + private static SchemaAndValue toSchemaAndValue(final Schema schema, final JsonNode node) { + SchemaAndValue schemaAndValue = new SchemaAndValue(schema, node); + if (schema.isOptional() && node.isNull()) { + return new SchemaAndValue(schema, null); + } + switch (schema.type()) { + case INT8: + case INT16: + case INT32: + case INT64: + case FLOAT32: + case FLOAT64: + schemaAndValue = numberToSchemaAndValue(schema, node); + break; + case BOOLEAN: + schemaAndValue = new SchemaAndValue(schema, node.asBoolean()); + break; + case STRING: + schemaAndValue = stringToSchemaAndValue(schema, node); + break; + case BYTES: + schemaAndValue = new SchemaAndValue(schema, node); + break; + case ARRAY: + schemaAndValue = arrayToSchemaAndValue(schema, node); + break; + case MAP: + schemaAndValue = new SchemaAndValue(schema, node); + break; + case STRUCT: + schemaAndValue = recordToSchemaAndValue(node); + break; + default: + LOGGER.error("Unsupported Schema type: {}", schema.type()); + } + return schemaAndValue; + } + + private static SchemaAndValue stringToSchemaAndValue(final Schema schema, final JsonNode nodeValue) { + String value; + if (nodeValue.isTextual()) { + value = nodeValue.asText(); + } else { + value = nodeValue.toString(); + } + return new SchemaAndValue(schema, value); + } + + private static SchemaAndValue arrayToSchemaAndValue(final Schema schema, final JsonNode nodeValue) { + if (!nodeValue.isArray()) { + LOGGER.error("Unexpected array value for schema {}", schema); + } + List values = new ArrayList<>(); + nodeValue.forEach(v -> + values.add(toSchemaAndValue(schema.valueSchema(), v).value()) + ); + return new SchemaAndValue(schema, values); + } + + private static SchemaAndValue numberToSchemaAndValue(final Schema schema, final JsonNode nodeValue) { + Object value = null; + if (nodeValue.isNumber()) { + if (nodeValue.isInt()) { + value = nodeValue.intValue(); + } else if (nodeValue.isDouble()) { + value = nodeValue.doubleValue(); + } else if (nodeValue.isLong()) { + value = nodeValue.longValue(); + } + } else { + LOGGER.error("Unexpected value for schema {}", schema); + } + + switch (schema.type()) { + case INT8: + value = convertToByte(schema, value); + break; + case INT16: + value = convertToShort(schema, value); + break; + case INT32: + value = convertToInteger(schema, value); + break; + case INT64: + value = convertToLong(schema, value); + break; + case FLOAT32: + value = convertToFloat(schema, value); + break; + case FLOAT64: + value = convertToDouble(schema, value); + break; + default: + LOGGER.error("Unsupported Schema type: {}", schema.type()); + } + return new SchemaAndValue(schema, value); + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/KafkaCosmosChangeFeedState.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/KafkaCosmosChangeFeedState.java new file mode 100644 index 000000000000..e5e0fe89a8a1 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/KafkaCosmosChangeFeedState.java @@ -0,0 +1,115 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
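/*
 * Illustrative sketch (not part of this patch): rough usage of the JsonToStruct helper
 * above. A change feed item, read as a Jackson JsonNode, is mapped to a Connect Struct
 * with an inferred schema; integral JSON numbers become INT64 and fractional numbers
 * FLOAT64, per inferSchema. Assumes a surrounding method that declares
 * throws JsonProcessingException, plus Utils and the imports already used in JsonToStruct.
 */
JsonNode item = Utils.getSimpleObjectMapper()
    .readTree("{\"id\":\"item1\",\"count\":3,\"price\":9.99,\"active\":true}");
SchemaAndValue schemaAndValue = JsonToStruct.recordToSchemaAndValue(item);

Struct struct = (Struct) schemaAndValue.value();
// "id" is inferred as STRING, "count" as INT64, "price" as FLOAT64, "active" as BOOLEAN
assert "item1".equals(struct.getString("id"));
assert struct.getInt64("count") == 3L;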
+ +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.models.FeedRange; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; + +import java.io.IOException; +import java.util.Objects; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; + +@JsonSerialize(using = KafkaCosmosChangeFeedState.KafkaCosmosChangeFeedStateSerializer.class) +@JsonDeserialize(using = KafkaCosmosChangeFeedState.KafkaCosmosChangeFeedStateDeserializer.class) +public class KafkaCosmosChangeFeedState { + private final String responseContinuation; + private final FeedRange targetRange; + private final String itemLsn; + + public KafkaCosmosChangeFeedState(String responseContinuation, FeedRange targetRange) { + this(responseContinuation, targetRange, null); + } + + public KafkaCosmosChangeFeedState(String responseContinuation, FeedRange targetRange, String itemLsn) { + checkArgument(StringUtils.isNotEmpty(responseContinuation), "Argument 'responseContinuation' can not be null nor empty"); + checkNotNull(targetRange, "Argument 'targetRange' can not be null"); + + this.responseContinuation = responseContinuation; + this.targetRange = targetRange; + this.itemLsn = itemLsn; + } + + public String getResponseContinuation() { + return responseContinuation; + } + + public FeedRange getTargetRange() { + return targetRange; + } + + public String getItemLsn() { + return itemLsn; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()) { + return false; + } + + KafkaCosmosChangeFeedState that = (KafkaCosmosChangeFeedState) o; + return Objects.equals(responseContinuation, that.responseContinuation) + && Objects.equals(targetRange, that.targetRange) + && Objects.equals(itemLsn, that.itemLsn); + } + + @Override + public String toString() { + return "KafkaCosmosChangeFeedState{" + + "responseContinuation='" + responseContinuation + '\'' + + ", targetRange=" + targetRange + + ", itemLsn='" + itemLsn + '\'' + + '}'; + } + + @Override + public int hashCode() { + return Objects.hash(responseContinuation, targetRange, itemLsn); + } + + public static class KafkaCosmosChangeFeedStateSerializer extends com.fasterxml.jackson.databind.JsonSerializer { + @Override + public void serialize(KafkaCosmosChangeFeedState kafkaCosmosChangeFeedState, + JsonGenerator writer, + SerializerProvider serializerProvider) throws IOException { + writer.writeStartObject(); + writer.writeStringField("responseContinuation", kafkaCosmosChangeFeedState.getResponseContinuation()); + writer.writeStringField("targetRange", kafkaCosmosChangeFeedState.getTargetRange().toString()); + writer.writeStringField("itemLsn", kafkaCosmosChangeFeedState.getItemLsn()); + writer.writeEndObject(); + } + } + + static class KafkaCosmosChangeFeedStateDeserializer extends StdDeserializer { + KafkaCosmosChangeFeedStateDeserializer() { + super(KafkaCosmosChangeFeedState.class); 
+ } + + @Override + public KafkaCosmosChangeFeedState deserialize( + JsonParser jsonParser, + DeserializationContext deserializationContext) throws IOException { + + final JsonNode rootNode = jsonParser.getCodec().readTree(jsonParser); + String continuationState = rootNode.get("responseContinuation").asText(); + FeedRange targetRange = FeedRange.fromString(rootNode.get("targetRange").asText()); + String continuationLsn = rootNode.get("itemLsn").asText(); + return new KafkaCosmosChangeFeedState(continuationState, targetRange, continuationLsn); + } + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/MetadataMonitorThread.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/MetadataMonitorThread.java new file mode 100644 index 000000000000..49901acbfbf0 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/MetadataMonitorThread.java @@ -0,0 +1,320 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.kafka.connect.implementation.CosmosExceptionsHelper; +import com.azure.cosmos.models.CosmosContainerProperties; +import com.azure.cosmos.models.FeedRange; +import com.azure.cosmos.models.SqlParameter; +import com.azure.cosmos.models.SqlQuerySpec; +import org.apache.kafka.connect.source.SourceConnectorContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import reactor.core.publisher.Mono; +import reactor.core.scheduler.Scheduler; +import reactor.core.scheduler.Schedulers; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; + +public class MetadataMonitorThread extends Thread { + private static final Logger LOGGER = LoggerFactory.getLogger(MetadataMonitorThread.class); + + // TODO[Public Preview]: using a threadPool with less threads or single thread + public static final Scheduler CONTAINERS_MONITORING_SCHEDULER = Schedulers.newBoundedElastic( + Schedulers.DEFAULT_BOUNDED_ELASTIC_SIZE, + Schedulers.DEFAULT_BOUNDED_ELASTIC_QUEUESIZE, + "cosmos-source-metadata-monitoring-bounded-elastic", + 60, + true + ); + + private final CosmosSourceContainersConfig sourceContainersConfig; + private final CosmosMetadataConfig metadataConfig; + private final SourceConnectorContext connectorContext; + private final CosmosSourceOffsetStorageReader offsetStorageReader; + private final CosmosAsyncClient cosmosClient; + private final SqlQuerySpec containersQuerySpec; + private final ContainersMetadataTopicPartition containersMetadataTopicPartition; + private final AtomicBoolean isRunning = new AtomicBoolean(true); + + public MetadataMonitorThread( + CosmosSourceContainersConfig containersConfig, + CosmosMetadataConfig metadataConfig, + SourceConnectorContext connectorContext, + CosmosSourceOffsetStorageReader offsetStorageReader, + CosmosAsyncClient cosmosClient) { + + checkNotNull(containersConfig, "Argument 'containersConfig' can not be null"); + 
checkNotNull(metadataConfig, "Argument 'metadataConfig' can not be null"); + checkNotNull(connectorContext, "Argument 'connectorContext' can not be null"); + checkNotNull(offsetStorageReader, "Argument 'offsetStorageReader' can not be null"); + checkNotNull(cosmosClient, "Argument 'cosmosClient' can not be null"); + + this.sourceContainersConfig = containersConfig; + this.metadataConfig = metadataConfig; + this.connectorContext = connectorContext; + this.offsetStorageReader = offsetStorageReader; + this.cosmosClient = cosmosClient; + this.containersQuerySpec = this.getContainersQuerySpec(); + this.containersMetadataTopicPartition = new ContainersMetadataTopicPartition(containersConfig.getDatabaseName()); + } + + @Override + public void run() { + LOGGER.info("Start containers monitoring task"); + + int containersPollDelayInMs = this.metadataConfig.getMetadataPollDelayInMs(); + if (containersPollDelayInMs >= 0) { + Mono + .delay(Duration.ofMillis(containersPollDelayInMs)) + .flatMap(t -> { + if (this.isRunning.get()) { + LOGGER.trace("ValidateContainersMetadataChange..."); + return shouldRequestTaskReconfiguration(); + } + return Mono.empty(); + }) + .doOnNext(shouldRequestReconfiguration -> { + if (shouldRequestReconfiguration) { + LOGGER.info("Changes detected, requestTaskReconfiguration"); + this.connectorContext.requestTaskReconfiguration(); + } + }) + .onErrorResume(throwable -> { + LOGGER.warn("Containers metadata checking failed. Will retry in next polling cycle", throwable); + // TODO: only allow continue for transient errors, for others raiseError + return Mono.empty(); + }) + .repeat(() -> this.isRunning.get()) + .subscribeOn(CONTAINERS_MONITORING_SCHEDULER) + .subscribe(); + } + + LOGGER.info("Containers monitoring task not started due to negative containers poll delay"); + } + + private Mono shouldRequestTaskReconfiguration() { + // First check any containers to be copied changes + // Container re-created, add or remove will request task reconfiguration + // If there are no changes on the containers, then check for each container any feedRanges change need to request task reconfiguration + if (containersMetadataOffsetExists()) { + return this.getAllContainers() + .flatMap(containersList -> { + if (hasContainersChange(containersList)) { + return Mono.just(true); + } + + return shouldRequestTaskReconfigurationOnFeedRanges(containersList); + }); + } + + // there is no existing containers offset for comparison. + // Could be this is the first time for the connector to start and the metadata task has not been initialized. + // will skip and validate in next cycle. + return Mono.just(false); + } + + public boolean containersMetadataOffsetExists() { + return this.offsetStorageReader.getContainersMetadataOffset(this.sourceContainersConfig.getDatabaseName()) != null; + } + + public Mono> getAllContainers() { + return this.cosmosClient + .getDatabase(this.sourceContainersConfig.getDatabaseName()) + .queryContainers(this.containersQuerySpec) + .byPage() + .flatMapIterable(response -> response.getResults()) + .collectList() + .onErrorMap(throwable -> CosmosExceptionsHelper.convertToConnectException(throwable, "getAllContainers failed.")); + } + + public List getContainerRidsFromOffset() { + ContainersMetadataTopicOffset topicOffset = + this.offsetStorageReader + .getContainersMetadataOffset(this.sourceContainersConfig.getDatabaseName()); + return topicOffset == null ? 
new ArrayList<>() : topicOffset.getContainerRids(); + } + + private boolean hasContainersChange(List allContainers) { + List containerRidsFromOffset = this.getContainerRidsFromOffset(); + + List containersRidToBeCopied = + allContainers + .stream() + .map(CosmosContainerProperties::getResourceId) + .collect(Collectors.toList()); + + return !(containerRidsFromOffset.size() == containersRidToBeCopied.size() + && containerRidsFromOffset.containsAll(containersRidToBeCopied)); + } + + private Mono shouldRequestTaskReconfigurationOnFeedRanges(List allContainers) { + AtomicBoolean shouldRequestTaskReconfiguration = new AtomicBoolean(false); + AtomicInteger containerIndex = new AtomicInteger(0); + + // loop through containers to check any feedRanges change + return Mono.just(allContainers.get(containerIndex.get())) + .flatMap(containerProperties -> shouldRequestTaskReconfigurationOnFeedRanges(containerProperties)) + .doOnNext(hasChange -> { + shouldRequestTaskReconfiguration.set(hasChange); + containerIndex.incrementAndGet(); + }) + .repeat(() -> !shouldRequestTaskReconfiguration.get() && containerIndex.get() < allContainers.size()) + .then(Mono.defer(() -> Mono.just(shouldRequestTaskReconfiguration.get()))); + } + + private Mono shouldRequestTaskReconfigurationOnFeedRanges(CosmosContainerProperties containerProperties) { + if (feedRangesMetadataOffsetExists(containerProperties)) { + CosmosAsyncContainer container = + this.cosmosClient + .getDatabase(this.sourceContainersConfig.getDatabaseName()) + .getContainer(containerProperties.getId()); + + return container + .getFeedRanges() + .flatMap(feedRanges -> { + FeedRangesMetadataTopicOffset topicOffset = + this.offsetStorageReader + .getFeedRangesMetadataOffset( + this.sourceContainersConfig.getDatabaseName(), + containerProperties.getResourceId()); + + if (topicOffset == null) { + // the container may have recreated + return Mono.just(true); + } + + List differences = + topicOffset + .getFeedRanges() + .stream() + .filter(normalizedFeedRange -> !feedRanges.contains(normalizedFeedRange)) + .collect(Collectors.toList()); + + if (differences.size() == 0) { + // the feedRanges are exact the same + return Mono.just(false); + } + + // There are feedRanges change, but not all changes need to trigger a reconfiguration + // Merge should not trigger task reconfiguration as we will continue pulling the data from the pre-merge feed ranges + // Split should trigger task reconfiguration for load-balancing + return shouldRequestTaskReconfigurationOnFeedRangeChanges(containerProperties, differences); + }); + } + + // there is no existing feedRanges offset for comparison. + // Could be this is the first time for the connector to start and the metadata task has not been initialized. + // will skip and validate in next cycle. 
+ return Mono.just(false); + } + + private boolean feedRangesMetadataOffsetExists(CosmosContainerProperties containerProperties) { + return this.offsetStorageReader + .getFeedRangesMetadataOffset( + this.sourceContainersConfig.getDatabaseName(), + containerProperties.getResourceId()) != null; + } + + private Mono shouldRequestTaskReconfigurationOnFeedRangeChanges( + CosmosContainerProperties containerProperties, + List changes) { + if (changes == null || changes.isEmpty()) { + return Mono.just(false); + } + + AtomicBoolean shouldRequestTaskReconfiguration = new AtomicBoolean(false); + AtomicInteger feedRangeIndex = new AtomicInteger(0); + + return Mono.just(changes.get(feedRangeIndex.get())) + .flatMap(feedRangeChanged -> shouldRequestTaskReconfigurationOnFeedRangeChange(containerProperties, feedRangeChanged)) + .doOnNext(shouldReconfig -> { + shouldRequestTaskReconfiguration.compareAndSet(false, shouldReconfig); + feedRangeIndex.incrementAndGet(); + }) + .repeat(() -> (!shouldRequestTaskReconfiguration.get()) && feedRangeIndex.get() < changes.size()) + .then(Mono.defer(() -> Mono.just(shouldRequestTaskReconfiguration.get()))); + } + + private Mono shouldRequestTaskReconfigurationOnFeedRangeChange( + CosmosContainerProperties containerProperties, + FeedRange feedRangeChanged) { + + CosmosAsyncContainer container = + this.cosmosClient + .getDatabase(this.sourceContainersConfig.getDatabaseName()) + .getContainer(containerProperties.getId()); + + return ImplementationBridgeHelpers + .CosmosAsyncContainerHelper + .getCosmosAsyncContainerAccessor() + .getOverlappingFeedRanges(container, feedRangeChanged) + .map(matchedPkRanges -> { + if (matchedPkRanges.size() == 0) { + LOGGER.warn( + "FeedRang {} on container {} is gone but we failed to find at least one matching pkRange", + feedRangeChanged, + containerProperties.getResourceId()); + + return true; + } + + if (matchedPkRanges.size() == 1) { + LOGGER.info( + "FeedRange {} is merged into {} on container {}", + feedRangeChanged, + matchedPkRanges.get(0).toString(), + containerProperties.getResourceId()); + return false; + } + + LOGGER.info( + "FeedRange {} is split into [{}] on container {}", + feedRangeChanged, + matchedPkRanges.stream().map(FeedRange::toString).collect(Collectors.toList()), + containerProperties.getResourceId() + ); + return true; + }); + } + + private SqlQuerySpec getContainersQuerySpec() { + boolean includeAllContainers = sourceContainersConfig.isIncludeAllContainers(); + if (includeAllContainers) { + return new SqlQuerySpec("SELECT * FROM c"); + } + + StringBuilder queryStringBuilder = new StringBuilder(); + List parameters = new ArrayList<>(); + + queryStringBuilder.append("SELECT * FROM c WHERE c.id IN ( "); + for (int i = 0; i < sourceContainersConfig.getIncludedContainers().size(); i++) { + String idValue = sourceContainersConfig.getIncludedContainers().get(i); + String idParamName = "@param" + i; + + parameters.add(new SqlParameter(idParamName, idValue)); + queryStringBuilder.append(idParamName); + + if (i < sourceContainersConfig.getIncludedContainers().size() - 1) { + queryStringBuilder.append(", "); + } + } + queryStringBuilder.append(" )"); + return new SqlQuerySpec(queryStringBuilder.toString(), parameters); + } + + public void close() { + this.isRunning.set(false); + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/MetadataTaskUnit.java 
b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/MetadataTaskUnit.java new file mode 100644 index 000000000000..d070df5878cf --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/com/azure/cosmos/kafka/connect/implementation/source/MetadataTaskUnit.java @@ -0,0 +1,173 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.models.FeedRange; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import com.fasterxml.jackson.databind.node.ArrayNode; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; + +@JsonSerialize(using = MetadataTaskUnit.MetadataTaskUnitSerializer.class) +@JsonDeserialize(using = MetadataTaskUnit.MetadataTaskUnitDeserializer.class) +public class MetadataTaskUnit implements ITaskUnit { + private final String databaseName; + private final List containerRids; + private final Map> containersEffectiveRangesMap; + private final String topic; + + public MetadataTaskUnit( + String databaseName, + List containerRids, + Map> containersEffectiveRangesMap, + String topic) { + + checkArgument(StringUtils.isNotEmpty(databaseName), "Argument 'databaseName' should not be null"); + checkNotNull(containerRids, "Argument 'containerRids' can not be null"); + checkNotNull(containersEffectiveRangesMap, "Argument 'containersEffectiveRangesMap' can not be null"); + checkArgument(StringUtils.isNotEmpty(topic), "Argument 'topic' should not be null"); + + this.databaseName = databaseName; + this.containerRids = containerRids; + this.containersEffectiveRangesMap = containersEffectiveRangesMap; + this.topic = topic; + } + + public String getDatabaseName() { + return databaseName; + } + + public List getContainerRids() { + return containerRids; + } + + public Map> getContainersEffectiveRangesMap() { + return containersEffectiveRangesMap; + } + + public String getTopic() { + return topic; + } + + @Override + public String toString() { + return "MetadataTaskUnit{" + + "databaseName='" + + databaseName + + '\'' + + ", containerRids=" + + containerRids + + ", containersEffectiveRangesMap=" + + containersEffectiveRangesMap + + ", topic='" + topic + '\'' + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + MetadataTaskUnit that = (MetadataTaskUnit) o; + return databaseName.equals(that.databaseName) + && containerRids.equals(that.containerRids) + && Objects.equals(containersEffectiveRangesMap, 
that.containersEffectiveRangesMap) + && topic.equals(that.topic); + } + + @Override + public int hashCode() { + return Objects.hash(databaseName, containerRids, containersEffectiveRangesMap, topic); + } + + public static class MetadataTaskUnitSerializer extends com.fasterxml.jackson.databind.JsonSerializer { + @Override + public void serialize(MetadataTaskUnit metadataTaskUnit, + JsonGenerator writer, + SerializerProvider serializerProvider) throws IOException { + ObjectMapper objectMapper = new ObjectMapper(); + writer.writeStartObject(); + writer.writeStringField("databaseName", metadataTaskUnit.getDatabaseName()); + writer.writeStringField( + "containerRids", + objectMapper.writeValueAsString(metadataTaskUnit.getContainerRids())); + + writer.writeArrayFieldStart("containersEffectiveRangesMap"); + for (String containerRid : metadataTaskUnit.getContainersEffectiveRangesMap().keySet()) { + writer.writeStartObject(); + writer.writeStringField("containerRid", containerRid); + writer.writeStringField( + "effectiveFeedRanges", + objectMapper.writeValueAsString( + metadataTaskUnit. + getContainersEffectiveRangesMap(). + get(containerRid) + .stream() + .map(range -> range.toString()) + .collect(Collectors.toList()))); + writer.writeEndObject(); + } + writer.writeEndArray(); + + writer.writeStringField("topic", metadataTaskUnit.getTopic()); + writer.writeEndObject(); + } + } + + static class MetadataTaskUnitDeserializer extends StdDeserializer { + MetadataTaskUnitDeserializer() { + super(MetadataTaskUnit.class); + } + + @Override + public MetadataTaskUnit deserialize( + JsonParser jsonParser, + DeserializationContext deserializationContext) throws IOException { + + final JsonNode rootNode = jsonParser.getCodec().readTree(jsonParser); + final ObjectMapper mapper = (ObjectMapper) jsonParser.getCodec(); + + String databaseName = rootNode.get("databaseName").asText(); + List containerRids = mapper.readValue(rootNode.get("containerRids").asText(), new TypeReference>() {}); + ArrayNode arrayNode = (ArrayNode) rootNode.get("containersEffectiveRangesMap"); + + Map> containersEffectiveRangesMap = new HashMap<>(); + for (JsonNode jsonNode : arrayNode) { + String containerRid = jsonNode.get("containerRid").asText(); + List effectiveRanges = + mapper + .readValue( + jsonNode.get("effectiveFeedRanges").asText(), + new TypeReference>() {}) + .stream().map(rangeJson -> FeedRange.fromString(rangeJson)) + .collect(Collectors.toList()); + containersEffectiveRangesMap.put(containerRid, effectiveRanges); + } + String topic = rootNode.get("topic").asText(); + + return new MetadataTaskUnit(databaseName, containerRids, containersEffectiveRangesMap, topic); + } + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/module-info.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/module-info.java index 0557d9e6bc83..42f1d1d32a4f 100644 --- a/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/module-info.java +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/main/java/module-info.java @@ -6,10 +6,10 @@ requires transitive com.azure.cosmos; requires kafka.clients; - + requires connect.api; + requires com.fasterxml.jackson.module.afterburner; // public API surface area exports com.azure.cosmos.kafka.connect; - uses com.azure.core.util.tracing.Tracer; } diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/connectorPlugins/build.ps1 b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/connectorPlugins/build.ps1 new file mode 100644 index 000000000000..600429c8488a --- /dev/null +++ 
b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/connectorPlugins/build.ps1 @@ -0,0 +1,20 @@ +#!/usr/bin/env pwsh +$ErrorActionPreference='Stop' +cd $PSScriptRoot + +Write-Host "Deleting prior Cosmos DB connectors..." +rm -rf "$PSScriptRoot/src/test/connectorPlugins/connectors" +New-Item -Path "$PSScriptRoot/src/test/connectorPlugins" -ItemType "directory" -Name "connectors" -Force | Out-Null + +Write-Host "Rebuilding Cosmos DB connectors..." +mvn clean package -DskipTests -Dmaven.javadoc.skip +copy target\*-jar-with-dependencies.jar $PSScriptRoot/src/test/connectorPlugins/connectors +cd $PSScriptRoot/src/test/connectorPlugins + +Write-Host "Adding custom Insert UUID SMT" +cd $PSScriptRoot/src/test/connectorPlugins/connectors +git clone https://github.com/confluentinc/kafka-connect-insert-uuid.git insertuuid -q && cd insertuuid +mvn clean package -DskipTests=true +copy target\*.jar $PSScriptRoot/src/test/connectorPlugins/connectors +rm -rf "$PSScriptRoot/src/test/connectorPlugins/connectors/insertuuid" +cd $PSScriptRoot \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/connectorPlugins/build.sh b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/connectorPlugins/build.sh new file mode 100755 index 000000000000..bda724c262d4 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/connectorPlugins/build.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +echo "Deleting prior Cosmos DB connectors..." +rm -rf src/test/connectorPlugins/connectors +mkdir src/test/connectorPlugins/connectors + +echo "Rebuilding Cosmos DB connectors..." +mvn clean package -DskipTests=true -Dmaven.javadoc.skip=true +cp target/*-jar-with-dependencies.jar src/test/connectorPlugins/connectors +cd src/test/connectorPlugins + +echo "Adding custom Insert UUID SMT" +cd connectors +git clone https://github.com/confluentinc/kafka-connect-insert-uuid.git insertuuid -q && cd insertuuid +mvn clean package -DskipTests=true +cp target/*.jar ../ +cd .. && rm -rf insertuuid +cd ../ \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/CosmosDBSourceConnectorTest.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/CosmosDBSourceConnectorTest.java new file mode 100644 index 000000000000..84aac6cacdcf --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/CosmosDBSourceConnectorTest.java @@ -0,0 +1,636 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
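// Note: the kafka-integration test groups below presumably rely on the connector plugin jars
// assembled by src/test/connectorPlugins/build.sh (or build.ps1 on Windows) above, which package
// the connector jar-with-dependencies together with the Confluent InsertUUID SMT.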
+ +package com.azure.cosmos.kafka.connect; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.Strings; +import com.azure.cosmos.implementation.TestConfigurations; +import com.azure.cosmos.implementation.Utils; +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.implementation.changefeed.common.ChangeFeedMode; +import com.azure.cosmos.implementation.changefeed.common.ChangeFeedStartFromInternal; +import com.azure.cosmos.implementation.changefeed.common.ChangeFeedState; +import com.azure.cosmos.implementation.changefeed.common.ChangeFeedStateV1; +import com.azure.cosmos.implementation.feedranges.FeedRangeContinuation; +import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; +import com.azure.cosmos.implementation.query.CompositeContinuationToken; +import com.azure.cosmos.kafka.connect.implementation.CosmosClientStore; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosChangeFeedModes; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosChangeFeedStartFromModes; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosSourceConfig; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosSourceOffsetStorageReader; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosSourceTask; +import com.azure.cosmos.kafka.connect.implementation.source.FeedRangeContinuationTopicOffset; +import com.azure.cosmos.kafka.connect.implementation.source.FeedRangeContinuationTopicPartition; +import com.azure.cosmos.kafka.connect.implementation.source.FeedRangeTaskUnit; +import com.azure.cosmos.kafka.connect.implementation.source.FeedRangesMetadataTopicOffset; +import com.azure.cosmos.kafka.connect.implementation.source.FeedRangesMetadataTopicPartition; +import com.azure.cosmos.kafka.connect.implementation.source.KafkaCosmosChangeFeedState; +import com.azure.cosmos.kafka.connect.implementation.source.MetadataMonitorThread; +import com.azure.cosmos.kafka.connect.implementation.source.MetadataTaskUnit; +import com.azure.cosmos.models.CosmosContainerProperties; +import com.azure.cosmos.models.FeedRange; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import org.apache.kafka.common.config.Config; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.config.ConfigValue; +import org.apache.kafka.connect.source.SourceConnectorContext; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.stream.Collectors; + +import static com.azure.cosmos.kafka.connect.CosmosDBSourceConnectorTest.SourceConfigs.ALL_VALID_CONFIGS; +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; +import static org.testng.Assert.assertEquals; + +@Test +public class CosmosDBSourceConnectorTest extends KafkaCosmosTestSuiteBase { + @Test(groups = "unit") + public void taskClass() { + CosmosDBSourceConnector sourceConnector = new CosmosDBSourceConnector(); + assertEquals(sourceConnector.taskClass(), CosmosSourceTask.class); + } + + @Test(groups = "unit") + public void config() { + CosmosDBSourceConnector sourceConnector = new CosmosDBSourceConnector(); + ConfigDef configDef = sourceConnector.config(); + Map configs = configDef.configKeys(); + 
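+        // every config in ALL_VALID_CONFIGS should be registered in the ConfigDef; optional configs expose their documented default, required configs use ConfigDef.NO_DEFAULT_VALUE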
List> allValidConfigs = ALL_VALID_CONFIGS; + + for (SourceConfigEntry sourceConfigEntry : allValidConfigs) { + assertThat(configs.containsKey(sourceConfigEntry.getName())).isTrue(); + + configs.containsKey(sourceConfigEntry.getName()); + if (sourceConfigEntry.isOptional()) { + assertThat(configs.get(sourceConfigEntry.getName()).defaultValue).isEqualTo(sourceConfigEntry.getDefaultValue()); + } else { + assertThat(configs.get(sourceConfigEntry.getName()).defaultValue).isEqualTo(ConfigDef.NO_DEFAULT_VALUE); + } + } + } + + @Test(groups = "{ kafka }", timeOut = TIMEOUT) + public void getTaskConfigsWithoutPersistedOffset() throws JsonProcessingException { + CosmosDBSourceConnector sourceConnector = new CosmosDBSourceConnector(); + try { + Map sourceConfigMap = new HashMap<>(); + sourceConfigMap.put("kafka.connect.cosmos.accountEndpoint", TestConfigurations.HOST); + sourceConfigMap.put("kafka.connect.cosmos.accountKey", TestConfigurations.MASTER_KEY); + sourceConfigMap.put("kafka.connect.cosmos.source.database.name", databaseName); + List containersIncludedList = Arrays.asList( + singlePartitionContainerName, + multiPartitionContainerName + ); + sourceConfigMap.put("kafka.connect.cosmos.source.containers.includedList", containersIncludedList.toString()); + + String singlePartitionContainerTopicName = singlePartitionContainerName + "topic"; + List containerTopicMapList = Arrays.asList(singlePartitionContainerTopicName + "#" + singlePartitionContainerName); + sourceConfigMap.put("kafka.connect.cosmos.source.containers.topicMap", containerTopicMapList.toString()); + + // setup the internal state + this.setupDefaultConnectorInternalStates(sourceConnector, sourceConfigMap); + CosmosAsyncClient cosmosAsyncClient = KafkaCosmosReflectionUtils.getCosmosClient(sourceConnector); + + int maxTask = 2; + List> taskConfigs = sourceConnector.taskConfigs(maxTask); + assertThat(taskConfigs.size()).isEqualTo(maxTask); + + // construct expected feed range task units + CosmosContainerProperties singlePartitionContainer = getSinglePartitionContainer(cosmosAsyncClient); + List singlePartitionContainerFeedRangeTasks = + getFeedRangeTaskUnits( + cosmosAsyncClient, + databaseName, + singlePartitionContainer, + null, + singlePartitionContainerTopicName); + assertThat(singlePartitionContainerFeedRangeTasks.size()).isEqualTo(1); + + CosmosContainerProperties multiPartitionContainer = getMultiPartitionContainer(cosmosAsyncClient); + List multiPartitionContainerFeedRangeTasks = + getFeedRangeTaskUnits( + cosmosAsyncClient, + databaseName, + multiPartitionContainer, + null, + multiPartitionContainer.getId()); + assertThat(multiPartitionContainerFeedRangeTasks.size()).isGreaterThan(1); + + List> expectedTaskUnits = new ArrayList<>(); + for (int i = 0; i < maxTask; i++) { + expectedTaskUnits.add(new ArrayList<>()); + } + + expectedTaskUnits.get(0).add(singlePartitionContainerFeedRangeTasks.get(0)); + for (int i = 0; i < multiPartitionContainerFeedRangeTasks.size(); i++) { + int index = ( i + 1) % 2; + expectedTaskUnits.get(index).add(multiPartitionContainerFeedRangeTasks.get(i)); + } + + validateFeedRangeTasks(expectedTaskUnits, taskConfigs); + + MetadataTaskUnit expectedMetadataTaskUnit = + getMetadataTaskUnit( + cosmosAsyncClient, + databaseName, + Arrays.asList(singlePartitionContainer, multiPartitionContainer)); + validateMetadataTask(expectedMetadataTaskUnit, taskConfigs.get(1)); + } finally { + sourceConnector.stop(); + } + } + + @Test(groups = "{ kafka }", timeOut = TIMEOUT) + public void getTaskConfigsAfterSplit() throws 
JsonProcessingException { + // This test is to simulate after a split happen, the task resume with persisted offset + CosmosDBSourceConnector sourceConnector = new CosmosDBSourceConnector(); + + try { + Map sourceConfigMap = new HashMap<>(); + sourceConfigMap.put("kafka.connect.cosmos.accountEndpoint", TestConfigurations.HOST); + sourceConfigMap.put("kafka.connect.cosmos.accountKey", TestConfigurations.MASTER_KEY); + sourceConfigMap.put("kafka.connect.cosmos.source.database.name", databaseName); + List containersIncludedList = Arrays.asList(multiPartitionContainerName); + sourceConfigMap.put("kafka.connect.cosmos.source.containers.includedList", containersIncludedList.toString()); + + // setup the internal state + this.setupDefaultConnectorInternalStates(sourceConnector, sourceConfigMap); + + // override the storage reader with initial offset + CosmosAsyncClient cosmosAsyncClient = KafkaCosmosReflectionUtils.getCosmosClient(sourceConnector); + CosmosSourceOffsetStorageReader sourceOffsetStorageReader = KafkaCosmosReflectionUtils.getSourceOffsetStorageReader(sourceConnector); + InMemoryStorageReader inMemoryStorageReader = + (InMemoryStorageReader) KafkaCosmosReflectionUtils.getOffsetStorageReader(sourceOffsetStorageReader); + + CosmosContainerProperties multiPartitionContainer = getMultiPartitionContainer(cosmosAsyncClient); + + // constructing feed range continuation offset + FeedRangeContinuationTopicPartition feedRangeContinuationTopicPartition = + new FeedRangeContinuationTopicPartition( + databaseName, + multiPartitionContainer.getResourceId(), + FeedRange.forFullRange()); + + String initialContinuationState = new ChangeFeedStateV1( + multiPartitionContainer.getResourceId(), + FeedRangeEpkImpl.forFullRange(), + ChangeFeedMode.INCREMENTAL, + ChangeFeedStartFromInternal.createFromBeginning(), + FeedRangeContinuation.create( + multiPartitionContainer.getResourceId(), + FeedRangeEpkImpl.forFullRange(), + Arrays.asList(new CompositeContinuationToken("1", FeedRangeEpkImpl.forFullRange().getRange())))).toString(); + + FeedRangeContinuationTopicOffset feedRangeContinuationTopicOffset = + new FeedRangeContinuationTopicOffset(initialContinuationState, "1"); // using the same itemLsn as in the continuationToken + Map, Map> initialOffsetMap = new HashMap<>(); + initialOffsetMap.put( + FeedRangeContinuationTopicPartition.toMap(feedRangeContinuationTopicPartition), + FeedRangeContinuationTopicOffset.toMap(feedRangeContinuationTopicOffset)); + + // constructing feedRange metadata offset + FeedRangesMetadataTopicPartition feedRangesMetadataTopicPartition = + new FeedRangesMetadataTopicPartition(databaseName, multiPartitionContainer.getResourceId()); + FeedRangesMetadataTopicOffset feedRangesMetadataTopicOffset = + new FeedRangesMetadataTopicOffset(Arrays.asList(FeedRange.forFullRange())); + initialOffsetMap.put( + FeedRangesMetadataTopicPartition.toMap(feedRangesMetadataTopicPartition), + FeedRangesMetadataTopicOffset.toMap(feedRangesMetadataTopicOffset)); + + inMemoryStorageReader.populateOffset(initialOffsetMap); + + int maxTask = 2; + List> taskConfigs = sourceConnector.taskConfigs(maxTask); + assertThat(taskConfigs.size()).isEqualTo(maxTask); + + // construct expected feed range task units + List multiPartitionContainerFeedRangeTasks = + getFeedRangeTaskUnits( + cosmosAsyncClient, + databaseName, + multiPartitionContainer, + initialContinuationState, + multiPartitionContainer.getId()); + assertThat(multiPartitionContainerFeedRangeTasks.size()).isGreaterThan(1); + + List> expectedTaskUnits = new 
ArrayList<>(); + for (int i = 0; i < maxTask; i++) { + expectedTaskUnits.add(new ArrayList<>()); + } + + for (int i = 0; i < multiPartitionContainerFeedRangeTasks.size(); i++) { + expectedTaskUnits.get( i % 2).add(multiPartitionContainerFeedRangeTasks.get(i)); + } + + validateFeedRangeTasks(expectedTaskUnits, taskConfigs); + + MetadataTaskUnit expectedMetadataTaskUnit = + getMetadataTaskUnit( + cosmosAsyncClient, + databaseName, + Arrays.asList(multiPartitionContainer)); + validateMetadataTask(expectedMetadataTaskUnit, taskConfigs.get(1)); + } finally { + sourceConnector.stop(); + } + } + + @Test(groups = "{ kafka }", timeOut = TIMEOUT) + public void getTaskConfigsAfterMerge() throws JsonProcessingException { + // This test is to simulate after a merge happen, the task resume with previous feedRanges + CosmosDBSourceConnector sourceConnector = new CosmosDBSourceConnector(); + + try { + Map sourceConfigMap = new HashMap<>(); + sourceConfigMap.put("kafka.connect.cosmos.accountEndpoint", TestConfigurations.HOST); + sourceConfigMap.put("kafka.connect.cosmos.accountKey", TestConfigurations.MASTER_KEY); + sourceConfigMap.put("kafka.connect.cosmos.source.database.name", databaseName); + List containersIncludedList = Arrays.asList(singlePartitionContainerName); + sourceConfigMap.put("kafka.connect.cosmos.source.containers.includedList", containersIncludedList.toString()); + + // setup the internal state + this.setupDefaultConnectorInternalStates(sourceConnector, sourceConfigMap); + + // override the storage reader with initial offset + CosmosAsyncClient cosmosAsyncClient = KafkaCosmosReflectionUtils.getCosmosClient(sourceConnector); + CosmosSourceOffsetStorageReader sourceOffsetStorageReader = KafkaCosmosReflectionUtils.getSourceOffsetStorageReader(sourceConnector); + InMemoryStorageReader inMemoryStorageReader = + (InMemoryStorageReader) KafkaCosmosReflectionUtils.getOffsetStorageReader(sourceOffsetStorageReader); + + CosmosContainerProperties singlePartitionContainer = getSinglePartitionContainer(cosmosAsyncClient); + + // constructing feed range continuation offset + List childRanges = + ImplementationBridgeHelpers + .CosmosAsyncContainerHelper + .getCosmosAsyncContainerAccessor() + .trySplitFeedRange( + cosmosAsyncClient.getDatabase(databaseName).getContainer(singlePartitionContainer.getId()), + FeedRange.forFullRange(), + 2) + .block(); + + Map, Map> initialOffsetMap = new HashMap<>(); + List singlePartitionFeedRangeTaskUnits = new ArrayList<>(); + + for (FeedRange childRange : childRanges) { + FeedRangeContinuationTopicPartition feedRangeContinuationTopicPartition = + new FeedRangeContinuationTopicPartition( + databaseName, + singlePartitionContainer.getResourceId(), + childRange); + + ChangeFeedStateV1 childRangeContinuationState = new ChangeFeedStateV1( + singlePartitionContainer.getResourceId(), + (FeedRangeEpkImpl)childRange, + ChangeFeedMode.INCREMENTAL, + ChangeFeedStartFromInternal.createFromBeginning(), + FeedRangeContinuation.create( + singlePartitionContainer.getResourceId(), + (FeedRangeEpkImpl)childRange, + Arrays.asList(new CompositeContinuationToken("1", ((FeedRangeEpkImpl)childRange).getRange())))); + + FeedRangeContinuationTopicOffset feedRangeContinuationTopicOffset = + new FeedRangeContinuationTopicOffset(childRangeContinuationState.toString(), "1"); + + initialOffsetMap.put( + FeedRangeContinuationTopicPartition.toMap(feedRangeContinuationTopicPartition), + FeedRangeContinuationTopicOffset.toMap(feedRangeContinuationTopicOffset)); + + KafkaCosmosChangeFeedState 
taskUnitContinuationState = + new KafkaCosmosChangeFeedState(childRangeContinuationState.toString(), childRange, "1"); + singlePartitionFeedRangeTaskUnits.add( + new FeedRangeTaskUnit( + databaseName, + singlePartitionContainer.getId(), + singlePartitionContainer.getResourceId(), + childRange, + taskUnitContinuationState, + singlePartitionContainer.getId())); + } + + // constructing feedRange metadata offset + FeedRangesMetadataTopicPartition feedRangesMetadataTopicPartition = + new FeedRangesMetadataTopicPartition(databaseName, singlePartitionContainer.getResourceId()); + FeedRangesMetadataTopicOffset feedRangesMetadataTopicOffset = + new FeedRangesMetadataTopicOffset( + childRanges + .stream() + .collect(Collectors.toList())); + + initialOffsetMap.put( + FeedRangesMetadataTopicPartition.toMap(feedRangesMetadataTopicPartition), + FeedRangesMetadataTopicOffset.toMap(feedRangesMetadataTopicOffset)); + + inMemoryStorageReader.populateOffset(initialOffsetMap); + + int maxTask = 2; + List> taskConfigs = sourceConnector.taskConfigs(maxTask); + assertThat(taskConfigs.size()).isEqualTo(maxTask); + + // construct expected feed range task units + assertThat(singlePartitionFeedRangeTaskUnits.size()).isEqualTo(2); + + List> expectedTaskUnits = new ArrayList<>(); + for (int i = 0; i < maxTask; i++) { + expectedTaskUnits.add(new ArrayList<>()); + } + + for (int i = 0; i < singlePartitionFeedRangeTaskUnits.size(); i++) { + expectedTaskUnits.get( i % 2).add(singlePartitionFeedRangeTaskUnits.get(i)); + } + + validateFeedRangeTasks(expectedTaskUnits, taskConfigs); + + Map> containersEffectiveRangesMap = new HashMap<>(); + containersEffectiveRangesMap.put( + singlePartitionContainer.getResourceId(), + childRanges.stream().collect(Collectors.toList())); + + MetadataTaskUnit expectedMetadataTaskUnit = + new MetadataTaskUnit( + databaseName, + Arrays.asList(singlePartitionContainer.getResourceId()), + containersEffectiveRangesMap, + "_cosmos.metadata.topic" + ); + validateMetadataTask(expectedMetadataTaskUnit, taskConfigs.get(1)); + } finally { + sourceConnector.stop(); + } + } + + @Test(groups = "unit") + public void missingRequiredConfig() { + + List> requiredConfigs = + ALL_VALID_CONFIGS + .stream() + .filter(sourceConfigEntry -> !sourceConfigEntry.isOptional) + .collect(Collectors.toList()); + + assertThat(requiredConfigs.size()).isGreaterThan(1); + CosmosDBSourceConnector sourceConnector = new CosmosDBSourceConnector(); + for (SourceConfigEntry configEntry : requiredConfigs) { + + Map sourceConfigMap = this.getValidSourceConfig(); + sourceConfigMap.remove(configEntry.getName()); + Config validatedConfig = sourceConnector.validate(sourceConfigMap); + ConfigValue configValue = + validatedConfig + .configValues() + .stream() + .filter(config -> config.name().equalsIgnoreCase(configEntry.name)) + .findFirst() + .get(); + + assertThat(configValue.errorMessages()).isNotNull(); + assertThat(configValue.errorMessages().size()).isGreaterThanOrEqualTo(1); + } + } + + @Test(groups = "unit") + public void misFormattedConfig() { + CosmosDBSourceConnector sourceConnector = new CosmosDBSourceConnector(); + Map sourceConfigMap = this.getValidSourceConfig(); + + String topicMapConfigName = "kafka.connect.cosmos.source.containers.topicMap"; + sourceConfigMap.put(topicMapConfigName, UUID.randomUUID().toString()); + + Config validatedConfig = sourceConnector.validate(sourceConfigMap); + ConfigValue configValue = + validatedConfig + .configValues() + .stream() + .filter(config -> 
config.name().equalsIgnoreCase(topicMapConfigName)) + .findFirst() + .get(); + + assertThat(configValue.errorMessages()).isNotNull(); + assertThat( + configValue + .errorMessages() + .get(0) + .contains( + "The topic-container map should be a comma-delimited list of Kafka topic to Cosmos containers." + + " Each mapping should be a pair of Kafka topic and Cosmos container separated by '#'." + + " For example: topic1#con1,topic2#con2.")) + .isTrue(); + + // TODO[Public Preview]: add other config validations + } + + private Map getValidSourceConfig() { + Map sourceConfigMap = new HashMap<>(); + sourceConfigMap.put("kafka.connect.cosmos.accountEndpoint", TestConfigurations.HOST); + sourceConfigMap.put("kafka.connect.cosmos.accountKey", TestConfigurations.MASTER_KEY); + sourceConfigMap.put("kafka.connect.cosmos.source.database.name", databaseName); + List containersIncludedList = Arrays.asList(singlePartitionContainerName); + sourceConfigMap.put("kafka.connect.cosmos.source.containers.includedList", containersIncludedList.toString()); + + return sourceConfigMap; + } + + private void setupDefaultConnectorInternalStates(CosmosDBSourceConnector sourceConnector, Map sourceConfigMap) { + CosmosSourceConfig cosmosSourceConfig = new CosmosSourceConfig(sourceConfigMap); + KafkaCosmosReflectionUtils.setCosmosSourceConfig(sourceConnector, cosmosSourceConfig); + + CosmosAsyncClient cosmosAsyncClient = CosmosClientStore.getCosmosClient(cosmosSourceConfig.getAccountConfig()); + KafkaCosmosReflectionUtils.setCosmosClient(sourceConnector, cosmosAsyncClient); + + InMemoryStorageReader inMemoryStorageReader = new InMemoryStorageReader(); + CosmosSourceOffsetStorageReader storageReader = new CosmosSourceOffsetStorageReader(inMemoryStorageReader); + KafkaCosmosReflectionUtils.setOffsetStorageReader(sourceConnector, storageReader); + + SourceConnectorContext connectorContext = Mockito.mock(SourceConnectorContext.class); + MetadataMonitorThread monitorThread = new MetadataMonitorThread( + cosmosSourceConfig.getContainersConfig(), + cosmosSourceConfig.getMetadataConfig(), + connectorContext, + storageReader, + cosmosAsyncClient); + + KafkaCosmosReflectionUtils.setMetadataMonitorThread(sourceConnector, monitorThread); + } + + private List getFeedRangeTaskUnits( + CosmosAsyncClient cosmosClient, + String databaseName, + CosmosContainerProperties containerProperties, + String continuationState, + String topicName) { + + List feedRanges = + cosmosClient + .getDatabase(databaseName) + .getContainer(containerProperties.getId()) + .getFeedRanges() + .block(); + + return feedRanges + .stream() + .map(feedRange -> { + KafkaCosmosChangeFeedState kafkaCosmosChangeFeedState = null; + if (StringUtils.isNotEmpty(continuationState)) { + ChangeFeedState changeFeedState = ChangeFeedStateV1.fromString(continuationState); + kafkaCosmosChangeFeedState = + new KafkaCosmosChangeFeedState( + continuationState, + feedRange, + changeFeedState.getContinuation().getCurrentContinuationToken().getToken()); + } + + return new FeedRangeTaskUnit( + databaseName, + containerProperties.getId(), + containerProperties.getResourceId(), + feedRange, + kafkaCosmosChangeFeedState, + topicName); + }) + .collect(Collectors.toList()); + } + + private MetadataTaskUnit getMetadataTaskUnit( + CosmosAsyncClient cosmosAsyncClient, + String databaseName, + List containers) { + + Map> containersEffectiveRangesMap = new HashMap<>(); + for (CosmosContainerProperties containerProperties : containers) { + List feedRanges = + cosmosAsyncClient + 
.getDatabase(databaseName) + .getContainer(containerProperties.getId()) + .getFeedRanges() + .block(); + + containersEffectiveRangesMap.put(containerProperties.getResourceId(), feedRanges); + } + + return new MetadataTaskUnit( + databaseName, + containers.stream().map(CosmosContainerProperties::getResourceId).collect(Collectors.toList()), + containersEffectiveRangesMap, + "_cosmos.metadata.topic" + ); + } + + private void validateFeedRangeTasks( + List> feedRangeTaskUnits, + List> taskConfigs) throws JsonProcessingException { + + String taskUnitsKey = "kafka.connect.cosmos.source.task.feedRangeTaskUnits"; + List allTaskUnitsFromTaskConfigs = new ArrayList<>(); + for (Map taskConfig : taskConfigs) { + List taskUnitsFromTaskConfig = + Utils + .getSimpleObjectMapper() + .readValue(taskConfig.get(taskUnitsKey), new TypeReference>() {}) + .stream() + .map(taskUnitString -> { + try { + return Utils.getSimpleObjectMapper().readValue(taskUnitString, FeedRangeTaskUnit.class); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }) + .collect(Collectors.toList()); + allTaskUnitsFromTaskConfigs.addAll(taskUnitsFromTaskConfig); + } + + Map allExpectedTaskUnits = new HashMap<>(); + feedRangeTaskUnits.forEach(taskUnits -> { + allExpectedTaskUnits.putAll( + taskUnits.stream().collect(Collectors.toMap(taskUnit -> taskUnit.getFeedRange(), taskUnit -> taskUnit))); + }); + + assertThat(allExpectedTaskUnits.size()).isEqualTo(allTaskUnitsFromTaskConfigs.size()); + for (FeedRangeTaskUnit feedRangeTaskUnit : allTaskUnitsFromTaskConfigs) { + FeedRangeTaskUnit expectedTaskUnit = allExpectedTaskUnits.get(feedRangeTaskUnit.getFeedRange()); + assertThat(expectedTaskUnit).isNotNull(); + assertThat( + Utils.getSimpleObjectMapper().writeValueAsString(expectedTaskUnit) + ).isEqualTo( + Utils.getSimpleObjectMapper().writeValueAsString(feedRangeTaskUnit) + ); + } + } + + private void validateMetadataTask( + MetadataTaskUnit expectedMetadataTaskUnit, + Map taskConfig) throws JsonProcessingException { + + String taskUnitKey = "kafka.connect.cosmos.source.task.metadataTaskUnit"; + assertThat(taskConfig.containsKey(taskUnitKey)); + MetadataTaskUnit metadataTaskUnitFromTaskConfig = + Utils.getSimpleObjectMapper().readValue(taskConfig.get(taskUnitKey), MetadataTaskUnit.class); + + assertThat(expectedMetadataTaskUnit).isEqualTo(metadataTaskUnitFromTaskConfig); + } + + public static class SourceConfigEntry { + private final String name; + private final T defaultValue; + private final boolean isOptional; + + public SourceConfigEntry(String name, T defaultValue, boolean isOptional) { + this.name = name; + this.defaultValue = defaultValue; + this.isOptional = isOptional; + } + + public String getName() { + return name; + } + + public T getDefaultValue() { + return defaultValue; + } + + public boolean isOptional() { + return isOptional; + } + } + + public static class SourceConfigs { + public static final List> ALL_VALID_CONFIGS = Arrays.asList( + new SourceConfigEntry("kafka.connect.cosmos.accountEndpoint", null, false), + new SourceConfigEntry("kafka.connect.cosmos.accountKey", null, false), + new SourceConfigEntry("kafka.connect.cosmos.useGatewayMode", false, true), + new SourceConfigEntry("kafka.connect.cosmos.preferredRegionsList", Strings.Emtpy, true), + new SourceConfigEntry("kafka.connect.cosmos.applicationName", Strings.Emtpy, true), + new SourceConfigEntry("kafka.connect.cosmos.source.database.name", null, false), + new SourceConfigEntry("kafka.connect.cosmos.source.containers.includeAll", false, 
true), + new SourceConfigEntry("kafka.connect.cosmos.source.containers.includedList", Strings.Emtpy, true), + new SourceConfigEntry("kafka.connect.cosmos.source.containers.topicMap", Strings.Emtpy, true), + new SourceConfigEntry( + "kafka.connect.cosmos.source.changeFeed.startFrom", + CosmosChangeFeedStartFromModes.BEGINNING.getName(), + true), + new SourceConfigEntry( + "kafka.connect.cosmos.source.changeFeed.mode", + CosmosChangeFeedModes.LATEST_VERSION.getName(), + true), + new SourceConfigEntry("kafka.connect.cosmos.source.changeFeed.maxItemCountHint", 1000, true), + new SourceConfigEntry("kafka.connect.cosmos.source.metadata.poll.delay.ms", 5 * 60 * 1000, true), + new SourceConfigEntry( + "kafka.connect.cosmos.source.metadata.storage.topic", + "_cosmos.metadata.topic", + true), + new SourceConfigEntry("kafka.connect.cosmos.source.messageKey.enabled", true, true), + new SourceConfigEntry("kafka.connect.cosmos.source.messageKey.field", "id", true) + ); + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/CosmosDbSourceConnectorITest.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/CosmosDbSourceConnectorITest.java new file mode 100644 index 000000000000..9c9ef85c9a33 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/CosmosDbSourceConnectorITest.java @@ -0,0 +1,113 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.kafka.connect.implementation.CosmosClientStore; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosSourceConfig; +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.rnorth.ducttape.unreliables.Unreliables; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.annotations.Test; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +public class CosmosDbSourceConnectorITest extends KafkaCosmosIntegrationTestSuiteBase { + private static final Logger logger = LoggerFactory.getLogger(CosmosDbSourceConnectorITest.class); + + // TODO[public preview]: add more integration tests + @Test(groups = { "kafka-integration"}, timeOut = TIMEOUT) + public void readFromSingleContainer() { + Map sourceConnectorConfig = new HashMap<>(); + sourceConnectorConfig.put("connector.class", "com.azure.cosmos.kafka.connect.CosmosDBSourceConnector"); + sourceConnectorConfig.put("kafka.connect.cosmos.accountEndpoint", KafkaCosmosTestConfigurations.HOST); + sourceConnectorConfig.put("kafka.connect.cosmos.accountKey", KafkaCosmosTestConfigurations.MASTER_KEY); + sourceConnectorConfig.put("kafka.connect.cosmos.applicationName", "Test"); + sourceConnectorConfig.put("kafka.connect.cosmos.source.database.name", databaseName); + sourceConnectorConfig.put("kafka.connect.cosmos.source.containers.includeAll", "false"); + sourceConnectorConfig.put("kafka.connect.cosmos.source.containers.includedList", singlePartitionContainerName); + 
+ // Create topic ahead of time + kafkaCosmosConnectContainer.createTopic(singlePartitionContainerName, 1); + + CosmosSourceConfig sourceConfig = new CosmosSourceConfig(sourceConnectorConfig); + CosmosAsyncClient client = CosmosClientStore.getCosmosClient(sourceConfig.getAccountConfig()); + CosmosAsyncContainer container = client.getDatabase(databaseName).getContainer(singlePartitionContainerName); + + String connectorName = "simpleTest-" + UUID.randomUUID(); + try { + // create few items in the container + logger.info("creating items in container {}", singlePartitionContainerName); + List createdItems = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + TestItem testItem = TestItem.createNewItem(); + container.createItem(testItem).block(); + createdItems.add(testItem.getId()); + } + + kafkaCosmosConnectContainer.registerConnector(connectorName, sourceConnectorConfig); + + logger.info("Getting consumer and subscribe to topic {}", singlePartitionContainerName); + KafkaConsumer kafkaConsumer = kafkaCosmosConnectContainer.getConsumer(); + kafkaConsumer.subscribe( + Arrays.asList( + singlePartitionContainerName, + sourceConfig.getMetadataConfig().getMetadataTopicName())); + + List> metadataRecords = new ArrayList<>(); + List> itemRecords = new ArrayList<>(); + + Unreliables.retryUntilTrue(30, TimeUnit.SECONDS, () -> {; + kafkaConsumer.poll(Duration.ofMillis(1000)) + .iterator() + .forEachRemaining(consumerRecord -> { + if (consumerRecord.topic().equals(singlePartitionContainerName)) { + itemRecords.add(consumerRecord); + } else if (consumerRecord.topic().equals(sourceConfig.getMetadataConfig().getMetadataTopicName())) { + metadataRecords.add(consumerRecord); + } + }); + return metadataRecords.size() >= 2 && itemRecords.size() >= createdItems.size(); + }); + + //TODO[public preview]currently the metadata record value is null, populate it with metadata and validate the content here + assertThat(metadataRecords.size()).isEqualTo(2); + assertThat(itemRecords.size()).isEqualTo(createdItems.size()); + + List receivedItems = + itemRecords.stream().map(consumerRecord -> { + JsonNode jsonNode = consumerRecord.value(); + return jsonNode.get("payload").get("id").asText(); + }).collect(Collectors.toList()); + + assertThat(receivedItems.containsAll(createdItems)).isTrue(); + + } finally { + if (client != null) { + logger.info("cleaning container {}", singlePartitionContainerName); + cleanUpContainer(client, databaseName, singlePartitionContainerName); + client.close(); + } + + // IMPORTANT: remove the connector after use + if (kafkaCosmosConnectContainer != null) { + kafkaCosmosConnectContainer.deleteConnector(connectorName); + } + } + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/InMemoryStorageReader.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/InMemoryStorageReader.java new file mode 100644 index 000000000000..c7903df4d640 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/InMemoryStorageReader.java @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect; + +import org.apache.kafka.connect.storage.OffsetStorageReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/*** + * Only used for test. 
+ */ +public class InMemoryStorageReader implements OffsetStorageReader { + private static final Logger logger = LoggerFactory.getLogger(InMemoryStorageReader.class); + private Map<Map<String, Object>, Map<String, Object>> offsetStore; + public InMemoryStorageReader() { + offsetStore = new HashMap<>(); + } + + public void populateOffset(Map<Map<String, Object>, Map<String, Object>> offsets) { + this.offsetStore.putAll(offsets); + } + + @Override + public <T> Map<String, Object> offset(Map<String, T> partition) { + return offsetStore.get(partition); + } + + @Override + public <T> Map<Map<String, T>, Map<String, Object>> offsets(Collection<Map<String, T>> partitions) { + Map<Map<String, T>, Map<String, Object>> results = new HashMap<>(); + for (Map<String, T> partition : partitions) { + results.put(partition, offsetStore.get(partition)); + } + + return results; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosConnectContainer.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosConnectContainer.java new file mode 100644 index 000000000000..6587222aaee5 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosConnectContainer.java @@ -0,0 +1,159 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect; + +import com.azure.core.exception.ResourceNotFoundException; +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.NewTopic; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.connect.json.JsonDeserializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.sourcelab.kafka.connect.apiclient.Configuration; +import org.sourcelab.kafka.connect.apiclient.KafkaConnectClient; +import org.sourcelab.kafka.connect.apiclient.request.dto.ConnectorDefinition; +import org.sourcelab.kafka.connect.apiclient.request.dto.NewConnectorDefinition; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.KafkaContainer; +import org.testcontainers.utility.DockerImageName; + +import java.util.Arrays; +import java.util.Map; +import java.util.Properties; + +public class KafkaCosmosConnectContainer extends GenericContainer<KafkaCosmosConnectContainer> { + private static final Logger logger = LoggerFactory.getLogger(KafkaCosmosConnectContainer.class); + private static final int KAFKA_CONNECT_PORT = 8083; + private KafkaConsumer<String, JsonNode> kafkaConsumer; + private AdminClient adminClient; + private int replicationFactor = 1; + + public KafkaCosmosConnectContainer(final DockerImageName dockerImageName) { + super(dockerImageName); + defaultConfig(); + } + + private void defaultConfig() { + withEnv("CONNECT_GROUP_ID", KafkaCosmosTestConfigurations.CONNECT_GROUP_ID); + withEnv("CONNECT_CONFIG_STORAGE_TOPIC", KafkaCosmosTestConfigurations.CONNECT_CONFIG_STORAGE_TOPIC); + withEnv("CONNECT_OFFSET_STORAGE_TOPIC", KafkaCosmosTestConfigurations.CONNECT_OFFSET_STORAGE_TOPIC); + withEnv("CONNECT_STATUS_STORAGE_TOPIC", KafkaCosmosTestConfigurations.CONNECT_STATUS_STORAGE_TOPIC); + withEnv("CONNECT_KEY_CONVERTER", KafkaCosmosTestConfigurations.CONNECT_KEY_CONVERTER); + withEnv("CONNECT_VALUE_CONVERTER", KafkaCosmosTestConfigurations.CONNECT_VALUE_CONVERTER); + withEnv("CONNECT_PLUGIN_PATH", KafkaCosmosTestConfigurations.CONNECT_PLUGIN_PATH); + withEnv("CONNECT_REST_ADVERTISED_HOST_NAME", KafkaCosmosTestConfigurations.CONNECT_REST_ADVERTISED_HOST_NAME); +
withEnv("CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR", KafkaCosmosTestConfigurations.CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR); + withEnv("CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR", KafkaCosmosTestConfigurations.CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR); + withEnv("CONNECT_STATUS_STORAGE_REPLICATION_FACTOR", KafkaCosmosTestConfigurations.CONNECT_STATUS_STORAGE_REPLICATION_FACTOR); +// withEnv("CONNECT_LOG4J_ROOT_LOGLEVEL", "DEBUG"); +// withEnv("CONNECT_LOG4J_LOGGERS", "org.apache.kafka=DEBUG,org.reflections=DEBUG,com.azure.cosmos.kafka=DEBUG"); + + withExposedPorts(KAFKA_CONNECT_PORT); + } + + private Properties defaultConsumerConfig() { + Properties kafkaConsumerProperties = new Properties(); + kafkaConsumerProperties.put("group.id", "IntegrationTest"); + kafkaConsumerProperties.put("value.deserializer", JsonDeserializer.class.getName()); + kafkaConsumerProperties.put("key.deserializer", StringDeserializer.class.getName()); + kafkaConsumerProperties.put("sasl.mechanism", "PLAIN"); + kafkaConsumerProperties.put("client.dns.lookup", "use_all_dns_ips"); + kafkaConsumerProperties.put("session.timeout.ms", "45000"); + return kafkaConsumerProperties; + } + + public KafkaCosmosConnectContainer withLocalKafkaContainer(final KafkaContainer kafkaContainer) { + withNetwork(kafkaContainer.getNetwork()); + + withEnv("CONNECT_BOOTSTRAP_SERVERS", kafkaContainer.getNetworkAliases().get(0) + ":9092"); + return self(); + } + + public KafkaCosmosConnectContainer withCloudKafkaContainer() { + withEnv("CONNECT_BOOTSTRAP_SERVERS", KafkaCosmosTestConfigurations.BOOTSTRAP_SERVER); + withEnv("CONNECT_SECURITY_PROTOCOL", "SASL_SSL"); + withEnv("CONNECT_SASL_JAAS_CONFIG", KafkaCosmosTestConfigurations.SASL_JAAS); + withEnv("CONNECT_SASL_MECHANISM", "PLAIN"); + + withEnv("CONNECT_PRODUCER_SECURITY_PROTOCOL", "SASL_SSL"); + withEnv("CONNECT_PRODUCER_SASL_JAAS_CONFIG", KafkaCosmosTestConfigurations.SASL_JAAS); + withEnv("CONNECT_PRODUCER_SASL_MECHANISM", "PLAIN"); + + withEnv("CONNECT_CONSUMER_SECURITY_PROTOCOL", "SASL_SSL"); + withEnv("CONNECT_CONSUMER_SASL_JAAS_CONFIG", KafkaCosmosTestConfigurations.SASL_JAAS); + withEnv("CONNECT_CONSUMER_SASL_MECHANISM", "PLAIN"); + return self(); + } + + public KafkaCosmosConnectContainer withLocalBootstrapServer(String localBootstrapServer) { + Properties consumerProperties = defaultConsumerConfig(); + consumerProperties.put("bootstrap.servers", localBootstrapServer); + this.kafkaConsumer = new KafkaConsumer<>(consumerProperties); + this.adminClient = this.getAdminClient(localBootstrapServer); + return self(); + } + + public KafkaCosmosConnectContainer withCloudBootstrapServer() { + Properties consumerProperties = defaultConsumerConfig(); + consumerProperties.put("bootstrap.servers", KafkaCosmosTestConfigurations.BOOTSTRAP_SERVER); + consumerProperties.put("sasl.jaas.config", KafkaCosmosTestConfigurations.SASL_JAAS); + consumerProperties.put("security.protocol", "SASL_SSL"); + consumerProperties.put("sasl.mechanism", "PLAIN"); + + this.kafkaConsumer = new KafkaConsumer<>(consumerProperties); + this.adminClient = this.getAdminClient(KafkaCosmosTestConfigurations.BOOTSTRAP_SERVER); + this.replicationFactor = 3; + return self(); + } + + public void registerConnector(String name, Map config) { + NewConnectorDefinition newConnectorDefinition = new NewConnectorDefinition(name, config); + KafkaConnectClient kafkaConnectClient = new KafkaConnectClient(new Configuration(getTarget())); + + logger.info("adding kafka connector {}", name); + + try { + Thread.sleep(500); + } catch 
(InterruptedException e) { + throw new RuntimeException(e); + } + ConnectorDefinition connectorDefinition = kafkaConnectClient.addConnector(newConnectorDefinition); + logger.info("adding kafka connector completed with " + connectorDefinition); + } + + public void deleteConnector(String name) { + KafkaConnectClient kafkaConnectClient = new KafkaConnectClient(new Configuration(getTarget())); + try { + kafkaConnectClient.deleteConnector(name); + logger.info("Deleting connector {} succeeded.", name); + } catch (Exception exception) { + if (exception instanceof ResourceNotFoundException) { + logger.info("Connector {} not found", name); + } else { + logger.warn("Failed to delete connector {}", name, exception); + } + } + } + + public KafkaConsumer<String, JsonNode> getConsumer() { + return this.kafkaConsumer; + } + + public String getTarget() { + return "http://" + getContainerIpAddress() + ":" + getMappedPort(KAFKA_CONNECT_PORT); + } + + public void createTopic(String topicName, int numPartitions) { + this.adminClient.createTopics( + Arrays.asList(new NewTopic(topicName, numPartitions, (short) replicationFactor))); + } + + private AdminClient getAdminClient(String bootstrapServer) { + Properties properties = new Properties(); + properties.put("bootstrap.servers", bootstrapServer); + return AdminClient.create(properties); + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosIntegrationTestSuiteBase.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosIntegrationTestSuiteBase.java new file mode 100644 index 000000000000..6a113f45b037 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosIntegrationTestSuiteBase.java @@ -0,0 +1,154 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+ +package com.azure.cosmos.kafka.connect; + +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.KafkaContainer; +import org.testcontainers.containers.Network; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.lifecycle.Startables; +import org.testcontainers.utility.DockerImageName; +import org.testng.annotations.AfterSuite; +import org.testng.annotations.BeforeSuite; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.Arrays; +import java.util.stream.Stream; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; +import static org.testng.AssertJUnit.fail; + +public class KafkaCosmosIntegrationTestSuiteBase extends KafkaCosmosTestSuiteBase { + private static final Logger logger = LoggerFactory.getLogger(KafkaCosmosIntegrationTestSuiteBase.class); + private static final Duration DEFAULT_CONTAINER_START_UP_TIMEOUT = Duration.ofMinutes(5); + + protected static Network network; + protected static KafkaContainer kafkaContainer; + protected static KafkaCosmosConnectContainer kafkaCosmosConnectContainer; + + @BeforeSuite(groups = { "kafka-integration" }, timeOut = 10 * SUITE_SETUP_TIMEOUT) + public static void beforeIntegrationSuite() throws IOException, InterruptedException { + + logger.info("beforeIntegrationSuite Started"); + // initialize the kafka, kafka-connect containers + setupDockerContainers(); + } + + @AfterSuite(groups = { "kafka-integration" }, timeOut = 10 * SUITE_SETUP_TIMEOUT) + public static void afterIntegrationSuite() { + + logger.info("afterIntegrationSuite Started"); + // The TestContainers library will automatically clean up resources by using Ryuk sidecar container + } + + private static void setupDockerContainers() throws IOException, InterruptedException { + createConnectorJar(); + + logger.info("Setting up docker containers..."); + + network = Network.newNetwork(); + if (StringUtils.isEmpty(KafkaCosmosTestConfigurations.BOOTSTRAP_SERVER)) { + setupDockerContainersForLocal(); + } else { + setupDockerContainersForCloud(); + } + } + + private static void setupDockerContainersForLocal() { + logger.info("Setting up local docker containers..."); + network = Network.newNetwork(); + kafkaContainer = new KafkaContainer(getDockerImageName("confluentinc/cp-kafka:")) + .withNetwork(network) + .withNetworkAliases("broker") + .withStartupTimeout(DEFAULT_CONTAINER_START_UP_TIMEOUT); + + kafkaCosmosConnectContainer = new KafkaCosmosConnectContainer(getDockerImageName("confluentinc/cp-kafka-connect:")) + .withNetwork(network) + .dependsOn(kafkaContainer) + .withLocalKafkaContainer(kafkaContainer) + .withStartupTimeout(DEFAULT_CONTAINER_START_UP_TIMEOUT) + .withFileSystemBind("src/test/connectorPlugins", "/kafka/connect/cosmos-connector") + .withLogConsumer(new Slf4jLogConsumer(logger)); + + Startables.deepStart(Stream.of(kafkaContainer, kafkaCosmosConnectContainer)).join(); + + // the mapped bootstrap server port can only be obtained after the container started + kafkaCosmosConnectContainer.withLocalBootstrapServer(kafkaContainer.getBootstrapServers()); + } + + private static void setupDockerContainersForCloud() { + logger.info("Setting up docker containers with self-managed cloud clusters..."); + kafkaCosmosConnectContainer = new 
KafkaCosmosConnectContainer(getDockerImageName("confluentinc/cp-kafka-connect:")) + .withCloudKafkaContainer() + .withStartupTimeout(DEFAULT_CONTAINER_START_UP_TIMEOUT) + .withFileSystemBind("src/test/connectorPlugins", "/kafka/connect/cosmos-connector") + .withLogConsumer(new Slf4jLogConsumer(logger)); + + Startables.deepStart(Stream.of(kafkaCosmosConnectContainer)).join(); + + kafkaCosmosConnectContainer.withCloudBootstrapServer(); + } + + private static void createConnectorJar() throws IOException, InterruptedException { + logger.info("Start creating connector jars..."); + + boolean isWindows = System.getProperty("os.name").toLowerCase().startsWith("windows"); + Path connectorPluginsPath = Paths.get("src/test/connectorPlugins"); + + ProcessBuilder processBuilder; + if (isWindows) { + String buildScriptPath = connectorPluginsPath + "/build.ps1"; + processBuilder = new ProcessBuilder("powershell.exe", buildScriptPath); + } else { + String buildScriptPath = connectorPluginsPath + "/build.sh"; + processBuilder = new ProcessBuilder("/bin/bash", buildScriptPath); + } + + processBuilder.redirectErrorStream(true); + Process process = processBuilder.start(); + + BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream())); + String line; + while ((line = reader.readLine()) != null) { + logger.info(line); + System.out.println(line); + } + + // Wait for the script to complete + int exitCode = process.waitFor(); + if (exitCode == 0) { + logger.info("Build script completed successfully"); + // validate the jar exists + File jarFile = findFile("src/test/connectorPlugins/connectors", "jar-with-dependencies.jar"); + + assertThat(jarFile).isNotNull(); + assertThat(jarFile.exists()).isTrue(); + + } else { + fail("Build script failed with error code " + exitCode); + } + } + + private static File findFile(String folder, String filenameFilterEndsWith) { + File file = new File(folder); + if (!file.exists() || !file.isDirectory()) { + return null; + } + return Arrays.stream(file.listFiles()) + .filter(f -> f.getName().endsWith(filenameFilterEndsWith)) + .findFirst().orElse(null); + } + + private static DockerImageName getDockerImageName(String prefix) { + return DockerImageName.parse(prefix + KafkaCosmosTestConfigurations.CONFLUENT_VERSION); + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosReflectionUtils.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosReflectionUtils.java new file mode 100644 index 000000000000..e15f33693460 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosReflectionUtils.java @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+ +package com.azure.cosmos.kafka.connect; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosSourceConfig; +import com.azure.cosmos.kafka.connect.implementation.source.CosmosSourceOffsetStorageReader; +import com.azure.cosmos.kafka.connect.implementation.source.MetadataMonitorThread; +import org.apache.commons.lang3.reflect.FieldUtils; +import org.apache.kafka.connect.storage.OffsetStorageReader; + +public class KafkaCosmosReflectionUtils { + private static void set(Object object, T newValue, String fieldName) { + try { + FieldUtils.writeField(object, fieldName, newValue, true); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + @SuppressWarnings("unchecked") + private static T get(Object object, String fieldName) { + try { + return (T) FieldUtils.readField(object, fieldName, true); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + public static void setCosmosClient(CosmosDBSourceConnector sourceConnector, CosmosAsyncClient cosmosAsyncClient) { + set(sourceConnector, cosmosAsyncClient,"cosmosClient"); + } + + public static void setCosmosSourceConfig(CosmosDBSourceConnector sourceConnector, CosmosSourceConfig sourceConfig) { + set(sourceConnector, sourceConfig,"config"); + } + + public static void setOffsetStorageReader( + CosmosDBSourceConnector sourceConnector, + CosmosSourceOffsetStorageReader storageReader) { + set(sourceConnector, storageReader,"offsetStorageReader"); + } + + public static void setMetadataMonitorThread( + CosmosDBSourceConnector sourceConnector, + MetadataMonitorThread metadataMonitorThread) { + set(sourceConnector, metadataMonitorThread,"monitorThread"); + } + + public static CosmosAsyncClient getCosmosClient(CosmosDBSourceConnector sourceConnector) { + return get(sourceConnector,"cosmosClient"); + } + + public static CosmosSourceOffsetStorageReader getSourceOffsetStorageReader(CosmosDBSourceConnector sourceConnector) { + return get(sourceConnector,"offsetStorageReader"); + } + + public static OffsetStorageReader getOffsetStorageReader(CosmosSourceOffsetStorageReader sourceOffsetStorageReader) { + return get(sourceOffsetStorageReader,"offsetStorageReader"); + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosTestConfigurations.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosTestConfigurations.java new file mode 100644 index 000000000000..f26d86ce1728 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosTestConfigurations.java @@ -0,0 +1,208 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.cosmos.kafka.connect; + +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.implementation.guava25.base.Strings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.InputStream; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Properties; + +public class KafkaCosmosTestConfigurations { + private static final Logger logger = LoggerFactory.getLogger(KafkaCosmosTestConfigurations.class); + private static Properties properties = loadProperties(); + + private static final String COSMOS_EMULATOR_KEY = "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw=="; + private static final String COSMOS_EMULATOR_HOST = "https://localhost:8081/"; + public static final String DEFAULT_CONFLUENT_VERSION = "7.6.0"; //https://docs.confluent.io/platform/current/installation/versions-interoperability.html + public static final String DEFAULT_CONNECT_GROUP_ID = "1"; + public static final String DEFAULT_CONNECT_CONFIG_STORAGE_TOPIC = "docker-connect-configs"; + public static final String DEFAULT_CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR = "1"; + public static final String DEFAULT_CONNECT_OFFSET_STORAGE_TOPIC = "docker-connect-offsets"; + public static final String DEFAULT_CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR = "1"; + public static final String DEFAULT_CONNECT_STATUS_STORAGE_TOPIC = "docker-connect-status"; + public static final String DEFAULT_CONNECT_STATUS_STORAGE_REPLICATION_FACTOR = "1"; + public static final String DEFAULT_CONNECT_KEY_CONVERTER = "org.apache.kafka.connect.storage.StringConverter"; + public static final String DEFAULT_CONNECT_VALUE_CONVERTER = "org.apache.kafka.connect.json.JsonConverter"; + public static final String DEFAULT_CONNECT_PLUGIN_PATH = "/kafka/connect/cosmos-connector"; + public static final String DEFAULT_CONNECT_REST_ADVERTISED_HOST_NAME = "connect"; + + public final static String MASTER_KEY = + properties + .getProperty( + "ACCOUNT_KEY", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("ACCOUNT_KEY")), COSMOS_EMULATOR_KEY)); + + public final static String SECONDARY_MASTER_KEY = + properties + .getProperty( + "SECONDARY_ACCOUNT_KEY", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("SECONDARY_ACCOUNT_KEY")), COSMOS_EMULATOR_KEY)); + + public final static String HOST = + properties + .getProperty( + "ACCOUNT_HOST", + StringUtils.defaultString( + Strings.emptyToNull(System.getenv().get("ACCOUNT_HOST")), COSMOS_EMULATOR_HOST)); + + public final static String KAFKA_CLUSTER_KEY = + properties + .getProperty( + "KAFKA_CLUSTER_KEY", + StringUtils.defaultString( + Strings.emptyToNull(System.getenv().get("KAFKA_CLUSTER_KEY")), "")); + + public final static String KAFKA_CLUSTER_SECRET = + properties + .getProperty( + "KAFKA_CLUSTER_SECRET", + StringUtils.defaultString( + Strings.emptyToNull(System.getenv().get("KAFKA_CLUSTER_SECRET")), "")); + + public final static String SCHEMA_REGISTRY_KEY = + properties + .getProperty( + "SCHEMA_REGISTRY_KEY", + StringUtils.defaultString( + Strings.emptyToNull(System.getenv().get("SCHEMA_REGISTRY_KEY")), "")); + + public final static String SCHEMA_REGISTRY_SECRET = + properties + .getProperty( + "SCHEMA_REGISTRY_SECRET", + StringUtils.defaultString( + Strings.emptyToNull(System.getenv().get("SCHEMA_REGISTRY_SECRET")), "")); + + public final static String SCHEMA_REGISTRY_URL = + properties + 
.getProperty( + "SCHEMA_REGISTRY_URL", + StringUtils.defaultString( + Strings.emptyToNull(System.getenv().get("SCHEMA_REGISTRY_URL")), "")); + + public final static String BOOTSTRAP_SERVER = + properties + .getProperty( + "BOOTSTRAP_SERVER", + StringUtils.defaultString( + Strings.emptyToNull(System.getenv().get("BOOTSTRAP_SERVER")), "")); + + public final static String SASL_JAAS = + properties + .getProperty( + "SASL_JAAS", + StringUtils.defaultString( + Strings.emptyToNull(System.getenv().get("SASL_JAAS")), "")); + + public final static String CONFLUENT_VERSION = + properties + .getProperty( + "CONFLUENT_VERSION", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONFLUENT_VERSION")), DEFAULT_CONFLUENT_VERSION)); + + public final static String CONNECT_GROUP_ID = + properties + .getProperty( + "CONNECT_GROUP_ID", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONNECT_GROUP_ID")), DEFAULT_CONNECT_GROUP_ID)); + + public final static String CONNECT_CONFIG_STORAGE_TOPIC = + properties + .getProperty( + "CONNECT_CONFIG_STORAGE_TOPIC", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONNECT_CONFIG_STORAGE_TOPIC")), DEFAULT_CONNECT_CONFIG_STORAGE_TOPIC)); + + public final static String CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR = + properties + .getProperty( + "CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR")), DEFAULT_CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR)); + + public final static String CONNECT_OFFSET_STORAGE_TOPIC = + properties + .getProperty( + "CONNECT_OFFSET_STORAGE_TOPIC", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONNECT_OFFSET_STORAGE_TOPIC")), DEFAULT_CONNECT_OFFSET_STORAGE_TOPIC)); + + public final static String CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR = + properties + .getProperty( + "CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR")), DEFAULT_CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR)); + + public final static String CONNECT_STATUS_STORAGE_TOPIC = + properties + .getProperty( + "CONNECT_STATUS_STORAGE_TOPIC", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONNECT_STATUS_STORAGE_TOPIC")), DEFAULT_CONNECT_STATUS_STORAGE_TOPIC)); + + public final static String CONNECT_STATUS_STORAGE_REPLICATION_FACTOR = + properties + .getProperty( + "CONNECT_STATUS_STORAGE_REPLICATION_FACTOR", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONNECT_STATUS_STORAGE_REPLICATION_FACTOR")), DEFAULT_CONNECT_STATUS_STORAGE_REPLICATION_FACTOR)); + + public final static String CONNECT_KEY_CONVERTER = + properties + .getProperty( + "CONNECT_KEY_CONVERTER", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONNECT_KEY_CONVERTER")), DEFAULT_CONNECT_KEY_CONVERTER)); + + public final static String CONNECT_VALUE_CONVERTER = + properties + .getProperty( + "CONNECT_VALUE_CONVERTER", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONNECT_VALUE_CONVERTER")), DEFAULT_CONNECT_VALUE_CONVERTER)); + + public final static String CONNECT_PLUGIN_PATH = + properties + .getProperty( + "CONNECT_PLUGIN_PATH", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONNECT_PLUGIN_PATH")), DEFAULT_CONNECT_PLUGIN_PATH)); + + public final static String CONNECT_REST_ADVERTISED_HOST_NAME = + properties + .getProperty( + 
"CONNECT_REST_ADVERTISED_HOST_NAME", + StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("CONNECT_REST_ADVERTISED_HOST_NAME")), DEFAULT_CONNECT_REST_ADVERTISED_HOST_NAME)); + + private static Properties loadProperties() { + Path root = FileSystems.getDefault().getPath("").toAbsolutePath(); + Path propertiesInProject = Paths.get(root.toString(),"../kafka-cosmos-local.properties"); + + Properties props = loadFromPathIfExists(propertiesInProject); + if (props != null) { + return props; + } + + Path propertiesInUserHome = Paths.get(System.getProperty("user.home"), "kafka-cosmos-local.properties"); + props = loadFromPathIfExists(propertiesInUserHome); + if (props != null) { + return props; + } + + return System.getProperties(); + } + + private static Properties loadFromPathIfExists(Path propertiesFilePath) { + if (Files.exists(propertiesFilePath)) { + try (InputStream in = Files.newInputStream(propertiesFilePath)) { + Properties props = new Properties(); + props.load(in); + logger.info("properties loaded from {}", propertiesFilePath); + return props; + } catch (Exception e) { + logger.error("Loading properties {} failed", propertiesFilePath, e); + } + } + return null; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosTestNGLogListener.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosTestNGLogListener.java new file mode 100644 index 000000000000..0d4366c9d729 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosTestNGLogListener.java @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.IInvokedMethod; +import org.testng.IInvokedMethodListener; +import org.testng.ITestResult; +import org.testng.SkipException; + +public class KafkaCosmosTestNGLogListener implements IInvokedMethodListener { + private final Logger logger = LoggerFactory.getLogger(KafkaCosmosTestNGLogListener.class); + + @Override + public void beforeInvocation(IInvokedMethod iInvokedMethod, ITestResult iTestResult) { + logger.info("beforeInvocation: {}", methodName(iInvokedMethod)); + } + + @Override + public void afterInvocation(IInvokedMethod iInvokedMethod, ITestResult iTestResult) { + logger.info("afterInvocation: {}, total time {}ms, result {}", + methodName(iInvokedMethod), + iTestResult.getEndMillis() - iTestResult.getStartMillis(), + resultDetails(iTestResult) + ); + } + + private String resultDetails(ITestResult iTestResult) { + if (iTestResult.isSuccess()) { + return "success"; + } + + if (iTestResult.getThrowable() instanceof SkipException) { + return "skipped. reason: " + failureDetails(iTestResult); + } + + return "failed. 
reason: " + failureDetails(iTestResult); + } + + private String failureDetails(ITestResult iTestResult) { + if (iTestResult.isSuccess()) { + return null; + } + + if (iTestResult.getThrowable() == null) { + logger.error("throwable is null"); + return null; + } + + return iTestResult.getThrowable().getClass().getName() + ": " + iTestResult.getThrowable().getMessage(); + } + + private String methodName(IInvokedMethod iInvokedMethod) { + return iInvokedMethod.getTestMethod().getRealClass().getSimpleName() + "#" + iInvokedMethod.getTestMethod().getMethodName(); + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosTestSuiteBase.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosTestSuiteBase.java new file mode 100644 index 000000000000..ac52e93574e0 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/KafkaCosmosTestSuiteBase.java @@ -0,0 +1,250 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect; + +import com.azure.core.credential.AzureKeyCredential; +import com.azure.cosmos.ConsistencyLevel; +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.CosmosAsyncDatabase; +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.GatewayConnectionConfig; +import com.azure.cosmos.ThrottlingRetryOptions; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.models.CosmosContainerProperties; +import com.azure.cosmos.models.CosmosContainerRequestOptions; +import com.azure.cosmos.models.CosmosItemRequestOptions; +import com.azure.cosmos.models.IncludedPath; +import com.azure.cosmos.models.IndexingPolicy; +import com.azure.cosmos.models.PartitionKeyDefinition; +import com.azure.cosmos.models.ThroughputProperties; +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.shaded.com.google.common.base.Strings; +import org.testng.ITest; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.AfterSuite; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.Listeners; + +import java.lang.reflect.Method; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.UUID; + +@Listeners({KafkaCosmosTestNGLogListener.class}) +public class KafkaCosmosTestSuiteBase implements ITest { + protected static Logger logger = LoggerFactory.getLogger(KafkaCosmosTestSuiteBase.class.getSimpleName()); + protected static final int TIMEOUT = 60000; + + protected static final int SUITE_SETUP_TIMEOUT = 120000; + protected static final int SUITE_SHUTDOWN_TIMEOUT = 60000; + + protected static final AzureKeyCredential credential; + protected static String databaseName; + protected static String multiPartitionContainerWithIdAsPartitionKeyName; + protected static String multiPartitionContainerName; + protected static String singlePartitionContainerName; + private String testName; + + protected static CosmosAsyncDatabase getDatabase(CosmosAsyncClient client) { + return client.getDatabase(databaseName); + } + + protected static CosmosContainerProperties 
getMultiPartitionContainerWithIdAsPartitionKey(CosmosAsyncClient client) { + return client + .getDatabase(databaseName) + .getContainer(multiPartitionContainerWithIdAsPartitionKeyName) + .read() + .block() + .getProperties(); + } + + protected static CosmosContainerProperties getMultiPartitionContainer(CosmosAsyncClient client) { + return client + .getDatabase(databaseName) + .getContainer(multiPartitionContainerName) + .read() + .block() + .getProperties(); + } + + protected static CosmosContainerProperties getSinglePartitionContainer(CosmosAsyncClient client) { + return client + .getDatabase(databaseName) + .getContainer(singlePartitionContainerName) + .read() + .block() + .getProperties(); + } + + static { + credential = new AzureKeyCredential(KafkaCosmosTestConfigurations.MASTER_KEY); + } + + @BeforeSuite(groups = { "kafka" }, timeOut = SUITE_SETUP_TIMEOUT) + public static void beforeSuite() { + + logger.info("beforeSuite Started"); + try (CosmosAsyncClient houseKeepingClient = createGatewayHouseKeepingDocumentClient(true).buildAsyncClient()) { + databaseName = createDatabase(houseKeepingClient); + + CosmosContainerRequestOptions options = new CosmosContainerRequestOptions(); + multiPartitionContainerName = + createCollection( + houseKeepingClient, + databaseName, + getCollectionDefinitionWithRangeRangeIndex(), + options, + 10100); + multiPartitionContainerWithIdAsPartitionKeyName = + createCollection( + houseKeepingClient, + databaseName, + getCollectionDefinitionWithRangeRangeIndexWithIdAsPartitionKey(), + options, + 10100); + singlePartitionContainerName = + createCollection( + houseKeepingClient, + databaseName, + getCollectionDefinitionWithRangeRangeIndex(), + options, + 6000); + } + } + + @AfterSuite(groups = { "kafka" }, timeOut = SUITE_SHUTDOWN_TIMEOUT) + public static void afterSuite() { + + logger.info("afterSuite Started"); + + try (CosmosAsyncClient houseKeepingClient = createGatewayHouseKeepingDocumentClient(true).buildAsyncClient()) { + safeDeleteDatabase(houseKeepingClient, databaseName); + } + } + + + static protected CosmosClientBuilder createGatewayHouseKeepingDocumentClient(boolean contentResponseOnWriteEnabled) { + ThrottlingRetryOptions options = new ThrottlingRetryOptions(); + options.setMaxRetryWaitTime(Duration.ofSeconds(SUITE_SETUP_TIMEOUT)); + GatewayConnectionConfig gatewayConnectionConfig = new GatewayConnectionConfig(); + return new CosmosClientBuilder().endpoint(KafkaCosmosTestConfigurations.HOST) + .credential(credential) + .gatewayMode(gatewayConnectionConfig) + .throttlingRetryOptions(options) + .contentResponseOnWriteEnabled(contentResponseOnWriteEnabled) + .consistencyLevel(ConsistencyLevel.SESSION); + } + + private static String createDatabase(CosmosAsyncClient cosmosAsyncClient) { + String databaseName = "KafkaCosmosTest-" + UUID.randomUUID(); + cosmosAsyncClient.createDatabase(databaseName).block(); + + return databaseName; + } + + private static String createCollection( + CosmosAsyncClient cosmosAsyncClient, + String database, + CosmosContainerProperties cosmosContainerProperties, + CosmosContainerRequestOptions options, + int throughput) { + + cosmosAsyncClient + .getDatabase(database) + .createContainer( + cosmosContainerProperties, + ThroughputProperties.createManualThroughput(throughput), + options) + .block(); + + // Creating a container is async - especially on multi-partition or multi-region accounts + boolean isMultiRegional = ImplementationBridgeHelpers + .CosmosAsyncClientHelper + .getCosmosAsyncClientAccessor() + 
.getPreferredRegions(cosmosAsyncClient).size() > 1; + + if (throughput > 6000 || isMultiRegional) { + try { + Thread.sleep(3000); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + return cosmosContainerProperties.getId(); + } + + static protected CosmosContainerProperties getCollectionDefinitionWithRangeRangeIndex() { + return getCollectionDefinitionWithRangeRangeIndex(Collections.singletonList("/mypk")); + } + + static protected CosmosContainerProperties getCollectionDefinitionWithRangeRangeIndexWithIdAsPartitionKey() { + return getCollectionDefinitionWithRangeRangeIndex(Collections.singletonList("/id")); + } + + public static void cleanUpContainer(CosmosAsyncClient client, String databaseName, String containerName) { + CosmosAsyncContainer container = client.getDatabase(databaseName).getContainer(containerName); + List allItems = + container.queryItems("select * from c", JsonNode.class) + .byPage() + .flatMapIterable(feedResponse -> feedResponse.getResults()) + .collectList() + .block(); + + // do a batch delete + for (JsonNode item : allItems) { + container.deleteItem(item, new CosmosItemRequestOptions()).block(); + } + } + + static protected CosmosContainerProperties getCollectionDefinitionWithRangeRangeIndex(List partitionKeyPath) { + PartitionKeyDefinition partitionKeyDef = new PartitionKeyDefinition(); + + partitionKeyDef.setPaths(partitionKeyPath); + IndexingPolicy indexingPolicy = new IndexingPolicy(); + List includedPaths = new ArrayList<>(); + IncludedPath includedPath = new IncludedPath("/*"); + includedPaths.add(includedPath); + indexingPolicy.setIncludedPaths(includedPaths); + + CosmosContainerProperties cosmosContainerProperties = new CosmosContainerProperties(UUID.randomUUID().toString(), partitionKeyDef); + cosmosContainerProperties.setIndexingPolicy(indexingPolicy); + + return cosmosContainerProperties; + } + + private static void safeDeleteDatabase(CosmosAsyncClient client, String database) { + if (StringUtils.isNotEmpty(database)) { + try { + client.getDatabase(database).delete().block(); + } catch (Exception e) { + logger.error("Failed to delete database {}", database, e); + } + } + } + + @BeforeMethod(alwaysRun = true) + public final void setTestName(Method method, Object[] row) { + this.testName = Strings.lenientFormat("%s::%s", + method.getDeclaringClass().getSimpleName(), + method.getName()); + } + + @AfterMethod(alwaysRun = true) + public final void unsetTestName() { + this.testName = null; + } + + @Override + public String getTestName() { + return this.testName; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/TestItem.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/TestItem.java new file mode 100644 index 000000000000..669dd67d9349 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/TestItem.java @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.cosmos.kafka.connect; + +import java.util.UUID; + +public class TestItem { + private String id; + private String mypk; + private String prop; + + public TestItem() { + } + + public TestItem(String id, String mypk, String prop) { + this.id = id; + this.mypk = mypk; + this.prop = prop; + } + + public static TestItem createNewItem() { + return new TestItem(UUID.randomUUID().toString(), UUID.randomUUID().toString(), UUID.randomUUID().toString()); + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getMypk() { + return mypk; + } + + public void setMypk(String mypk) { + this.mypk = mypk; + } + + public String getProp() { + return prop; + } + + public void setProp(String prop) { + this.prop = prop; + } +} diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceTaskTest.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceTaskTest.java new file mode 100644 index 000000000000..cfa257a64465 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/implementation/source/CosmosSourceTaskTest.java @@ -0,0 +1,256 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.kafka.connect.implementation.source; + +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.implementation.TestConfigurations; +import com.azure.cosmos.kafka.connect.KafkaCosmosTestSuiteBase; +import com.azure.cosmos.kafka.connect.TestItem; +import com.azure.cosmos.kafka.connect.implementation.CosmosClientStore; +import com.azure.cosmos.models.CosmosContainerProperties; +import com.azure.cosmos.models.CosmosQueryRequestOptions; +import com.azure.cosmos.models.FeedRange; +import com.azure.cosmos.models.ThroughputProperties; +import com.azure.cosmos.models.ThroughputResponse; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.source.SourceRecord; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.stream.Collectors; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +public class CosmosSourceTaskTest extends KafkaCosmosTestSuiteBase { + private final int CONTAINER_THROUGHPUT_FOR_SPLIT = 10100; + + @Test(groups = {"kafka"}, timeOut = 10 * TIMEOUT) + public void poll() throws InterruptedException { + String testContainerName = "KafkaCosmosTestPoll-" + UUID.randomUUID(); + Map sourceConfigMap = new HashMap<>(); + sourceConfigMap.put("kafka.connect.cosmos.accountEndpoint", TestConfigurations.HOST); + sourceConfigMap.put("kafka.connect.cosmos.accountKey", TestConfigurations.MASTER_KEY); + sourceConfigMap.put("kafka.connect.cosmos.source.database.name", databaseName); + List containersIncludedList = Arrays.asList(testContainerName); + sourceConfigMap.put("kafka.connect.cosmos.source.containers.includedList", containersIncludedList.toString()); + + CosmosSourceConfig sourceConfig = new CosmosSourceConfig(sourceConfigMap); + CosmosAsyncClient client = CosmosClientStore.getCosmosClient(sourceConfig.getAccountConfig()); + + // create a new container as we are going to trigger split as well, isolate the possible impact for other tests + 
CosmosContainerProperties testContainer = + client + .getDatabase(databaseName) + .createContainer(testContainerName, "/id") + .block() + .getProperties(); + + try { + Map taskConfigMap = sourceConfig.originalsStrings(); + + // define metadata task + List feedRanges = + client.getDatabase(databaseName).getContainer(testContainerName).getFeedRanges().block(); + assertThat(feedRanges.size()).isEqualTo(1); + + Map> containersEffectiveRangesMap = new HashMap<>(); + containersEffectiveRangesMap.put(testContainer.getResourceId(), Arrays.asList(FeedRange.forFullRange())); + MetadataTaskUnit metadataTaskUnit = new MetadataTaskUnit( + databaseName, + Arrays.asList(testContainer.getResourceId()), + containersEffectiveRangesMap, + testContainerName); + taskConfigMap.putAll(CosmosSourceTaskConfig.getMetadataTaskUnitConfigMap(metadataTaskUnit)); + + // define feedRanges task + FeedRangeTaskUnit feedRangeTaskUnit = new FeedRangeTaskUnit( + databaseName, + testContainerName, + testContainer.getResourceId(), + FeedRange.forFullRange(), + null, + testContainerName); + taskConfigMap.putAll(CosmosSourceTaskConfig.getFeedRangeTaskUnitsConfigMap(Arrays.asList(feedRangeTaskUnit))); + + CosmosSourceTask sourceTask = new CosmosSourceTask(); + sourceTask.start(taskConfigMap); + + // first creating few items in the container + List createdItems = this.createItems(client, databaseName, testContainerName, 10); + + List sourceRecords = sourceTask.poll(); + // Since there are metadata task unit being defined, we expected to get the metadata records first. + validateMetadataRecords(sourceRecords, metadataTaskUnit); + + sourceRecords = sourceTask.poll(); + validateFeedRangeRecords(sourceRecords, createdItems); + + logger.info("Testing split..."); + // trigger split + ThroughputResponse throughputResponse = + client + .getDatabase(databaseName) + .getContainer(testContainerName) + .replaceThroughput(ThroughputProperties.createManualThroughput(CONTAINER_THROUGHPUT_FOR_SPLIT)) + .block(); + + // Wait for the throughput update to complete so that we get the partition split + while (true) { + assert throughputResponse != null; + if (!throughputResponse.isReplacePending()) { + break; + } + logger.info("Waiting for split to complete"); + Thread.sleep(10 * 1000); + throughputResponse = client.getDatabase(databaseName).getContainer(testContainerName).readThroughput().block(); + } + + createdItems = this.createItems(client, databaseName, testContainerName, 10); + sourceRecords = new ArrayList<>(); + // the first poll will return 0 records as it will be the first time the task detect split happened + // internally it will create two new feedRange task units + // so here we will need to poll 3 times to get all newly created items + for (int i = 0; i < 3; i++) { + sourceRecords.addAll(sourceTask.poll()); + } + validateFeedRangeRecords(sourceRecords, createdItems); + } finally { + if (client != null) { + client.getDatabase(databaseName).getContainer(testContainerName).delete().block(); + client.close(); + } + } + } + + @Test(groups = { "kafka" }, timeOut = TIMEOUT) + public void pollWithSpecificFeedRange() { + // Test only items belong to the feedRange defined in the feedRangeTaskUnit will be returned + Map sourceConfigMap = new HashMap<>(); + sourceConfigMap.put("kafka.connect.cosmos.accountEndpoint", TestConfigurations.HOST); + sourceConfigMap.put("kafka.connect.cosmos.accountKey", TestConfigurations.MASTER_KEY); + sourceConfigMap.put("kafka.connect.cosmos.source.database.name", databaseName); + List containersIncludedList = 
Arrays.asList(multiPartitionContainerName); + sourceConfigMap.put("kafka.connect.cosmos.source.containers.includedList", containersIncludedList.toString()); + + CosmosSourceConfig sourceConfig = new CosmosSourceConfig(sourceConfigMap); + CosmosAsyncClient client = CosmosClientStore.getCosmosClient(sourceConfig.getAccountConfig()); + + try { + Map taskConfigMap = sourceConfig.originalsStrings(); + + // define metadata task + List feedRanges = + client.getDatabase(databaseName).getContainer(multiPartitionContainerName).getFeedRanges().block(); + CosmosContainerProperties multiPartitionContainer = getMultiPartitionContainer(client); + assertThat(feedRanges.size()).isGreaterThan(1); + + // define feedRanges task + FeedRangeTaskUnit feedRangeTaskUnit = new FeedRangeTaskUnit( + databaseName, + multiPartitionContainer.getId(), + multiPartitionContainer.getResourceId(), + feedRanges.get(0), + null, + multiPartitionContainer.getId()); + taskConfigMap.putAll(CosmosSourceTaskConfig.getFeedRangeTaskUnitsConfigMap(Arrays.asList(feedRangeTaskUnit))); + + CosmosSourceTask sourceTask = new CosmosSourceTask(); + sourceTask.start(taskConfigMap); + + // first creating few items in the container + this.createItems(client, databaseName, multiPartitionContainer.getId(), 10); + + List sourceRecords = new ArrayList<>(); + for (int i = 0; i < 3; i++) { // poll few times + sourceRecords.addAll(sourceTask.poll()); + } + + // get all items belong to feed range 0 + CosmosQueryRequestOptions queryRequestOptions = new CosmosQueryRequestOptions(); + queryRequestOptions.setFeedRange(feedRanges.get(0)); + List expectedItems = client + .getDatabase(databaseName) + .getContainer(multiPartitionContainer.getId()) + .queryItems("select * from c", queryRequestOptions, TestItem.class) + .byPage() + .flatMapIterable(feedResponse -> feedResponse.getResults()) + .collectList() + .block(); + + validateFeedRangeRecords(sourceRecords, expectedItems); + } finally { + if (client != null) { + // clean up containers + cleanUpContainer(client, databaseName, multiPartitionContainerName); + client.close(); + } + } + } + + private void validateMetadataRecords(List sourceRecords, MetadataTaskUnit metadataTaskUnit) { + // one containers metadata + // one feedRanges metadata record for each container + assertThat(sourceRecords.size()).isEqualTo(metadataTaskUnit.getContainerRids().size() + 1); + + ContainersMetadataTopicPartition containersMetadataTopicPartition = + new ContainersMetadataTopicPartition(metadataTaskUnit.getDatabaseName()); + ContainersMetadataTopicOffset containersMetadataTopicOffset = + new ContainersMetadataTopicOffset(metadataTaskUnit.getContainerRids()); + assertThat(sourceRecords.get(0).sourcePartition()).isEqualTo(ContainersMetadataTopicPartition.toMap(containersMetadataTopicPartition)); + assertThat(sourceRecords.get(0).sourceOffset()).isEqualTo(ContainersMetadataTopicOffset.toMap(containersMetadataTopicOffset)); + + for (int i = 0; i < metadataTaskUnit.getContainerRids().size(); i++) { + String containerRid = metadataTaskUnit.getContainerRids().get(i); + SourceRecord sourceRecord = sourceRecords.get(i + 1); + List containerFeedRanges = + metadataTaskUnit.getContainersEffectiveRangesMap().get(containerRid); + assertThat(containerFeedRanges).isNotNull(); + + FeedRangesMetadataTopicPartition feedRangesMetadataTopicPartition = + new FeedRangesMetadataTopicPartition(metadataTaskUnit.getDatabaseName(), containerRid); + FeedRangesMetadataTopicOffset feedRangesMetadataTopicOffset = + new 
FeedRangesMetadataTopicOffset(containerFeedRanges);
+            assertThat(sourceRecord.sourcePartition()).isEqualTo(FeedRangesMetadataTopicPartition.toMap(feedRangesMetadataTopicPartition));
+            assertThat(sourceRecord.sourceOffset()).isEqualTo(FeedRangesMetadataTopicOffset.toMap(feedRangesMetadataTopicOffset));
+        }
+    }
+
+    private void validateFeedRangeRecords(List<SourceRecord> sourceRecords, List<TestItem> expectedItems) {
+        List<String> idsReceived =
+            sourceRecords
+                .stream()
+                .map(sourceRecord -> ((Struct) sourceRecord.value()).get("id").toString())
+                .collect(Collectors.toList());
+        List<String> expectedIds =
+            expectedItems
+                .stream()
+                .map(testItem -> testItem.getId())
+                .collect(Collectors.toList());
+        assertThat(idsReceived.size()).isEqualTo(expectedItems.size());
+        assertThat(idsReceived.containsAll(expectedIds)).isTrue();
+    }
+
+    private List<TestItem> createItems(
+        CosmosAsyncClient client,
+        String databaseName,
+        String containerName,
+        int numberOfItems) {
+
+        List<TestItem> testItems = new ArrayList<>();
+        CosmosAsyncContainer container = client.getDatabase(databaseName).getContainer(containerName);
+        for (int i = 0; i < numberOfItems; i++) {
+            TestItem testItem = TestItem.createNewItem();
+            container.createItem(testItem).block();
+            testItems.add(testItem);
+        }
+
+        return testItems;
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/implementation/source/MetadataMonitorThreadTest.java b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/implementation/source/MetadataMonitorThreadTest.java
new file mode 100644
index 000000000000..6054489e91fb
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/java/com/azure/cosmos/kafka/connect/implementation/source/MetadataMonitorThreadTest.java
@@ -0,0 +1,253 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.cosmos.kafka.connect.implementation.source;
+
+import com.azure.cosmos.CosmosAsyncClient;
+import com.azure.cosmos.implementation.ImplementationBridgeHelpers;
+import com.azure.cosmos.implementation.TestConfigurations;
+import com.azure.cosmos.kafka.connect.InMemoryStorageReader;
+import com.azure.cosmos.kafka.connect.KafkaCosmosTestSuiteBase;
+import com.azure.cosmos.kafka.connect.implementation.CosmosAccountConfig;
+import com.azure.cosmos.kafka.connect.implementation.CosmosClientStore;
+import com.azure.cosmos.models.CosmosContainerProperties;
+import com.azure.cosmos.models.FeedRange;
+import org.apache.kafka.connect.source.SourceConnectorContext;
+import org.mockito.Mockito;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
+
+public class MetadataMonitorThreadTest extends KafkaCosmosTestSuiteBase {
+    private CosmosAsyncClient client;
+    @BeforeClass(groups = {"kafka"}, timeOut = TIMEOUT)
+    public void before_MetadataMonitorThreadTest() {
+        CosmosAccountConfig accountConfig = new CosmosAccountConfig(
+            TestConfigurations.HOST,
+            TestConfigurations.MASTER_KEY,
+            "requestTaskReconfigurationTest",
+            false,
+            new ArrayList<String>());
+        this.client = CosmosClientStore.getCosmosClient(accountConfig);
+    }
+
+    @AfterClass(groups = {"kafka"}, timeOut = TIMEOUT)
+    public void after_MetadataMonitorThreadTest() {
+        if (this.client != null) {
+            this.client.close();
+        }
+    }
+
+    @Test(groups = { "kafka" }, timeOut = TIMEOUT)
+    public void requestTaskReconfigurationOnContainersChange() throws InterruptedException {
+
+        CosmosSourceContainersConfig cosmosSourceContainersConfig =
+            new CosmosSourceContainersConfig(
+                databaseName,
+                true,
+                new ArrayList<String>(),
+                new ArrayList<String>());
+        CosmosMetadataConfig metadataConfig =
+            new CosmosMetadataConfig(500, "_cosmos.metadata.topic");
+        SourceConnectorContext sourceConnectorContext = Mockito.mock(SourceConnectorContext.class);
+        InMemoryStorageReader inMemoryStorageReader = new InMemoryStorageReader();
+        CosmosSourceOffsetStorageReader sourceOffsetStorageReader = new CosmosSourceOffsetStorageReader(inMemoryStorageReader);
+
+        MetadataMonitorThread monitorThread =
+            new MetadataMonitorThread(
+                cosmosSourceContainersConfig,
+                metadataConfig,
+                sourceConnectorContext,
+                sourceOffsetStorageReader,
+                this.client);
+
+        monitorThread.run();
+
+        Thread.sleep(1000);
+        // Since there is no offset yet, no requestTaskReconfiguration will happen
+        Mockito.verify(sourceConnectorContext, Mockito.never()).requestTaskReconfiguration();
+
+        // now populate containers metadata offset
+        CosmosContainerProperties singlePartitionContainer = getSinglePartitionContainer(this.client);
+        ContainersMetadataTopicPartition containersMetadataTopicPartition =
+            new ContainersMetadataTopicPartition(databaseName);
+        ContainersMetadataTopicOffset containersMetadataTopicOffset =
+            new ContainersMetadataTopicOffset(Arrays.asList(singlePartitionContainer.getResourceId()));
+
+        Map<Map<String, Object>, Map<String, Object>> offsetMap = new HashMap<>();
+        offsetMap.put(
+            ContainersMetadataTopicPartition.toMap(containersMetadataTopicPartition),
+            ContainersMetadataTopicOffset.toMap(containersMetadataTopicOffset));
+
+        inMemoryStorageReader.populateOffset(offsetMap);
+
+        Thread.sleep(5000); // give enough time to do the containers query request
+        monitorThread.close();
+
+        Mockito.verify(sourceConnectorContext, Mockito.atLeastOnce()).requestTaskReconfiguration();
+    }
+
+    @Test(groups = { "kafka" }, timeOut = TIMEOUT)
+    public void requestTaskReconfigurationOnSplit() throws InterruptedException {
+
+        CosmosSourceContainersConfig cosmosSourceContainersConfig =
+            new CosmosSourceContainersConfig(
+                databaseName,
+                false,
+                Arrays.asList(multiPartitionContainerName),
+                new ArrayList<String>());
+        CosmosMetadataConfig metadataConfig =
+            new CosmosMetadataConfig(500, "_cosmos.metadata.topic");
+        SourceConnectorContext sourceConnectorContext = Mockito.mock(SourceConnectorContext.class);
+
+        InMemoryStorageReader inMemoryStorageReader = new InMemoryStorageReader();
+        CosmosSourceOffsetStorageReader sourceOffsetStorageReader = new CosmosSourceOffsetStorageReader(inMemoryStorageReader);
+
+        // populate containers metadata offset
+        CosmosContainerProperties multiPartitionContainer = getMultiPartitionContainer(this.client);
+        ContainersMetadataTopicPartition containersMetadataTopicPartition =
+            new ContainersMetadataTopicPartition(databaseName);
+        ContainersMetadataTopicOffset containersMetadataTopicOffset =
+            new ContainersMetadataTopicOffset(Arrays.asList(multiPartitionContainer.getResourceId()));
+        Map<Map<String, Object>, Map<String, Object>> offsetMap = new HashMap<>();
+        offsetMap.put(
+            ContainersMetadataTopicPartition.toMap(containersMetadataTopicPartition),
+            ContainersMetadataTopicOffset.toMap(containersMetadataTopicOffset));
+
+        inMemoryStorageReader.populateOffset(offsetMap);
+
+        MetadataMonitorThread monitorThread =
+            new MetadataMonitorThread(
+                cosmosSourceContainersConfig,
+                metadataConfig,
+                sourceConnectorContext,
+                sourceOffsetStorageReader,
+                this.client);
+
+        monitorThread.run();
+
+        Thread.sleep(2000); // give some time for the containers query requests
+        // Only the containers metadata offset exists so far, so no requestTaskReconfiguration will happen
+        Mockito.verify(sourceConnectorContext, Mockito.never()).requestTaskReconfiguration();
+
+        // now populate container feedRanges metadata
+        List<FeedRange> feedRanges =
+            this.client
+                .getDatabase(databaseName)
+                .getContainer(multiPartitionContainer.getId())
+                .getFeedRanges()
+                .block();
+        assertThat(feedRanges.size()).isGreaterThan(1);
+
+        FeedRangesMetadataTopicPartition feedRangesMetadataTopicPartition =
+            new FeedRangesMetadataTopicPartition(databaseName, multiPartitionContainer.getResourceId());
+        FeedRangesMetadataTopicOffset feedRangesMetadataTopicOffset =
+            new FeedRangesMetadataTopicOffset(Arrays.asList(FeedRange.forFullRange()));
+
+        Map<Map<String, Object>, Map<String, Object>> feedRangesOffSetMap = new HashMap<>();
+        feedRangesOffSetMap.put(
+            FeedRangesMetadataTopicPartition.toMap(feedRangesMetadataTopicPartition),
+            FeedRangesMetadataTopicOffset.toMap(feedRangesMetadataTopicOffset));
+
+        inMemoryStorageReader.populateOffset(feedRangesOffSetMap);
+
+        Thread.sleep(5000); // give enough time for the containers query and feedRanges request
+        monitorThread.close();
+
+        // for split, a task reconfiguration is expected
+        Mockito.verify(sourceConnectorContext, Mockito.atLeastOnce()).requestTaskReconfiguration();
+    }
+
+    @Test(groups = { "kafka" }, timeOut = TIMEOUT)
+    public void requestTaskReconfigurationOnMerge() throws InterruptedException {
+
+        CosmosSourceContainersConfig cosmosSourceContainersConfig =
+            new CosmosSourceContainersConfig(
+                databaseName,
+                false,
+                Arrays.asList(singlePartitionContainerName),
+                new ArrayList<String>());
+        CosmosMetadataConfig metadataConfig =
+            new CosmosMetadataConfig(500, "_cosmos.metadata.topic");
+        SourceConnectorContext sourceConnectorContext = Mockito.mock(SourceConnectorContext.class);
+
+        InMemoryStorageReader inMemoryStorageReader = new InMemoryStorageReader();
+        CosmosSourceOffsetStorageReader sourceOffsetStorageReader = new CosmosSourceOffsetStorageReader(inMemoryStorageReader);
+
+        // populate containers metadata offset
+        CosmosContainerProperties singlePartitionContainer = getSinglePartitionContainer(this.client);
+        ContainersMetadataTopicPartition containersMetadataTopicPartition =
+            new ContainersMetadataTopicPartition(databaseName);
+        ContainersMetadataTopicOffset containersMetadataTopicOffset =
+            new ContainersMetadataTopicOffset(Arrays.asList(singlePartitionContainer.getResourceId()));
+        Map<Map<String, Object>, Map<String, Object>> offsetMap = new HashMap<>();
+        offsetMap.put(
+            ContainersMetadataTopicPartition.toMap(containersMetadataTopicPartition),
+            ContainersMetadataTopicOffset.toMap(containersMetadataTopicOffset));
+
+        inMemoryStorageReader.populateOffset(offsetMap);
+
+        MetadataMonitorThread monitorThread =
+            new MetadataMonitorThread(
+                cosmosSourceContainersConfig,
+                metadataConfig,
+                sourceConnectorContext,
+                sourceOffsetStorageReader,
+                this.client);
+
+        monitorThread.run();
+
+        Thread.sleep(2000); // give some time for the containers query requests
+        // Only the containers metadata offset exists so far, so no requestTaskReconfiguration will happen
+        Mockito.verify(sourceConnectorContext, Mockito.never()).requestTaskReconfiguration();
+
+        // now populate container feedRanges metadata
+        List<FeedRange> feedRanges =
+            this.client
+                .getDatabase(databaseName)
+                .getContainer(singlePartitionContainer.getId())
+                .getFeedRanges()
+                .block();
+        assertThat(feedRanges.size()).isEqualTo(1);
+
+        List<FeedRange> childRanges =
+            ImplementationBridgeHelpers
+                .CosmosAsyncContainerHelper
+                .getCosmosAsyncContainerAccessor()
+                .trySplitFeedRange(
+                    this.client.getDatabase(databaseName).getContainer(singlePartitionContainer.getId()),
+                    FeedRange.forFullRange(),
+                    2)
+                .block();
+
+        FeedRangesMetadataTopicPartition feedRangesMetadataTopicPartition =
+            new FeedRangesMetadataTopicPartition(databaseName, singlePartitionContainer.getResourceId());
+        FeedRangesMetadataTopicOffset feedRangesMetadataTopicOffset =
+            new FeedRangesMetadataTopicOffset(
+                childRanges
+                    .stream()
+                    .collect(Collectors.toList()));
+
+        Map<Map<String, Object>, Map<String, Object>> feedRangesOffSetMap = new HashMap<>();
+        feedRangesOffSetMap.put(
+            FeedRangesMetadataTopicPartition.toMap(feedRangesMetadataTopicPartition),
+            FeedRangesMetadataTopicOffset.toMap(feedRangesMetadataTopicOffset));
+
+        inMemoryStorageReader.populateOffset(feedRangesOffSetMap);
+
+        Thread.sleep(5000); // give enough time for the containers query and feedRanges request
+        monitorThread.close();
+
+        // for merge, no task reconfiguration is needed
+        Mockito.verify(sourceConnectorContext, Mockito.never()).requestTaskReconfiguration();
+    }
+}
diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/resources/kafka-testng.xml b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/resources/kafka-testng.xml
new file mode 100644
index 000000000000..4d30f16b4df5
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/resources/kafka-testng.xml
@@ -0,0 +1,35 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/sdk/cosmos/azure-cosmos-kafka-connect/src/test/resources/log4j2.properties b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/resources/log4j2.properties
new file mode 100644
index 000000000000..b68efa3afd76
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-kafka-connect/src/test/resources/log4j2.properties
@@ -0,0 +1,24 @@
+# Set root logger level to INFO and
its default appender to be 'STDOUT'. +rootLogger.level = info +rootLogger.appenderRef.stdout.ref = STDOUT + +# Uncomment here and lines 21 - 25 to enable logging to a file as well. +# rootLogger.appenderRef.logFile.ref = FILE + +property.logDirectory = $${sys:azure.cosmos.logger.directory} +property.hostName = $${sys:azure.cosmos.hostname} + +logger.netty.name = io.netty +logger.netty.level = off + +# STDOUT is a ConsoleAppender and uses PatternLayout. +appender.console.name = STDOUT +appender.console.type = Console +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d %5X{pid} [%t] %-5p %c - %m%n + +# appender.logfile.name = FILE +# appender.logfile.type = File +# appender.logfile.filename = ${logDirectory}/azure-cosmos-benchmark.log +# appender.logfile.layout.type = PatternLayout +# appender.logfile.layout.pattern = [%d][%p][${hostName}][thread:%t][logger:%c] %m%n diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java index bc4de6cf7917..fff63f1feefd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java @@ -18,6 +18,7 @@ import com.azure.cosmos.implementation.Offer; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.PartitionKeyHelper; +import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.Paths; import com.azure.cosmos.implementation.QueryFeedOperationState; import com.azure.cosmos.implementation.RequestOptions; @@ -71,6 +72,7 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -79,6 +81,7 @@ import java.util.concurrent.CompletionException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; +import java.util.stream.Collectors; import static com.azure.core.util.FluxUtil.withContext; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; @@ -2549,6 +2552,36 @@ public Mono> getFeedRanges() { return this.getFeedRanges(true); } + Mono> getOverlappingFeedRanges(FeedRange feedRange) { + checkNotNull(feedRange, "Argument 'feedRange' must not be null."); + + final AsyncDocumentClient clientWrapper = this.database.getDocClientWrapper(); + + return this.getNormalizedEffectiveRange(feedRange) + .flatMap(normalizedRange -> { + return clientWrapper + .getCollectionCache() + .resolveByNameAsync(null, this.getLinkWithoutTrailingSlash(), null) + .flatMap(collection -> { + return clientWrapper + .getPartitionKeyRangeCache() + .tryGetOverlappingRangesAsync( + null, + collection.getResourceId(), + normalizedRange, + false, + null + ); + }); + }) + .map(pkRangesValueHolder -> { + List matchedPkRanges = + (pkRangesValueHolder == null || pkRangesValueHolder.v == null) ? 
new ArrayList<>() : pkRangesValueHolder.v; + + return matchedPkRanges.stream().map(pkRange -> new FeedRangeEpkImpl(pkRange.toRange())).collect(Collectors.toList()); + }); + } + Mono> getFeedRanges(boolean forceRefresh) { return this.getDatabase().getDocClientWrapper().getFeedRanges(getLink(), forceRefresh); } @@ -2597,6 +2630,17 @@ Mono> getNormalizedEffectiveRange(FeedRange feedRange) { getCollectionObservable); } + Mono checkFeedRangeOverlapping(FeedRange feedRange1, FeedRange feedRange2) { + checkNotNull(feedRange1, "Argument 'feedRange1' must not be null."); + checkNotNull(feedRange2, "Argument 'feedRange2' must not be null."); + + return this.getNormalizedEffectiveRange(feedRange1) + .flatMap(normalizedRange1 -> { + return this.getNormalizedEffectiveRange(feedRange2) + .map(normalizedRange2 -> Range.checkOverlapping(normalizedRange1, normalizedRange2)); + }); + } + /** * Enable the throughput control group with local control mode. *
@@ -2775,6 +2819,29 @@ public Function>> queryItemsInt public Mono> getFeedRanges(CosmosAsyncContainer cosmosAsyncContainer, boolean forceRefresh) { return cosmosAsyncContainer.getFeedRanges(forceRefresh); } + + @Override + public Mono> trySplitFeedRange( + CosmosAsyncContainer cosmosAsyncContainer, + FeedRange feedRange, + int targetedCountAfterSplit) { + + return cosmosAsyncContainer.trySplitFeedRange(feedRange, targetedCountAfterSplit) + .map(feedRangeEpks -> feedRangeEpks.stream().collect(Collectors.toList())); + } + + @Override + public Mono checkFeedRangeOverlapping( + CosmosAsyncContainer cosmosAsyncContainer, + FeedRange feedRange1, + FeedRange feedRange2) { + return cosmosAsyncContainer.checkFeedRangeOverlapping(feedRange1, feedRange2); + } + + @Override + public Mono> getOverlappingFeedRanges(CosmosAsyncContainer container, FeedRange feedRange) { + return container.getOverlappingFeedRanges(feedRange); + } }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 1f23419c63eb..bea9a78c891b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -37,8 +37,10 @@ import com.azure.cosmos.implementation.directconnectivity.Uri; import com.azure.cosmos.implementation.directconnectivity.rntbd.RntbdChannelStatistics; import com.azure.cosmos.implementation.faultinjection.IFaultInjectorProvider; +import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.implementation.patch.PatchOperation; import com.azure.cosmos.implementation.routing.PartitionKeyInternal; +import com.azure.cosmos.implementation.routing.Range; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; import com.azure.cosmos.models.CosmosBatch; import com.azure.cosmos.models.CosmosBatchOperationResult; @@ -350,6 +352,7 @@ public interface CosmosChangeFeedRequestOptionsAccessor { CosmosChangeFeedRequestOptions setItemFactoryMethod(CosmosChangeFeedRequestOptions queryRequestOptions, Function factoryMethod); CosmosDiagnosticsThresholds getDiagnosticsThresholds(CosmosChangeFeedRequestOptions options); List getExcludeRegions(CosmosChangeFeedRequestOptions cosmosChangeFeedRequestOptions); + CosmosChangeFeedRequestOptions createForProcessingFromContinuation(String continuation, FeedRange targetRange, String continuationLsn); } } @@ -940,6 +943,14 @@ Function>> queryItemsInternalFu Class classType); Mono> getFeedRanges(CosmosAsyncContainer cosmosAsyncContainer, boolean forceRefresh); + + Mono> trySplitFeedRange( + CosmosAsyncContainer cosmosAsyncContainer, + FeedRange feedRange, + int targetedCountAfterSplit); + + Mono checkFeedRangeOverlapping(CosmosAsyncContainer container, FeedRange feedRange1, FeedRange feedRange2); + Mono> getOverlappingFeedRanges(CosmosAsyncContainer container, FeedRange feedRange); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java index 1ab6c161cad6..61e34ed4d0fd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java +++ 
b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java @@ -8,17 +8,22 @@ import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedMode; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedStartFromInternal; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedState; +import com.azure.cosmos.implementation.changefeed.common.ChangeFeedStateV1; import com.azure.cosmos.implementation.feedranges.FeedRangeContinuation; +import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.implementation.feedranges.FeedRangeInternal; import com.azure.cosmos.implementation.query.CompositeContinuationToken; +import com.azure.cosmos.implementation.routing.Range; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; import com.azure.cosmos.util.Beta; import com.fasterxml.jackson.databind.JsonNode; import java.time.Instant; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -262,6 +267,48 @@ public static CosmosChangeFeedRequestOptions createForProcessingFromContinuation return createForProcessingFromContinuation(changeFeedState); } + /*** + * Creates a new {@link CosmosChangeFeedRequestOptions} instance to start processing + * change feed items based on a previous continuation. + * + * ONLY used by Kafka connector. + * + * @param continuation The continuation that was retrieved from a previously retrieved FeedResponse + * @param targetRange the new target range + * @param continuationLsn the new continuation lsn + * @return a new {@link CosmosChangeFeedRequestOptions} instance + */ + static CosmosChangeFeedRequestOptions createForProcessingFromContinuation( + String continuation, FeedRange targetRange, String continuationLsn) { + if (targetRange instanceof FeedRangeEpkImpl) { + Range normalizedRange = + FeedRangeInternal.normalizeRange(((FeedRangeEpkImpl) targetRange).getRange()); + + final ChangeFeedState changeFeedState = ChangeFeedState.fromString(continuation); + + if (StringUtils.isEmpty(continuationLsn)) { + continuationLsn = changeFeedState.getContinuation().getCurrentContinuationToken().getToken(); + } + + ChangeFeedState targetChangeFeedState = + new ChangeFeedStateV1( + changeFeedState.getContainerRid(), + (FeedRangeEpkImpl) targetRange, + changeFeedState.getMode(), + changeFeedState.getStartFromSettings(), + FeedRangeContinuation.create( + changeFeedState.getContainerRid(), + (FeedRangeEpkImpl) targetRange, + Arrays.asList(new CompositeContinuationToken(continuationLsn, normalizedRange)) + ) + ); + + return createForProcessingFromContinuation(targetChangeFeedState); + } + + throw new IllegalStateException("createForProcessingFromContinuation does not support feedRange type " + targetRange.getClass()); + } + static CosmosChangeFeedRequestOptions createForProcessingFromContinuation( ChangeFeedState changeFeedState) { @@ -609,6 +656,15 @@ public CosmosDiagnosticsThresholds getDiagnosticsThresholds(CosmosChangeFeedRequ public List getExcludeRegions(CosmosChangeFeedRequestOptions cosmosChangeFeedRequestOptions) { return cosmosChangeFeedRequestOptions.excludeRegions; } + + @Override + public CosmosChangeFeedRequestOptions 
createForProcessingFromContinuation( + String continuation, + FeedRange targetRange, + String continuationLsn) { + + return CosmosChangeFeedRequestOptions.createForProcessingFromContinuation(continuation, targetRange, continuationLsn); + } }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java index baeb43da9e50..92524abb3d53 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java @@ -34,13 +34,13 @@ exports com.azure.cosmos.util; // export packages for multiple different modules - exports com.azure.cosmos.implementation to com.azure.cosmos.encryption, com.azure.cosmos.test; + exports com.azure.cosmos.implementation to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; exports com.azure.cosmos.implementation.caches to com.azure.cosmos.encryption, com.azure.cosmos.test; exports com.azure.cosmos.implementation.feedranges to com.azure.cosmos.encryption, com.azure.cosmos.test; - exports com.azure.cosmos.implementation.apachecommons.lang to com.azure.cosmos.encryption, com.azure.cosmos.test; - exports com.azure.cosmos.implementation.guava25.base to com.azure.cosmos.encryption, com.azure.cosmos.test; - exports com.azure.cosmos.implementation.guava25.collect to com.azure.cosmos.encryption, com.azure.cosmos.test; - exports com.azure.cosmos.implementation.guava27 to com.azure.cosmos.encryption, com.azure.cosmos.test; + exports com.azure.cosmos.implementation.apachecommons.lang to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; + exports com.azure.cosmos.implementation.guava25.base to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; + exports com.azure.cosmos.implementation.guava25.collect to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; + exports com.azure.cosmos.implementation.guava27 to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; exports com.azure.cosmos.implementation.directconnectivity to com.azure.cosmos.encryption, com.azure.cosmos.test; opens com.azure.cosmos.implementation to com.fasterxml.jackson.databind, java.logging, com.fasterxml.jackson.module.afterburner; @@ -48,7 +48,7 @@ exports com.azure.cosmos.implementation.batch to com.azure.cosmos.encryption; exports com.azure.cosmos.implementation.patch to com.azure.cosmos.encryption; exports com.azure.cosmos.implementation.query to com.azure.cosmos.encryption; - exports com.azure.cosmos.implementation.apachecommons.lang.tuple to com.azure.cosmos.encryption; + exports com.azure.cosmos.implementation.apachecommons.lang.tuple to com.azure.cosmos.encryption, com.azure.cosmos.kafka.connect; // exporting some packages specifically for Jackson opens com.azure.cosmos.implementation.caches to com.fasterxml.jackson.databind; diff --git a/sdk/cosmos/kafka-integration-matrix.json b/sdk/cosmos/kafka-integration-matrix.json new file mode 100644 index 000000000000..608160ba2f4d --- /dev/null +++ b/sdk/cosmos/kafka-integration-matrix.json @@ -0,0 +1,14 @@ +{ + "matrix": { + "Cosmos": { + "Session_Integration": { + "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleWriteLocations = $false }", + "ProfileFlag": "-P kafka-integration", + "Pool": "azsdk-pool-mms-ubuntu-2004-general", + "OSVmImage": "MMSUbuntu2004" + } + }, + "TestFromSource": true, + "JavaTestVersion": ["1.8", "1.11", "1.17", "1.21"] + } +} 
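The createForProcessingFromContinuation(String, FeedRange, String) overload added above is what lets a caller resume a change feed on a different feed range (for example, a post-split child range) while reusing a previously persisted continuation. Below is a minimal sketch of how connector code might reach it through the bridge helpers; the getCosmosChangeFeedRequestOptionsAccessor() getter name and the storedContinuation/childRange inputs are assumptions for illustration, not code from this change.

import com.azure.cosmos.implementation.ImplementationBridgeHelpers;
import com.azure.cosmos.models.CosmosChangeFeedRequestOptions;
import com.azure.cosmos.models.FeedRange;

final class ContinuationRebaseSketch {
    // Rebase a persisted change feed continuation onto a child feed range after a split.
    // Passing null for continuationLsn lets the new overload fall back to the LSN already
    // embedded in the continuation (see the StringUtils.isEmpty check in the method above).
    static CosmosChangeFeedRequestOptions resumeOnChildRange(String storedContinuation, FeedRange childRange) {
        return ImplementationBridgeHelpers
            .CosmosChangeFeedRequestOptionsHelper
            .getCosmosChangeFeedRequestOptionsAccessor()
            .createForProcessingFromContinuation(storedContinuation, childRange, null);
    }
}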
diff --git a/sdk/cosmos/tests.yml b/sdk/cosmos/tests.yml index 82671c67cec6..ffab0caff4a5 100644 --- a/sdk/cosmos/tests.yml +++ b/sdk/cosmos/tests.yml @@ -68,4 +68,33 @@ extends: AdditionalVariables: - name: AdditionalArgs value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) -DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' - \ No newline at end of file + + - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml + parameters: + TestName: 'Kafka_Cosmos_Integration' + CloudConfig: + Public: + SubscriptionConfigurations: + - $(sub-config-azure-cloud-test-resources) + - $(sub-config-cosmos-azure-cloud-test-resources) + MatrixConfigs: + - Name: Kafka_Cosmos_Integration_Test + Path: sdk/cosmos/kafka-integration-matrix.json + Selection: all + GenerateVMJobs: true + ServiceDirectory: cosmos + TestResourceDirectories: + - cosmos/ + Artifacts: + - name: azure-cosmos-kafka-connect + groupId: com.azure.cosmos.kafka + safeName: azurecosmoskafkaconnect + TimeoutInMinutes: 120 + PreSteps: + - template: /eng/pipelines/templates/steps/install-reporting-tools.yml + TestGoals: 'clean verify' + TestOptions: '$(ProfileFlag)' + AdditionalVariables: + - name: AdditionalArgs + value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) -DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' +
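For orientation, the integration tests earlier in this change assemble their task configuration by hand; the sketch below shows how the same per-feed-range task units could be built programmatically, one FeedRangeTaskUnit per feed range, serialized through CosmosSourceTaskConfig.getFeedRangeTaskUnitsConfigMap. Treat it as an illustrative sketch only: the package imports mirror the test classes above, and the topic name simply falls back to the container name, as the tests do.

import com.azure.cosmos.CosmosAsyncContainer;
import com.azure.cosmos.kafka.connect.implementation.source.CosmosSourceTaskConfig;
import com.azure.cosmos.kafka.connect.implementation.source.FeedRangeTaskUnit;
import com.azure.cosmos.models.FeedRange;

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

final class FeedRangeTaskUnitSketch {
    // Build one FeedRangeTaskUnit per feed range of the container (no continuation state yet)
    // and serialize them into the map that CosmosSourceTask.start(...) consumes.
    static Map<String, String> feedRangeTaskConfig(CosmosAsyncContainer container, String databaseName) {
        String containerRid = container.read().block().getProperties().getResourceId();
        List<FeedRange> feedRanges = container.getFeedRanges().block();

        List<FeedRangeTaskUnit> taskUnits = feedRanges
            .stream()
            .map(feedRange -> new FeedRangeTaskUnit(
                databaseName,
                container.getId(),   // container name
                containerRid,
                feedRange,
                null,                // no continuation state yet
                container.getId()))  // topic defaults to the container name
            .collect(Collectors.toList());

        return CosmosSourceTaskConfig.getFeedRangeTaskUnitsConfigMap(taskUnits);
    }
}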