Revert "Remove schema from stream name. (#2807)"

This reverts commit 6e9d6fc.
airbytehq · jrhizor · Apr 12, 2021 · Apr 12, 2021 · Apr 12, 2021 · 72e9534aa2332f65748c5cbd595310f55c8ffe16
commit 72e9534aa2332f65748c5cbd595310f55c8ffe16
@@ -90,7 +90,6 @@ jobs:
         id: publish
         env:
           DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
-          # Oracle expects this variable to be set. Although usually present, this is not set by default on Github virtual runners.
           TZ: UTC
       - name: Add Success Comment
         if: github.event.inputs.comment-id && success()

@@ -89,8 +89,6 @@ jobs:
         id: test
         env:
           ACTION_RUN_ID: ${{github.run_id}}
-          # Oracle expects this variable to be set. Although usually present, this is not set by default on Github virtual runners.
-          TZ: UTC
       - name: Report Status
         if: github.ref == 'refs/heads/master' && always()
         run: ./tools/status/report.sh ${{ github.event.inputs.connector }} ${{github.repository}} ${{github.run_id}} ${{steps.test.outcome}}

diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/type/Types.java b/airbyte-commons/src/main/java/io/airbyte/commons/type/Types.java
diff --git a/airbyte-integrations/bases/base-normalization/Dockerfile b/airbyte-integrations/bases/base-normalization/Dockerfile
@@ -19,5 +19,5 @@ WORKDIR /airbyte
 
 ENTRYPOINT ["/airbyte/entrypoint.sh"]
 
-LABEL io.airbyte.version=0.1.19
+LABEL io.airbyte.version=0.1.18
 LABEL io.airbyte.name=airbyte/normalization
diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/catalog_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/catalog_processor.py
@@ -53,14 +53,14 @@ def __init__(self, output_directory: str, destination_type: DestinationType):
         self.destination_type: DestinationType = destination_type
         self.name_transformer: DestinationNameTransformer = DestinationNameTransformer(destination_type)
 
-    def process(self, catalog_file: str, json_column_name: str, default_schema: str):
+    def process(self, catalog_file: str, json_column_name: str, target_schema: str):
         """
         This method first parse and build models to handle top-level streams.
         In a second loop will go over the substreams that were nested in a breadth-first traversal manner.
 
         @param catalog_file input AirbyteCatalog file in JSON Schema describing the structure of the raw data
         @param json_column_name is the column name containing the JSON Blob with the raw data
-        @param default_schema is the final schema where to output the final transformed data to
+        @param target_schema is the final schema where to output the final transformed data to
         """
         # Registry of all tables in all schemas
         tables_registry: Set[str] = set()
@@ -73,7 +73,7 @@ def process(self, catalog_file: str, json_column_name: str, default_schema: str)
         for stream_processor in self.build_stream_processor(
             catalog=catalog,
             json_column_name=json_column_name,
-            default_schema=default_schema,
+            target_schema=target_schema,
             name_transformer=self.name_transformer,
             destination_type=self.destination_type,
             tables_registry=tables_registry,
@@ -98,22 +98,16 @@ def process(self, catalog_file: str, json_column_name: str, default_schema: str)
     def build_stream_processor(
         catalog: Dict,
         json_column_name: str,
-        default_schema: str,
+        target_schema: str,
         name_transformer: DestinationNameTransformer,
         destination_type: DestinationType,
         tables_registry: Set[str],
     ) -> List[StreamProcessor]:
         result = []
         for configured_stream in get_field(catalog, "streams", "Invalid Catalog: 'streams' is not defined in Catalog"):
             stream_config = get_field(configured_stream, "stream", "Invalid Stream: 'stream' is not defined in Catalog streams")
-
-            # The logic here matches the logic in JdbcBufferedConsumerFactory.java. Any modifications need to be reflected there and vice versa.
-            schema = default_schema
-            if "namespace" in stream_config:
-                schema = stream_config["namespace"]
-
-            schema_name = name_transformer.normalize_schema_name(schema)
-            raw_schema_name = name_transformer.normalize_schema_name(f"_airbyte_{schema}", truncate=False)
+            schema_name = name_transformer.normalize_schema_name(target_schema)
+            raw_schema_name = name_transformer.normalize_schema_name(f"_airbyte_{target_schema}", truncate=False)
             stream_name = get_field(stream_config, "name", f"Invalid Stream: 'name' is not defined in stream: {str(stream_config)}")
             raw_table_name = name_transformer.normalize_table_name(f"_airbyte_raw_{stream_name}", truncate=False)
 

diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/transform.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/transform.py
@@ -78,7 +78,7 @@ def process_catalog(self) -> None:
         processor = CatalogProcessor(output_directory=output, destination_type=destination_type)
         for catalog_file in self.config["catalog"]:
             print(f"Processing {catalog_file}...")
-            processor.process(catalog_file=catalog_file, json_column_name=json_col, default_schema=schema)
+            processor.process(catalog_file=catalog_file, json_column_name=json_col, target_schema=schema)
 
 
 def read_profiles_yml(profile_dir: str) -> Any:

diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py b/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py
@@ -69,7 +69,7 @@ def test_stream_processor_tables_naming(integration_type: str, catalog_file: str
     for stream_processor in CatalogProcessor.build_stream_processor(
         catalog=catalog,
         json_column_name="'json_column_name_test'",
-        default_schema="schema_test",
+        target_schema="schema_test",
         name_transformer=DestinationNameTransformer(destination_type),
         destination_type=destination_type,
         tables_registry=tables_registry,

diff --git a/...ce-test/src/main/java/io/airbyte/integrations/standardtest/source/StandardSourceTest.java b/...ce-test/src/main/java/io/airbyte/integrations/standardtest/source/StandardSourceTest.java
@@ -257,8 +257,7 @@ public void testDiscover() throws Exception {
    */
   @Test
   public void testFullRefreshRead() throws Exception {
-    ConfiguredAirbyteCatalog catalog = withFullRefreshSyncModes(getConfiguredCatalog());
-    final List<AirbyteMessage> allMessages = runRead(catalog);
+    final List<AirbyteMessage> allMessages = runRead(withFullRefreshSyncModes(getConfiguredCatalog()));
     final List<AirbyteMessage> recordMessages = allMessages.stream().filter(m -> m.getType() == Type.RECORD).collect(Collectors.toList());
     // the worker validates the message formats, so we just validate the message content
     // We don't need to validate message format as long as we use the worker, which we will not want to

@@ -104,13 +104,18 @@ class BigQueryDestinationTest {
   private static final AirbyteMessage MESSAGE_STATE = new AirbyteMessage().withType(AirbyteMessage.Type.STATE)
       .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.builder().put("checkpoint", "now!").build())));
 
+  private static final ConfiguredAirbyteCatalog CATALOG = new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(
+      CatalogHelpers.createConfiguredAirbyteStream(USERS_STREAM_NAME, io.airbyte.protocol.models.Field.of("name", JsonSchemaPrimitive.STRING),
+          io.airbyte.protocol.models.Field
+              .of("id", JsonSchemaPrimitive.STRING)),
+      CatalogHelpers.createConfiguredAirbyteStream(TASKS_STREAM_NAME, Field.of("goal", JsonSchemaPrimitive.STRING))));
+
   private static final NamingConventionTransformer NAMING_RESOLVER = new StandardNameTransformer();
 
   private JsonNode config;
 
   private BigQuery bigquery;
   private Dataset dataset;
-  private ConfiguredAirbyteCatalog catalog;
 
   private boolean tornDown = true;
 
@@ -137,13 +142,6 @@ void setup(TestInfo info) throws IOException {
 
     final String datasetId = "airbyte_tests_" + RandomStringUtils.randomAlphanumeric(8);
 
-    catalog = new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(
-        CatalogHelpers.createConfiguredAirbyteStream(USERS_STREAM_NAME, datasetId,
-            io.airbyte.protocol.models.Field.of("name", JsonSchemaPrimitive.STRING),
-            io.airbyte.protocol.models.Field
-                .of("id", JsonSchemaPrimitive.STRING)),
-        CatalogHelpers.createConfiguredAirbyteStream(TASKS_STREAM_NAME, datasetId, Field.of("goal", JsonSchemaPrimitive.STRING))));
-
     final DatasetInfo datasetInfo = DatasetInfo.newBuilder(datasetId).build();
     dataset = bigquery.create(datasetInfo);
 
@@ -218,7 +216,7 @@ void testCheckFailure() {
   @Test
   void testWriteSuccess() throws Exception {
     final BigQueryDestination destination = new BigQueryDestination();
-    final AirbyteMessageConsumer consumer = destination.getConsumer(config, catalog);
+    final AirbyteMessageConsumer consumer = destination.getConsumer(config, CATALOG);
 
     consumer.accept(MESSAGE_USERS1);
     consumer.accept(MESSAGE_TASKS1);
@@ -237,7 +235,7 @@ void testWriteSuccess() throws Exception {
     assertEquals(expectedTasksJson.size(), tasksActual.size());
     assertTrue(expectedTasksJson.containsAll(tasksActual) && tasksActual.containsAll(expectedTasksJson));
 
-    assertTmpTablesNotPresent(catalog.getStreams()
+    assertTmpTablesNotPresent(CATALOG.getStreams()
         .stream()
         .map(ConfiguredAirbyteStream::getStream)
         .map(AirbyteStream::getName)
@@ -250,18 +248,18 @@ void testWriteFailure() throws Exception {
     final AirbyteMessage spiedMessage = spy(MESSAGE_USERS1);
     doThrow(new RuntimeException()).when(spiedMessage).getRecord();
 
-    final AirbyteMessageConsumer consumer = spy(new BigQueryDestination().getConsumer(config, catalog));
+    final AirbyteMessageConsumer consumer = spy(new BigQueryDestination().getConsumer(config, CATALOG));
 
     assertThrows(RuntimeException.class, () -> consumer.accept(spiedMessage));
     consumer.accept(MESSAGE_USERS2);
     consumer.close();
 
-    final List<String> tableNames = catalog.getStreams()
+    final List<String> tableNames = CATALOG.getStreams()
         .stream()
         .map(ConfiguredAirbyteStream::getStream)
         .map(AirbyteStream::getName)
         .collect(toList());
-    assertTmpTablesNotPresent(catalog.getStreams()
+    assertTmpTablesNotPresent(CATALOG.getStreams()
         .stream()
         .map(ConfiguredAirbyteStream::getStream)
         .map(AirbyteStream::getName)

@@ -97,9 +97,9 @@ class CsvDestinationTest {
       .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.builder().put("checkpoint", "now!").build())));
 
   private static final ConfiguredAirbyteCatalog CATALOG = new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(
-      CatalogHelpers.createConfiguredAirbyteStream(USERS_STREAM_NAME, null, Field.of("name", JsonSchemaPrimitive.STRING),
+      CatalogHelpers.createConfiguredAirbyteStream(USERS_STREAM_NAME, Field.of("name", JsonSchemaPrimitive.STRING),
           Field.of("id", JsonSchemaPrimitive.STRING)),
-      CatalogHelpers.createConfiguredAirbyteStream(TASKS_STREAM_NAME, null, Field.of("goal", JsonSchemaPrimitive.STRING))));
+      CatalogHelpers.createConfiguredAirbyteStream(TASKS_STREAM_NAME, Field.of("goal", JsonSchemaPrimitive.STRING))));
 
   private Path destinationPath;
   private JsonNode config;

@@ -103,9 +103,6 @@ private static Function<ConfiguredAirbyteStream, WriteConfig> toWriteConfig(Nami
   /**
    * Defer to the {@link AirbyteStream}'s namespace. If this is not set, use the destination's default
    * schema. This namespace is source-provided, and can be potentially empty.
-   *
-   * The logic here matches the logic in the catalog_process.py for Normalization. Any modifications
-   * need to be reflected there and vice versa.
    */
   private static String getOutputSchema(AirbyteStream stream, String defaultDestSchema) {
     final String sourceSchema = stream.getNamespace();

@@ -93,9 +93,9 @@ class LocalJsonDestinationTest {
       .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.builder().put("checkpoint", "now!").build())));
 
   private static final ConfiguredAirbyteCatalog CATALOG = new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(
-      CatalogHelpers.createConfiguredAirbyteStream(USERS_STREAM_NAME, null, Field.of("name", JsonSchemaPrimitive.STRING),
+      CatalogHelpers.createConfiguredAirbyteStream(USERS_STREAM_NAME, Field.of("name", JsonSchemaPrimitive.STRING),
           Field.of("id", JsonSchemaPrimitive.STRING)),
-      CatalogHelpers.createConfiguredAirbyteStream(TASKS_STREAM_NAME, null, Field.of("goal", JsonSchemaPrimitive.STRING))));
+      CatalogHelpers.createConfiguredAirbyteStream(TASKS_STREAM_NAME, Field.of("goal", JsonSchemaPrimitive.STRING))));
 
   private Path destinationPath;
   private JsonNode config;