forked from airbytehq/airbyte
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
✨ Source Mongodb POC: Ignore data type for discovered field uniqueness (airbytehq#29168)
* Ignore data type for discovered field uniqueness
* Add unit test
* Formatting
- Loading branch information
1 parent
b277cd3
commit 848fb74
Showing 5 changed files with 185 additions and 39 deletions.
There are no files selected for viewing
37 changes: 37 additions & 0 deletions
37
...nternal-poc/src/main/java/io/airbyte/integrations/source/mongodb/internal/MongoField.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.integrations.source.mongodb.internal; | ||
|
||
import io.airbyte.protocol.models.Field; | ||
import io.airbyte.protocol.models.JsonSchemaType; | ||
import java.util.Objects; | ||
|
||
/** | ||
* Custom implementation of {@link Field} that only uses the name of the field for equality. This is | ||
* to support MongoDB's unstructured documents which may contain more than one document with the | ||
* same field name, but different data type. | ||
*/ | ||
public class MongoField extends Field { | ||
|
||
public MongoField(String name, JsonSchemaType type) { | ||
super(name, type); | ||
} | ||
|
||
public boolean equals(Object o) { | ||
if (this == o) { | ||
return true; | ||
} else if (o != null && this.getClass() == o.getClass()) { | ||
final MongoField field = (MongoField) o; | ||
return this.getName().equals(field.getName()); | ||
} else { | ||
return false; | ||
} | ||
} | ||
|
||
public int hashCode() { | ||
return Objects.hash(new Object[] {this.getName()}); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88 changes: 88 additions & 0 deletions
88
...rnal-poc/src/test/java/io/airbyte/integrations/source/mongodb/internal/MongoUtilTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.integrations.source.mongodb.internal; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertNotNull; | ||
import static org.mockito.ArgumentMatchers.any; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.when; | ||
|
||
import com.fasterxml.jackson.core.type.TypeReference; | ||
import com.mongodb.client.AggregateIterable; | ||
import com.mongodb.client.MongoClient; | ||
import com.mongodb.client.MongoCollection; | ||
import com.mongodb.client.MongoCursor; | ||
import com.mongodb.client.MongoDatabase; | ||
import io.airbyte.commons.json.Jsons; | ||
import io.airbyte.commons.resources.MoreResources; | ||
import io.airbyte.protocol.models.JsonSchemaType; | ||
import io.airbyte.protocol.models.v0.AirbyteStream; | ||
import java.io.IOException; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.stream.Collectors; | ||
import org.bson.Document; | ||
import org.junit.jupiter.api.Test; | ||
|
||
public class MongoUtilTest { | ||
|
||
@Test | ||
void testGetAirbyteStreams() throws IOException { | ||
final AggregateIterable<Document> aggregateIterable = mock(AggregateIterable.class); | ||
final MongoCursor<Document> cursor = mock(MongoCursor.class); | ||
final String databaseName = "database"; | ||
final Document authorizedCollectionsResponse = Document.parse(MoreResources.readResource("authorized_collections_response.json")); | ||
final MongoClient mongoClient = mock(MongoClient.class); | ||
final MongoCollection mongoCollection = mock(MongoCollection.class); | ||
final MongoDatabase mongoDatabase = mock(MongoDatabase.class); | ||
final List<Map<String, Object>> schemaDiscoveryJsonResponses = | ||
Jsons.deserialize(MoreResources.readResource("schema_discovery_response.json"), new TypeReference<>() {}); | ||
final List<Document> schemaDiscoveryResponses = schemaDiscoveryJsonResponses.stream().map(s -> new Document(s)).collect(Collectors.toList()); | ||
|
||
when(cursor.hasNext()).thenReturn(true, true, false); | ||
when(cursor.next()).thenReturn(schemaDiscoveryResponses.get(0), schemaDiscoveryResponses.get(1)); | ||
when(aggregateIterable.cursor()).thenReturn(cursor); | ||
when(mongoCollection.aggregate(any())).thenReturn(aggregateIterable); | ||
when(mongoDatabase.getCollection(any())).thenReturn(mongoCollection); | ||
when(mongoDatabase.runCommand(any())).thenReturn(authorizedCollectionsResponse); | ||
when(mongoClient.getDatabase(databaseName)).thenReturn(mongoDatabase); | ||
|
||
final List<AirbyteStream> streams = MongoUtil.getAirbyteStreams(mongoClient, databaseName); | ||
assertNotNull(streams); | ||
assertEquals(1, streams.size()); | ||
assertEquals(11, streams.get(0).getJsonSchema().get("properties").size()); | ||
} | ||
|
||
@Test | ||
void testGetAirbyteStreamsDifferentDataTypes() throws IOException { | ||
final AggregateIterable<Document> aggregateIterable = mock(AggregateIterable.class); | ||
final MongoCursor<Document> cursor = mock(MongoCursor.class); | ||
final String databaseName = "database"; | ||
final Document authorizedCollectionsResponse = Document.parse(MoreResources.readResource("authorized_collections_response.json")); | ||
final MongoClient mongoClient = mock(MongoClient.class); | ||
final MongoCollection mongoCollection = mock(MongoCollection.class); | ||
final MongoDatabase mongoDatabase = mock(MongoDatabase.class); | ||
final List<Map<String, Object>> schemaDiscoveryJsonResponses = | ||
Jsons.deserialize(MoreResources.readResource("schema_discovery_response_different_datatypes.json"), new TypeReference<>() {}); | ||
final List<Document> schemaDiscoveryResponses = schemaDiscoveryJsonResponses.stream().map(s -> new Document(s)).collect(Collectors.toList()); | ||
|
||
when(cursor.hasNext()).thenReturn(true, true, false); | ||
when(cursor.next()).thenReturn(schemaDiscoveryResponses.get(0), schemaDiscoveryResponses.get(1)); | ||
when(aggregateIterable.cursor()).thenReturn(cursor); | ||
when(mongoCollection.aggregate(any())).thenReturn(aggregateIterable); | ||
when(mongoDatabase.getCollection(any())).thenReturn(mongoCollection); | ||
when(mongoDatabase.runCommand(any())).thenReturn(authorizedCollectionsResponse); | ||
when(mongoClient.getDatabase(databaseName)).thenReturn(mongoDatabase); | ||
|
||
final List<AirbyteStream> streams = MongoUtil.getAirbyteStreams(mongoClient, databaseName); | ||
assertNotNull(streams); | ||
assertEquals(1, streams.size()); | ||
assertEquals(11, streams.get(0).getJsonSchema().get("properties").size()); | ||
assertEquals(JsonSchemaType.NUMBER.getJsonSchemaTypeMap().get("type"), | ||
streams.get(0).getJsonSchema().get("properties").get("total").get("type").asText()); | ||
} | ||
|
||
} |
31 changes: 31 additions & 0 deletions
31
...ongodb-internal-poc/src/test/resources/schema_discovery_response_different_datatypes.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
[ | ||
{ | ||
"_id" : null, | ||
"fields" : [ | ||
{ | ||
"_id" : "string", | ||
"name" : "string", | ||
"last_updated" : "date", | ||
"total" : "int", | ||
"price" : "decimal", | ||
"items" : "array", | ||
"owners" : "object" | ||
} | ||
] | ||
}, | ||
{ | ||
"_id" : null, | ||
"fields" : [ | ||
{ | ||
"_id" : "string", | ||
"name" : "string", | ||
"last_updated" : "date", | ||
"total" : "string", | ||
"price" : "decimal", | ||
"items" : "array", | ||
"owners" : "object", | ||
"other" : "string" | ||
} | ||
] | ||
} | ||
] |