-
Notifications
You must be signed in to change notification settings - Fork 3.2k
MongoDB: Support DBRef pushdown #22027
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1536,7 +1536,7 @@ private void testProjectionPushdownWithDBRef(Object objectId, String expectedVal | |
|
||
assertThat(query("SELECT parent.child, creator.databaseName, creator.collectionName, creator.id FROM test." + tableName)) | ||
.matches("SELECT " + expectedValue + ", varchar 'test', varchar 'creators', " + expectedValue) | ||
.isNotFullyPushedDown(ProjectNode.class); | ||
.isFullyPushedDown(); | ||
assertQuery( | ||
"SELECT typeof(creator) FROM test." + tableName, | ||
"SELECT 'row(databaseName varchar, collectionName varchar, id " + expectedType + ")'"); | ||
|
@@ -1573,7 +1573,7 @@ private void testProjectionPushdownWithNestedDBRef(Object objectId, String expec | |
|
||
assertThat(query("SELECT parent.child, parent.creator.databaseName, parent.creator.collectionName, parent.creator.id FROM test." + tableName)) | ||
.matches("SELECT " + expectedValue + ", varchar 'test', varchar 'creators', " + expectedValue) | ||
.isNotFullyPushedDown(ProjectNode.class); | ||
.isFullyPushedDown(); | ||
assertQuery( | ||
"SELECT typeof(parent.creator) FROM test." + tableName, | ||
"SELECT 'row(databaseName varchar, collectionName varchar, id " + expectedType + ")'"); | ||
|
@@ -1609,7 +1609,7 @@ private void testProjectionPushdownWithPredefinedDBRefKeyword(Object objectId, S | |
assertThat(query("SELECT parent.id, parent.id.id FROM test." + tableName)) | ||
.skippingTypesCheck() | ||
.matches("SELECT row('test', 'creators', %1$s), %1$s".formatted(expectedValue)) | ||
.isNotFullyPushedDown(ProjectNode.class); | ||
.isFullyPushedDown(); | ||
assertQuery( | ||
"SELECT typeof(parent.id), typeof(parent.id.id) FROM test." + tableName, | ||
"SELECT 'row(databaseName varchar, collectionName varchar, id %1$s)', '%1$s'".formatted(expectedType)); | ||
|
@@ -1679,12 +1679,12 @@ private void testDBRefLikeDocument(Document document1, Document document2, Strin | |
assertThat(query("SELECT creator.id FROM test." + tableName)) | ||
.skippingTypesCheck() | ||
.matches("VALUES (%1$s), (%1$s)".formatted(expectedValue)) | ||
.isNotFullyPushedDown(ProjectNode.class); | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query("SELECT creator.databasename, creator.collectionname, creator.id FROM test." + tableName)) | ||
.skippingTypesCheck() | ||
.matches("VALUES ('doc_test', 'doc_creators', %1$s), ('dbref_test', 'dbref_creators', %1$s)".formatted(expectedValue)) | ||
.isNotFullyPushedDown(ProjectNode.class); | ||
.isFullyPushedDown(); | ||
|
||
assertUpdate("DROP TABLE test." + tableName); | ||
} | ||
|
@@ -1693,14 +1693,14 @@ private static Document getDocumentWithDifferentDbRefFieldOrder(Object objectId) | |
{ | ||
return new Document() | ||
.append("_id", new ObjectId("5126bbf64aed4daf9e2ab771")) | ||
.append("creator", new Document().append("collectionName", "doc_creators").append("id", objectId).append("databaseName", "doc_test")); | ||
.append("creator", new Document().append("$ref", "doc_creators").append("$id", objectId).append("$db", "doc_test")); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we're constructing the BSON document by hand here I changed these to actually correspond to correct MongoDB names for collectionName, databaseName etc. Otherwise these documents are in no way related to DBRefs. |
||
} | ||
|
||
private static Document documentWithSameDbRefFieldOrder(Object objectId) | ||
{ | ||
return new Document() | ||
.append("_id", new ObjectId("5126bbf64aed4daf9e2ab771")) | ||
.append("creator", new Document().append("databaseName", "doc_test").append("collectionName", "doc_creators").append("id", objectId)); | ||
.append("creator", new Document().append("$db", "doc_test").append("$ref", "doc_creators").append("$id", objectId)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above |
||
} | ||
|
||
private static Document dbRefDocument(Object objectId) | ||
|
@@ -1717,9 +1717,9 @@ private void testDBRefLikeDocument(Object objectId, String expectedValue) | |
Document documentWithDifferentDbRefFieldOrder = new Document() | ||
.append("_id", new ObjectId("5126bbf64aed4daf9e2ab771")) | ||
.append("creator", new Document() | ||
.append("databaseName", "doc_test") | ||
.append("collectionName", "doc_creators") | ||
.append("id", objectId)); | ||
.append("$db", "doc_test") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above |
||
.append("$ref", "doc_creators") | ||
.append("$id", objectId)); | ||
Document dbRefDocument = new Document() | ||
.append("_id", new ObjectId("5126bbf64aed4daf9e2ab772")) | ||
.append("creator", new DBRef("dbref_test", "dbref_creators", objectId)); | ||
|
@@ -1743,6 +1743,38 @@ private void testDBRefLikeDocument(Object objectId, String expectedValue) | |
assertUpdate("DROP TABLE test." + tableName); | ||
} | ||
|
||
@Test | ||
public void testDBRefLikeDocumentWithTrinoLikeFieldNames() | ||
{ | ||
String objectId = "test_id"; | ||
String expectedValue = "varchar 'test_id'"; | ||
String tableName = "test_dbref_like_document_" + randomNameSuffix(); | ||
|
||
// DbRef-like document but with field names as they're used in Trino | ||
Document dbRefLikeDocument = new Document() | ||
.append("_id", new ObjectId("5126bbf64aed4daf9e2ab771")) | ||
.append("creator", new Document() | ||
.append("collectionName", "doc_creators") | ||
.append("id", objectId) | ||
.append("databaseName", "doc_test")); | ||
client.getDatabase("test").getCollection(tableName).insertOne(dbRefLikeDocument); | ||
|
||
assertThat(query("SELECT * FROM test." + tableName)) | ||
.skippingTypesCheck() | ||
.matches("VALUES " | ||
+ " row(row('doc_creators', " + expectedValue + ", 'doc_test'))"); | ||
|
||
assertThat(query("SELECT creator.id FROM test." + tableName)) | ||
.skippingTypesCheck() | ||
.matches("VALUES " + "(%1$s)".formatted(expectedValue)); | ||
|
||
assertThat(query("SELECT creator.collectionname, creator.id, creator.databasename FROM test." + tableName)) | ||
.skippingTypesCheck() | ||
.matches("VALUES " + "('doc_creators', %1$s, 'doc_test')".formatted(expectedValue)); | ||
|
||
assertUpdate("DROP TABLE test." + tableName); | ||
} | ||
|
||
@Test | ||
public void testPredicateOnDBRefField() | ||
{ | ||
|
@@ -1766,12 +1798,12 @@ private void testPredicateOnDBRefField(Object objectId, String expectedValue) | |
assertThat(query("SELECT * FROM test." + tableName + " WHERE creator.id = " + expectedValue)) | ||
.skippingTypesCheck() | ||
.matches("SELECT ROW(varchar 'test', varchar 'creators', " + expectedValue + ")") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query("SELECT creator.id FROM test." + tableName + " WHERE creator.id = " + expectedValue)) | ||
.skippingTypesCheck() | ||
.matches("SELECT " + expectedValue) | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
assertUpdate("DROP TABLE test." + tableName); | ||
} | ||
|
@@ -1793,21 +1825,21 @@ private void testPredicateOnDBRefLikeDocument(Object objectId, String expectedVa | |
Document document = new Document() | ||
.append("_id", new ObjectId("5126bbf64aed4daf9e2ab771")) | ||
.append("creator", new Document() | ||
.append("databaseName", "test") | ||
.append("collectionName", "creators") | ||
.append("id", objectId)); | ||
.append("$db", "test") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above |
||
.append("$ref", "creators") | ||
.append("$id", objectId)); | ||
|
||
client.getDatabase("test").getCollection(tableName).insertOne(document); | ||
|
||
assertThat(query("SELECT * FROM test." + tableName + " WHERE creator.id = " + expectedValue)) | ||
.skippingTypesCheck() | ||
.matches("SELECT ROW(varchar 'test', varchar 'creators', " + expectedValue + ")") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query("SELECT creator.id FROM test." + tableName + " WHERE creator.id = " + expectedValue)) | ||
.skippingTypesCheck() | ||
.matches("SELECT " + expectedValue) | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
assertUpdate("DROP TABLE test." + tableName); | ||
} | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I remember why we disallowed DBRef. We can't distinguish DBRef from object with same field names.
This PR causes silent correctness issue if the document has an object with
databaseName
,collectionName
andid
fields.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ebyhr I don't think that's the case - I added a test to check this scenario.
The only time this would be a problem is if the same collection has documents that hold both actual DBRefs and DBRef-like documents in the same field.
Could we just document this possible but extremely unlikely edge case? IMO the importance of DBRef pushdown is way higher than this concern since without it you're loading the entire collection into memory when filtering or joining by a DBRef field (which makes Trino fairly useless for bigger databases in such cases).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The added test is wrong. Please reorder
creator
fields as databaseName, collectionName, id.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right - I put them in the order that mongo expects them (docs), not the order trino expects them. If I order them as you said the test does indeed fail.
Is there a way we could signal that a field is a DBRef in its type signature so that we didn't have to rely on field names & order to detect a DBRef down the line?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ebyhr in order to consider DBRef pushdown do you think we should
a) find a way to support it cleanly (how? a custom type?)
b) require it to be explicitly turned on with a disclaimer that it can break on specifically formed documents?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ping @ebyhr - would appreciate some guidance
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ebyhr What if we could allow the pushdown - if the Collection doesn't have any columns on those name ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Praveen2112 I think the problem is we can't really differentiate whether those are columns on the collection or these "system" DBRef columns.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
see @ebyhr 's first comment in this thread.