
Rewrite the "sync_local" query #78

Merged: 9 commits, May 28, 2025
Changes from 1 commit
Move annotations to the test file; add a link to docs.
rkistner committed May 27, 2025
commit bf38faf24c3761805dc1934cea32575ec432b931
9 changes: 1 addition & 8 deletions crates/core/src/sync_local.rs
@@ -186,35 +186,28 @@ impl<'a> SyncOperation<'a> {
Ok(match &self.partial {
None => {
// Complete sync
// See dart/test/sync_local_performance_test.dart for an annotated version of this query.
self.db
.prepare_v2(
"\
-- 1. Filter oplog by the ops added but not applied yet (oplog b).
-- We do not do any DISTINCT operation here, since that introduces a temp b-tree.
-- We filter out duplicates using the GROUP BY below.
WITH updated_rows AS (
SELECT b.row_type, b.row_id FROM ps_buckets AS buckets
CROSS JOIN ps_oplog AS b ON b.bucket = buckets.id
AND (b.op_id > buckets.last_applied_op)
UNION ALL SELECT row_type, row_id FROM ps_updated_rows
)

-- 2. Find *all* current ops over different buckets for those objects (oplog r).
SELECT
b.row_type,
b.row_id,
(
-- 3. For each unique row, select the data from the latest oplog entry.
-- The max(r.op_id) clause is used to select the latest oplog entry.
-- The iif is to avoid the max(r.op_id) column ending up in the results.
SELECT iif(max(r.op_id), r.data, null)
FROM ps_oplog r
WHERE r.row_type = b.row_type
AND r.row_id = b.row_id

) as data
FROM updated_rows b
-- Group for (2)
GROUP BY b.row_type, b.row_id;",
)
.into_db_result(self.db)?
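For reference, a minimal sketch of the bare-column max() + iif() pattern that the scalar subquery above relies on. The oplog_demo table is hypothetical and only illustrates the behaviour; it is not part of the PowerSync schema.

-- Toy table: several oplog entries per row_id, with increasing op_id.
CREATE TABLE oplog_demo (row_id TEXT, op_id INTEGER, data TEXT);
INSERT INTO oplog_demo VALUES
  ('a', 1, 'old-a'),
  ('a', 3, 'new-a'),
  ('b', 2, 'only-b');

-- With exactly one max() aggregate in the query, the bare column "data" is
-- taken from the row that holds max(op_id) within each group. The iif()
-- returns that data value instead of the max(op_id) value itself, and
-- evaluates to NULL when no row matches (max(op_id) is NULL).
SELECT row_id, iif(max(op_id), data, NULL) AS data
FROM oplog_demo
GROUP BY row_id;
-- Returns: ('a', 'new-a') and ('b', 'only-b')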
14 changes: 14 additions & 0 deletions dart/test/sync_local_performance_test.dart
@@ -119,6 +119,7 @@ COMMIT;
// standard test suite.

test('sync_local new query', () {
// This is the query we're using now.
// This query only uses a single TEMP B-TREE for the GROUP BY operation,
// leading to fairly efficient execution.

@@ -134,27 +135,40 @@ COMMIT;
// |--USE TEMP B-TREE FOR GROUP BY
// `--CORRELATED SCALAR SUBQUERY 3
// `--SEARCH r USING INDEX ps_oplog_row (row_type=? AND row_id=?)
//
// For details on the max(r.op_id) clause, see:
// https://sqlite.org/lang_select.html#bare_columns_in_an_aggregate_query
// > If there is exactly one min() or max() aggregate in the query, then all bare columns in the result
// > set take values from an input row which also contains the minimum or maximum.

var timer = Stopwatch()..start();
final q = '''
-- 1. Filter oplog by the ops added but not applied yet (oplog b).
-- We do not do any DISTINCT operation here, since that introduces a temp b-tree.
-- We filter out duplicates using the GROUP BY below.
WITH updated_rows AS (
SELECT b.row_type, b.row_id FROM ps_buckets AS buckets
CROSS JOIN ps_oplog AS b ON b.bucket = buckets.id
AND (b.op_id > buckets.last_applied_op)
UNION ALL SELECT row_type, row_id FROM ps_updated_rows
)

-- 2. Find *all* current ops over different buckets for those objects (oplog r).
SELECT
b.row_type,
b.row_id,
(
-- 3. For each unique row, select the data from the latest oplog entry.
-- The max(r.op_id) clause is used to select the latest oplog entry.
-- The iif is to avoid the max(r.op_id) column ending up in the results.
SELECT iif(max(r.op_id), r.data, null)
FROM ps_oplog r
WHERE r.row_type = b.row_type
AND r.row_id = b.row_id

) as data
FROM updated_rows b
-- Group for (2)
GROUP BY b.row_type, b.row_id;
''';
db.select(q);
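A sketch of how the plan quoted in the test can be reproduced directly in a SQLite shell, assuming a database where the PowerSync tables (ps_buckets, ps_oplog, ps_updated_rows) exist. The exact EXPLAIN QUERY PLAN output can vary between SQLite versions; the point to check is that only a single TEMP B-TREE line appears, for the final GROUP BY.

EXPLAIN QUERY PLAN
WITH updated_rows AS (
  SELECT b.row_type, b.row_id FROM ps_buckets AS buckets
    CROSS JOIN ps_oplog AS b ON b.bucket = buckets.id
      AND (b.op_id > buckets.last_applied_op)
  UNION ALL SELECT row_type, row_id FROM ps_updated_rows
)
SELECT
  b.row_type,
  b.row_id,
  (SELECT iif(max(r.op_id), r.data, null)
     FROM ps_oplog r
    WHERE r.row_type = b.row_type AND r.row_id = b.row_id) AS data
FROM updated_rows b
GROUP BY b.row_type, b.row_id;
-- De-duplicating earlier with a DISTINCT inside the CTE would typically add a
-- second temporary b-tree; instead, duplicates are removed by the GROUP BY,
-- which is the only step that materialises a temp b-tree in this plan.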