
Commit f1ff6db

Test all supported types for chunking.
1 parent 3cb3248 commit f1ff6db

File tree

2 files changed: +160 -74 lines changed

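For context: the new test drives the initial snapshot in chunks (walStreamOptions: { snapshotChunkSize: 100 }) and changes the primary key of a not-yet-snapshotted row in between chunks. Below is a minimal sketch of the chunking pattern this exercises, assuming keyset pagination on the primary key; chunkedSnapshotQueries is an illustrative name, not part of the PowerSync codebase, and the real replication queries may differ:

// Hypothetical sketch only: each chunk resumes after the last primary key seen.
function chunkedSnapshotQueries(chunkSize: number) {
  const firstChunk = `SELECT * FROM test_data ORDER BY id LIMIT ${chunkSize}`;
  const nextChunk = `SELECT * FROM test_data WHERE id > $1 ORDER BY id LIMIT ${chunkSize}`;
  return { firstChunk, nextChunk };
}

Because the resume condition compares primary key values, the chunk boundary value has to order and round-trip correctly for every supported key type, which is why int2, int4, int8, text, varchar and uuid each get their own test below.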
Lines changed: 157 additions & 0 deletions
@@ -0,0 +1,157 @@
import { MONGO_STORAGE_FACTORY, StorageFactory } from '@core-tests/util.js';
import { Metrics, reduceBucket } from '@powersync/service-core';
import { SqliteJsonValue } from '@powersync/service-sync-rules';
import * as crypto from 'node:crypto';
import * as timers from 'timers/promises';
import { describe, expect, test } from 'vitest';
import { WalStreamTestContext } from './wal_stream_utils.js';

describe('batch replication tests - mongodb', { timeout: 30_000 }, function () {
  // These are slow but consistent tests.
  // Not run on every test run, but they do run on CI and when manually debugging issues.
  defineBatchTests(MONGO_STORAGE_FACTORY);
});

function defineBatchTests(factory: StorageFactory) {
  // We need to test every supported type, since chunking could be quite sensitive to
  // how each specific type is handled.
  test('chunked snapshot edge case (int2)', async () => {
    await testChunkedSnapshot({
      idType: 'int2',
      genId: 'i',
      lastId: '2000',
      moveTo: '0',
      moveToJs: 0
    });
  });

  test('chunked snapshot edge case (int4)', async () => {
    await testChunkedSnapshot({
      idType: 'int4',
      genId: 'i',
      lastId: '2000',
      moveTo: '0',
      moveToJs: 0
    });
  });

  test('chunked snapshot edge case (int8)', async () => {
    await testChunkedSnapshot({
      idType: 'int8',
      genId: 'i',
      lastId: '2000',
      moveTo: '0',
      moveToJs: 0
    });
  });

  test('chunked snapshot edge case (text)', async () => {
    await testChunkedSnapshot({
      idType: 'text',
      genId: `to_char(i, 'fm0000')`,
      lastId: `'2000'`,
      moveTo: `'0000'`,
      moveToJs: '0000'
    });
  });

  test('chunked snapshot edge case (varchar)', async () => {
    await testChunkedSnapshot({
      idType: 'varchar',
      genId: `to_char(i, 'fm0000')`,
      lastId: `'2000'`,
      moveTo: `'0000'`,
      moveToJs: '0000'
    });
  });

  test('chunked snapshot edge case (uuid)', async () => {
    await testChunkedSnapshot({
      idType: 'uuid',
      // Generate a uuid by using the first part of a uuid and appending a 4-digit number.
      genId: `('00000000-0000-4000-8000-00000000' || to_char(i, 'fm0000')) :: uuid`,
      lastId: `'00000000-0000-4000-8000-000000002000'`,
      moveTo: `'00000000-0000-4000-8000-000000000000'`,
      moveToJs: '00000000-0000-4000-8000-000000000000'
    });
  });

  async function testChunkedSnapshot(options: {
    idType: string;
    genId: string;
    lastId: string;
    moveTo: string;
    moveToJs: SqliteJsonValue;
  }) {
    // 1. Start with 2k rows, one row with id = 2000, and a large TOAST value in another column.
    // 2. Replicate one batch of rows (id < 2000).
    // 3. `UPDATE table SET id = 0 WHERE id = 2000`
    // 4. Replicate the rest of the table.
    // 5. Logical replication picks up the UPDATE above, but it is missing the TOAST column.
    // 6. We end up with a row that has a missing TOAST column.
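    // Why the TOAST value goes missing: Postgres logical replication omits unchanged TOASTed
    // column values from UPDATE messages (unless the column changed or REPLICA IDENTITY FULL
    // is used), and because the row's key moves from lastId (not yet snapshotted) to moveTo
    // (behind the snapshot's resume point), the chunked snapshot never reads the full row
    // either. The "resnapshot" process is expected to recover it, which is what this verifies.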

    await using context = await WalStreamTestContext.open(factory, {
      // We need to use a smaller chunk size here, so that we can run a query in between chunks.
      walStreamOptions: { snapshotChunkSize: 100 }
    });

    await context.updateSyncRules(`bucket_definitions:
  global:
    data:
      - SELECT * FROM test_data`);
    const { pool } = context;

    await pool.query(`CREATE TABLE test_data(id ${options.idType} primary key, description text)`);

    // 1. Start with 2k rows, one row with id = 2000 (lastId)...
    await pool.query({
      statement: `INSERT INTO test_data(id, description) SELECT ${options.genId}, 'foo' FROM generate_series(1, 2000) i`
    });

    // ...and a large TOAST value in another column.
    // The TOAST value must be > 8kb after compression.
    const largeDescription = crypto.randomBytes(20_000).toString('hex');
    await pool.query({
      statement: `UPDATE test_data SET description = $1 WHERE id = ${options.lastId} :: ${options.idType}`,
      params: [{ type: 'varchar', value: largeDescription }]
    });

    // 2. Replicate one batch of rows (id < 2000).
    // Our "stopping point" here is not quite deterministic.
    const p = context.replicateSnapshot();

    const stopAfter = 100;
    const startRowCount = (await Metrics.getInstance().getMetricValueForTests('powersync_rows_replicated_total')) ?? 0;

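    // Wait until at least stopAfter rows (one full chunk, since snapshotChunkSize = 100) have
    // been replicated, so the UPDATE below runs in between snapshot chunks.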
    while (true) {
      const count =
        ((await Metrics.getInstance().getMetricValueForTests('powersync_rows_replicated_total')) ?? 0) - startRowCount;

      if (count >= stopAfter) {
        break;
      }
      await timers.setTimeout(1);
    }

    // 3. Move the row: `UPDATE test_data SET id = ${moveTo} WHERE id = ${lastId}`.
    const rs = await pool.query(
      `UPDATE test_data SET id = ${options.moveTo} WHERE id = ${options.lastId} RETURNING id`
    );
    expect(rs.rows.length).toEqual(1);

    // 4. Replicate the rest of the table.
    await p;

    // 5. Logical replication picks up the UPDATE above, but it is missing the TOAST column.
    context.startStreaming();

    // 6. If all went well, the "resnapshot" process would take care of this.
    const data = await context.getBucketData('global[]', undefined, {});
    const reduced = reduceBucket(data);

    const movedRow = reduced.find((row) => row.object_id == String(options.moveToJs));
    expect(movedRow?.data).toEqual(JSON.stringify({ id: options.moveToJs, description: largeDescription }));

    expect(reduced.length).toEqual(2001);
  }
}
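
The helper is parameterized so each key type only needs a small options block: idType is the primary key column type, genId is the SQL expression generating 2000 distinct keys from generate_series, lastId and moveTo are SQL literals for the row that is moved mid-snapshot, and moveToJs is that moved-to key as it is expected to appear in the synced row data. A further case would just be another such block; for example (illustrative only, not part of this commit, and assuming the type is supported as a primary key and serializes to a JSON number):

test('chunked snapshot edge case (numeric)', async () => {
  await testChunkedSnapshot({
    idType: 'numeric',
    genId: 'i',
    lastId: '2000',
    moveTo: '0',
    moveToJs: 0
  });
});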

modules/module-postgres/test/src/large_batch.test.ts

Lines changed: 3 additions & 74 deletions
@@ -1,12 +1,11 @@
-import { MONGO_STORAGE_FACTORY, StorageFactory, StorageOptions } from '@core-tests/util.js';
+import { MONGO_STORAGE_FACTORY, StorageFactory } from '@core-tests/util.js';
+import { Metrics } from '@powersync/service-core';
+import * as timers from 'timers/promises';
 import { describe, expect, test } from 'vitest';
 import { populateData } from '../../dist/utils/populate_test_data.js';
 import { env } from './env.js';
 import { TEST_CONNECTION_OPTIONS } from './util.js';
 import { WalStreamTestContext } from './wal_stream_utils.js';
-import * as timers from 'timers/promises';
-import { Metrics, reduceBucket } from '@powersync/service-core';
-import * as crypto from 'node:crypto';

 describe('batch replication tests - mongodb', { timeout: 120_000 }, function () {
   // These are slow but consistent tests.
@@ -369,76 +368,6 @@ function defineBatchTests(factory: StorageFactory) {
     expect(data.length).toEqual(11002 + deletedRowOps.length);
   }

-  test('chunked snapshot edge case', async () => {
-    // 1. Start with 10k rows, one row with id = 10000, and a large TOAST value in another column.
-    // 2. Replicate one batch of rows (id < 10000).
-    // 3. `UPDATE table SET id = 0 WHERE id = 10000`
-    // 4. Replicate the rest of the table.
-    // 5. Logical replication picks up the UPDATE above, but it is missing the TOAST column.
-    // 6. We end up with a row that has a missing TOAST column.
-
-    await using context = await WalStreamTestContext.open(factory, {
-      // We need to use a smaller chunk size here, so that we can run a query in between chunks
-      walStreamOptions: { snapshotChunkSize: 100 }
-    });
-
-    await context.updateSyncRules(`bucket_definitions:
-  global:
-    data:
-      - SELECT * FROM test_data`);
-    const { pool } = context;
-
-    await pool.query(`CREATE TABLE test_data(id integer primary key, description text)`);
-
-    // 1. Start with 10k rows, one row with id = 10000...
-    await pool.query({
-      statement: `INSERT INTO test_data(id, description) SELECT i, 'foo' FROM generate_series(1, 10000) i`
-    });
-
-    // ...and a large TOAST value in another column.
-    // Toast value, must be > 8kb after compression
-    const largeDescription = crypto.randomBytes(20_000).toString('hex');
-    await pool.query({
-      statement: 'UPDATE test_data SET description = $1 WHERE id = 10000',
-      params: [{ type: 'varchar', value: largeDescription }]
-    });
-
-    // 2. Replicate one batch of rows (id < 10000).
-    // Our "stopping point" here is not quite deterministic.
-    const p = context.replicateSnapshot();
-
-    const stopAfter = 1_000;
-    const startRowCount = (await Metrics.getInstance().getMetricValueForTests('powersync_rows_replicated_total')) ?? 0;
-
-    while (true) {
-      const count =
-        ((await Metrics.getInstance().getMetricValueForTests('powersync_rows_replicated_total')) ?? 0) - startRowCount;
-
-      if (count >= stopAfter) {
-        break;
-      }
-      await timers.setTimeout(1);
-    }
-
-    // 3. `UPDATE table SET id = 0 WHERE id = 10000`
-    await pool.query('UPDATE test_data SET id = 0 WHERE id = 10000');
-
-    // 4. Replicate the rest of the table.
-    await p;
-
-    // 5. Logical replication picks up the UPDATE above, but it is missing the TOAST column.
-    context.startStreaming();
-
-    // 6. If all went well, the "resnapshot" process would take care of this.
-    const data = await context.getBucketData('global[]', undefined, {});
-    const reduced = reduceBucket(data);
-
-    const movedRow = reduced.find((row) => row.object_id === '0');
-    expect(movedRow?.data).toEqual(`{"id":0,"description":"${largeDescription}"}`);
-
-    expect(reduced.length).toEqual(10_001);
-  });
-
   function printMemoryUsage() {
     const memoryUsage = process.memoryUsage();