Skip to content

db-dump: Reset sequences after data import #11506

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion script/import-database-dump.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,16 @@ psql -a "$DATABASE_NAME" < schema.sql
echo "Importing data"
psql -a "$DATABASE_NAME" < import.sql

cd "$ORIG_WD"

# Importing the database doesn't cause materialised views to be refreshed, so
# let's do that.
psql --command="REFRESH MATERIALIZED VIEW recent_crate_downloads" "$DATABASE_NAME"

# Reset all ID sequence values to match the imported data
echo "Resetting sequence values"
psql -a "$DATABASE_NAME" < "$(dirname "$0")/reset-sequences.sql"

# Importing the database also doesn't insert Diesel migration metadata, but we
# can infer that from the dump metadata and an up to date crates.io repo.
cd "$ORIG_WD"
python3 "$(dirname "$0")/infer-database-dump-version.py" -m "$DUMP_PATH/metadata.json" | psql -a "$DATABASE_NAME"
38 changes: 38 additions & 0 deletions script/reset-sequences.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- Reset all ID column sequences to their maximum values or 1
-- This script dynamically discovers all sequences associated with 'id' columns
-- and sets them to either the maximum id value in the table or 1 if empty

DO $$
DECLARE
rec RECORD;
max_id BIGINT;
BEGIN
FOR rec IN
WITH id_sequences AS (
SELECT
pg_class.relname AS sequence_name,
pg_class_tables.relname AS table_name
FROM pg_class
JOIN pg_depend ON pg_depend.objid = pg_class.oid
JOIN pg_class pg_class_tables ON pg_depend.refobjid = pg_class_tables.oid
JOIN pg_attribute ON pg_depend.refobjid = pg_attribute.attrelid AND pg_depend.refobjsubid = pg_attribute.attnum
JOIN pg_namespace ON pg_class_tables.relnamespace = pg_namespace.oid
WHERE pg_class.relkind = 'S' -- sequences
AND pg_attribute.attname = 'id' -- only 'id' columns
AND pg_namespace.nspname = 'public' -- only public schema
)
SELECT * FROM id_sequences
LOOP
-- Get the maximum id value from the table
EXECUTE format('SELECT MAX(id) FROM public.%I', rec.table_name) INTO max_id;

-- Reset the sequence to the max value or 1 if empty
--
-- Use is_called = false for empty tables (max_id IS NULL),
-- and is_called = true for populated tables
PERFORM setval('public.' || rec.sequence_name, COALESCE(max_id, 1), max_id IS NOT NULL);

-- Log the action
RAISE NOTICE 'Reset sequence % to % (table: %, is_called: %)', rec.sequence_name, COALESCE(max_id, 1), rec.table_name, max_id IS NOT NULL;
END LOOP;
END $$;
Loading