Skip to content

Commit

Permalink
Add insert --truncate option
Browse files Browse the repository at this point in the history
Deletes all rows in the table (if it exists) before inserting new rows.
SQLite doesn't implement a TRUNCATE TABLE statement but does optimize an
unqualified DELETE FROM.

This can be handy if you want to refresh the entire contents of a table
but a) don't have a PK (so can't use --replace), b) don't want the table
to disappear (even briefly) for other connections, and c) need rows that
are no longer present in the new data to be removed.

Ideally the replacement of rows would appear instantaneous to other
connections by putting the DELETE + INSERT in a transaction, but this is
very difficult without breaking other code as the current transaction
handling is inconsistent and non-systematic.  There exists the
possibility for the DELETE to succeed but the INSERT to fail, leaving an
empty table.  This is not much worse, however, than the current
possibility of one chunked INSERT succeeding and being committed while
the next chunked INSERT fails, leaving a partially complete operation.
  • Loading branch information
tsibley authored and simonw committed Jul 8, 2020
1 parent f8277d0 commit ae45933
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 1 deletion.
4 changes: 4 additions & 0 deletions docs/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,10 @@ You can skip inserting any records that have a primary key that already exists u

$ sqlite-utils insert dogs.db dogs dogs.json --ignore

You can delete all the existing rows in the table before inserting the new records using ``--truncate``::

$ sqlite-utils insert dogs.db dogs dogs.json --truncate

You can also import newline-delimited JSON using the ``--nl`` option. Since `Datasette <https://datasette.readthedocs.io/>`__ can export newline-delimited JSON, you can combine the two tools like so::

$ curl -L "https://latest.datasette.io/fixtures/facetable.json?_shape=array&_nl=on" \
Expand Down
3 changes: 3 additions & 0 deletions docs/python-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,9 @@ The function can accept an iterator or generator of rows and will commit them ac
You can skip inserting any records that have a primary key that already exists using ``ignore=True``. This works with both ``.insert({...}, ignore=True)`` and ``.insert_all([...], ignore=True)``.

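You can delete all the existing rows in the table (if the table already exists) before inserting the new
records using ``truncate=True``. This is useful if you want to replace the data in the table.
Note that the delete and the inserts are not wrapped in a single transaction, so if an insert fails the table may be left empty or only partially populated.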

.. _python_api_insert_replace:

Insert-replacing data
Expand Down
11 changes: 10 additions & 1 deletion sqlite_utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,7 @@ def insert_upsert_implementation(
upsert,
ignore=False,
replace=False,
truncate=False,
not_null=None,
default=None,
):
Expand All @@ -442,7 +443,7 @@ def insert_upsert_implementation(
docs = json.load(json_file)
if isinstance(docs, dict):
docs = [docs]
extra_kwargs = {"ignore": ignore, "replace": replace}
extra_kwargs = {"ignore": ignore, "replace": replace, "truncate": truncate}
if not_null:
extra_kwargs["not_null"] = set(not_null)
if default:
Expand All @@ -465,6 +466,12 @@ def insert_upsert_implementation(
default=False,
help="Replace records if pk already exists",
)
@click.option(
"--truncate",
is_flag=True,
default=False,
help="Truncate table before inserting records, if table already exists",
)
def insert(
path,
table,
Expand All @@ -477,6 +484,7 @@ def insert(
alter,
ignore,
replace,
truncate,
not_null,
default,
):
Expand All @@ -499,6 +507,7 @@ def insert(
upsert=False,
ignore=ignore,
replace=replace,
truncate=truncate,
not_null=not_null,
default=default,
)
Expand Down
3 changes: 3 additions & 0 deletions sqlite_utils/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,7 @@ def insert_all(
alter=DEFAULT,
ignore=DEFAULT,
replace=DEFAULT,
truncate=False,
extracts=DEFAULT,
conversions=DEFAULT,
columns=DEFAULT,
Expand Down Expand Up @@ -1027,6 +1028,8 @@ def insert_all(
batch_size = max(1, min(batch_size, SQLITE_MAX_VARS // num_columns))
self.last_rowid = None
self.last_pk = None
if truncate and self.exists():
self.db.conn.execute("DELETE FROM [{}];".format(self.name))
for chunk in chunks(itertools.chain([first_record], records), batch_size):
chunk = list(chunk)
num_records_processed += len(chunk)
Expand Down
33 changes: 33 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,39 @@ def test_insert_replace(db_path, tmpdir):
)


def test_insert_truncate(db_path):
    """Check that ``insert --truncate`` replaces the existing rows of a table.

    The table is first populated with two newline-delimited JSON records,
    then the same command is run again with ``--truncate`` and two different
    records. Afterwards only the second pair of records may remain.
    """
    select_sql = "select foo, n from from_json_nl"
    common_args = ["insert", db_path, "from_json_nl", "-", "--nl"]

    # Initial load; --batch-size=1 means each record goes in its own batch.
    first_run = CliRunner().invoke(
        cli.cli,
        common_args + ["--batch-size=1"],
        input='{"foo": "bar", "n": 1}\n{"foo": "baz", "n": 2}',
    )
    assert 0 == first_run.exit_code, first_run.output

    db = Database(db_path)
    initial_rows = db.execute_returning_dicts(select_sql)
    assert [{"foo": "bar", "n": 1}, {"foo": "baz", "n": 2}] == initial_rows

    # Truncate and insert new rows
    second_run = CliRunner().invoke(
        cli.cli,
        common_args + ["--truncate", "--batch-size=1"],
        input='{"foo": "bam", "n": 3}\n{"foo": "bat", "n": 4}',
    )
    assert 0 == second_run.exit_code, second_run.output

    # None of the rows from the first load should still be present.
    remaining_rows = db.execute_returning_dicts(select_sql)
    assert [{"foo": "bam", "n": 3}, {"foo": "bat", "n": 4}] == remaining_rows


def test_insert_alter(db_path, tmpdir):
result = CliRunner().invoke(
cli.cli,
Expand Down

0 comments on commit ae45933

Please sign in to comment.