Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Truncate table option for to sql #50088

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.5.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Bug fixes

Other
~~~~~
-
-
-

.. ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ Other enhancements
- :func:`timedelta_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49824`)
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`)
- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`)
-
- Added ``"truncate"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` to truncate the existing table (:issue:`37210`)

.. ---------------------------------------------------------------------------
.. _whatsnew_200.notable_bug_fixes:
Expand Down
31 changes: 23 additions & 8 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ def to_sql(
name: str,
con,
schema: str | None = None,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool = True,
index_label: IndexLabel = None,
chunksize: int | None = None,
Expand All @@ -629,10 +629,11 @@ def to_sql(
schema : str, optional
Name of SQL schema in database to write to (if database flavor
supports this). If None, use default schema (default).
if_exists : {'fail', 'replace', 'append'}, default 'fail'
if_exists : {'fail', 'replace', 'append', 'truncate'}, default 'fail'
- fail: If table exists, do nothing.
- replace: If table exists, drop it, recreate it, and insert data.
- append: If table exists, insert data. Create if does not exist.
- truncate: If table exists, truncate it, then insert data.
index : bool, default True
Write DataFrame index as a column.
index_label : str or sequence, optional
Expand Down Expand Up @@ -682,7 +683,7 @@ def to_sql(
`sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or
`SQLAlchemy <https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.BaseCursorResult.rowcount>`__
""" # noqa:E501
if if_exists not in ("fail", "replace", "append"):
if if_exists not in ("fail", "replace", "append", "truncate"):
raise ValueError(f"'{if_exists}' is not valid for if_exists")

if isinstance(frame, Series):
Expand Down Expand Up @@ -854,6 +855,8 @@ def create(self) -> None:
if self.if_exists == "replace":
self.pd_sql.drop_table(self.name, self.schema)
self._execute_create()
elif self.if_exists == "truncate":
self.pd_sql.trunc_table(self.name, self.schema)
elif self.if_exists == "append":
pass
else:
Expand Down Expand Up @@ -1311,7 +1314,7 @@ def to_sql(
self,
frame,
name,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool = True,
index_label=None,
schema=None,
Expand Down Expand Up @@ -1642,7 +1645,7 @@ def prep_table(
self,
frame,
name,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool | str | list[str] | None = True,
index_label=None,
schema=None,
Expand Down Expand Up @@ -1718,7 +1721,7 @@ def to_sql(
self,
frame,
name: str,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool = True,
index_label=None,
schema: str | None = None,
Expand All @@ -1736,10 +1739,11 @@ def to_sql(
frame : DataFrame
name : string
Name of SQL table.
if_exists : {'fail', 'replace', 'append'}, default 'fail'
if_exists : {'fail', 'replace', 'append', 'truncate'}, default 'fail'
- fail: If table exists, do nothing.
- replace: If table exists, drop it, recreate it, and insert data.
- append: If table exists, insert data. Create if does not exist.
- truncate: If table exists, truncate it, and insert data.
index : boolean, default True
Write DataFrame index as a column.
index_label : string or sequence, default None
Expand Down Expand Up @@ -1833,6 +1837,13 @@ def drop_table(self, table_name: str, schema: str | None = None) -> None:
self.get_table(table_name, schema).drop(bind=self.con)
self.meta.clear()

def trunc_table(self, table_name: str, schema: str | None = None) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens when you try to truncate a table that doesn't exist? Should this raise? If so can you add a test for that?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the table doesn't exist, it should just create a new table - added a test for it.

I also added a test for the case where truncate is selected and the frame being written has columns that the existing table does not have. The database should raise an error in that case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather we raise here if the DB doesn't support truncate. In the future there could be a use for a delete_from argument in addition to truncate, so merging the two here dependent on the DB is confusing

schema = schema or self.meta.schema
if self.has_table(table_name, schema):
self.meta.reflect(bind=self.con, only=[table_name], schema=schema)
self.execute(f"TRUNCATE TABLE {schema}.{table_name}")
self.meta.clear()

def _create_sql_schema(
self,
frame: DataFrame,
Expand Down Expand Up @@ -2181,10 +2192,11 @@ def to_sql(
frame: DataFrame
name: string
Name of SQL table.
if_exists: {'fail', 'replace', 'append'}, default 'fail'
if_exists: {'fail', 'replace', 'append', 'truncate'}, default 'fail'
fail: If table exists, do nothing.
replace: If table exists, drop it, recreate it, and insert data.
append: If table exists, insert data. Create if it does not exist.
truncate: If table exists, truncate it, then insert data.
index : bool, default True
Write DataFrame index as a column
index_label : string or sequence, default None
Expand Down Expand Up @@ -2253,6 +2265,9 @@ def drop_table(self, name: str, schema: str | None = None) -> None:
drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}"
self.execute(drop_sql)

def trunc_table(self, name: str, schema: str | None = None) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this method should be deleted, or should explicitly raise a NotImplementedError for sqlite. Can you also set up a test called test_sqlite_truncate_raises that makes sure that happens? You'll see that pattern in many of the other tests

raise NotImplementedError("TRUNCATE not implemented on database")

def _create_sql_schema(
self,
frame,
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,36 @@ def test_to_sql_replace(self, test_frame1):

assert num_rows == num_entries

def test_to_sql_truncate(self, test_frame1):
    """Write a frame, write it again with ``if_exists="truncate"``, and
    check the table still exists and holds exactly one copy of the rows.
    """
    table = "test_frame3"

    # Initial write creates the table.
    sql.to_sql(test_frame1, table, self.conn, if_exists="fail")
    # Second write goes through the truncate path.
    sql.to_sql(test_frame1, table, self.conn, if_exists="truncate")

    assert sql.has_table(table, self.conn)
    # Row count must equal the frame length, not double it.
    assert count_rows(self.conn, table) == len(test_frame1)

def test_to_sql_truncate_no_table(self, test_frame1):
# creates new table if table doesn't exist
sql.to_sql(test_frame1, "test_frame_new", self.conn, if_exists="truncate")
Copy link
Member

@WillAyd WillAyd Dec 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Think this should raise too? Feels a little strange to have truncate create a table

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Though I guess this is consistent with replace

assert sql.has_table("test_frame_new")

def test_to_sql_truncate_new_columns(self, test_frame1, test_frame3):
    """Create the table from ``test_frame3``, then expect writing
    ``test_frame1`` with ``if_exists="truncate"`` to raise an error
    matching the "no column named C" message.
    """
    table = "test_frame3"
    sql.to_sql(test_frame3, table, self.conn, if_exists="fail")

    # Truncating keeps the existing schema, so the second frame's
    # columns are expected to be rejected.
    expected = "table test_frame3 has no column named C"
    with pytest.raises(Exception, match=expected):
        sql.to_sql(test_frame1, table, self.conn, if_exists="truncate")

def test_sqlite_truncate_raises(self, test_frame1):
    """Expect ``to_sql`` with ``if_exists="truncate"`` to raise
    ``NotImplementedError`` with the "TRUNCATE not implemented" message.
    """
    expected = "TRUNCATE not implemented on database"
    write_kwargs = {"if_exists": "truncate"}
    with pytest.raises(NotImplementedError, match=expected):
        sql.to_sql(test_frame1, "test_frame3", self.conn, **write_kwargs)

def test_to_sql_append(self, test_frame1):
assert sql.to_sql(test_frame1, "test_frame4", self.conn, if_exists="fail") == 4

Expand Down