Skip to content

Commit

Permalink
Handle BOM in CSV files, closes #250
Browse files (browse the repository at this point in the history)
  • Loading branch information
simonw committed May 29, 2021
1 parent 668e8c9 commit 8de5595
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
2 changes: 1 addition & 1 deletion sqlite_utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ def insert_upsert_implementation(
raise click.ClickException("Use just one of --nl, --csv or --tsv")
if encoding and not (csv or tsv):
raise click.ClickException("--encoding must be used with --csv or --tsv")
- encoding = encoding or "utf-8"
+ encoding = encoding or "utf-8-sig"
buffered = io.BufferedReader(json_file, buffer_size=4096)
decoded = io.TextIOWrapper(buffered, encoding=encoding)
if pk and len(pk) == 1:
Expand Down
25 changes: 25 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1915,3 +1915,28 @@ def test_attach(tmpdir):
{"id": 1, "text": "foo"},
{"id": 1, "text": "bar"},
]


def test_csv_insert_bom(tmpdir):
    """Insert a BOM-prefixed CSV twice and compare the resulting schemas.

    The same file is loaded into two tables: once with an explicit
    ``--encoding utf-8`` and once with no ``--encoding`` option. The
    expected schema for the first table has the ``\\ufeff`` character glued
    to the first column name; the expected schema for the second does not.
    """
    database_file = str(tmpdir / "test.db")
    csv_file = str(tmpdir / "bom.csv")

    # UTF-8 byte order mark, then a header row and a single data row.
    with open(csv_file, "wb") as handle:
        handle.write(b"\xef\xbb\xbfname,age\nCleo,5")

    # Order matters: sqlite_master is read back below and compared as a list.
    invocations = (
        ["insert", database_file, "broken", csv_file, "--encoding", "utf-8", "--csv"],
        ["insert", database_file, "fixed", csv_file, "--csv"],
    )
    for argv in invocations:
        outcome = CliRunner().invoke(cli.cli, argv, catch_exceptions=False)
        assert outcome.exit_code == 0

    schema_rows = (
        Database(database_file)
        .execute("select name, sql from sqlite_master")
        .fetchall()
    )

    # NOTE(review): these literals must match the stored schema text exactly,
    # including the whitespace after each "\n" -- confirm against real output.
    expected_rows = [
        ("broken", "CREATE TABLE [broken] (\n [\ufeffname] TEXT,\n [age] TEXT\n)"),
        ("fixed", "CREATE TABLE [fixed] (\n [name] TEXT,\n [age] TEXT\n)"),
    ]
    assert schema_rows == expected_rows

0 comments on commit 8de5595

Please sign in to comment.