When no --encoding is given, decode input as "utf-8-sig" instead of "utf-8" so
that a leading UTF-8 byte order mark is stripped rather than ending up in the
first column name. The new test inserts the same BOM-prefixed CSV twice: once
with an explicit "--encoding utf-8" (the BOM is kept in the column name) and
once with the default (the BOM is removed).

diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py
index 68bd98684..cbc0555b4 100644
--- a/sqlite_utils/cli.py
+++ b/sqlite_utils/cli.py
@@ -694,7 +694,7 @@ def insert_upsert_implementation(
         raise click.ClickException("Use just one of --nl, --csv or --tsv")
     if encoding and not (csv or tsv):
         raise click.ClickException("--encoding must be used with --csv or --tsv")
-    encoding = encoding or "utf-8"
+    encoding = encoding or "utf-8-sig"
     buffered = io.BufferedReader(json_file, buffer_size=4096)
     decoded = io.TextIOWrapper(buffered, encoding=encoding)
     if pk and len(pk) == 1:
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 17ce27d8f..3807eca52 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1915,3 +1915,28 @@ def test_attach(tmpdir):
         {"id": 1, "text": "foo"},
         {"id": 1, "text": "bar"},
     ]
+
+
+def test_csv_insert_bom(tmpdir):
+    db_path = str(tmpdir / "test.db")
+    bom_csv_path = str(tmpdir / "bom.csv")
+    with open(bom_csv_path, "wb") as fp:
+        fp.write(b"\xef\xbb\xbfname,age\nCleo,5")
+    result = CliRunner().invoke(
+        cli.cli,
+        ["insert", db_path, "broken", bom_csv_path, "--encoding", "utf-8", "--csv"],
+        catch_exceptions=False,
+    )
+    assert result.exit_code == 0
+    result2 = CliRunner().invoke(
+        cli.cli,
+        ["insert", db_path, "fixed", bom_csv_path, "--csv"],
+        catch_exceptions=False,
+    )
+    assert result2.exit_code == 0
+    db = Database(db_path)
+    tables = db.execute("select name, sql from sqlite_master").fetchall()
+    assert tables == [
+        ("broken", "CREATE TABLE [broken] (\n   [\ufeffname] TEXT,\n   [age] TEXT\n)"),
+        ("fixed", "CREATE TABLE [fixed] (\n   [name] TEXT,\n   [age] TEXT\n)"),
+    ]