Skip to content

Commit

Permalink
io: Repurpose delimiter inference function to return dialect
Browse files Browse the repository at this point in the history
The delimiter is one of many inferable properties of a tabular file, all of
which are encompassed by the csv.Dialect class. Since the dialect must
be retrieved to identify the delimiter, returning the dialect itself both
simplifies the function and opens the door to accessing other dialect
properties in the future.
  • Loading branch information
victorlin committed Jan 31, 2024
1 parent 5be2639 commit ff98de7
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions augur/io/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def read_metadata(metadata_file, delimiters=DEFAULT_DELIMITERS, id_columns=DEFAU
"""
kwargs = {
"sep": _get_delimiter(metadata_file, delimiters),
"sep": _get_dialect(metadata_file, delimiters).delimiter,
"engine": "c",
"skipinitialspace": True,
"na_filter": False,
Expand Down Expand Up @@ -190,7 +190,7 @@ def read_table_to_dict(table, delimiters, duplicate_reporting=DataErrorMethod.ER
handle = chain(table_sample_file, handle)

try:
# Note: this sort of duplicates _get_delimiter(), but it's easier if
# Note: this sort of duplicates _get_dialect(), but it's easier if
# this is separate since it handles non-seekable buffers.
dialect = csv.Sniffer().sniff(table_sample, delimiters)
except csv.Error as error:
Expand Down Expand Up @@ -474,8 +474,8 @@ def write_records_to_tsv(records, output_file):
tsv_writer.writerow(record)


def _get_delimiter(path: str, valid_delimiters: Iterable[str]):
"""Get the delimiter of a file given a list of valid delimiters."""
def _get_dialect(path: str, valid_delimiters: Iterable[str]):
"""Get the dialect of a file given a list of valid delimiters."""

for delimiter in valid_delimiters:
if len(delimiter) != 1:
Expand All @@ -484,7 +484,7 @@ def _get_delimiter(path: str, valid_delimiters: Iterable[str]):
with open_file(path) as file:
try:
# Infer the delimiter from the first line.
return csv.Sniffer().sniff(file.readline(), "".join(valid_delimiters)).delimiter
return csv.Sniffer().sniff(file.readline(), "".join(valid_delimiters))
except csv.Error as error:
# This assumes all csv.Errors imply a delimiter issue. That might
# change in a future Python version.
Expand Down

0 comments on commit ff98de7

Please sign in to comment.