From f9d5c56dfd2500614111fd36320b5d52319019ac Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Mon, 3 Apr 2023 10:17:56 -0700 Subject: [PATCH] Fix support for VCF sequence indexes --- augur/filter/io.py | 7 ++++++- augur/io/tabular_file.py | 15 +++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/augur/filter/io.py b/augur/filter/io.py index c0f976512..511473f04 100644 --- a/augur/filter/io.py +++ b/augur/filter/io.py @@ -146,7 +146,12 @@ def import_sequence_index(args): # Load the sequence index, if a path exists. if sequence_index_path: - sequence_index = TabularFile(sequence_index_path, header=True, delimiters=[SEQUENCE_INDEX_DELIMITER]) + try: + sequence_index = TabularFile(sequence_index_path, header=True, delimiters=[SEQUENCE_INDEX_DELIMITER]) + except InvalidDelimiter: + # This can happen for single-column files (e.g. VCF sequence indexes). + # If so, use a tab character as an arbitrary delimiter. + sequence_index = TabularFile(sequence_index_path, header=True, delimiter='\t') with Sqlite3Database(constants.RUNTIME_DB_FILE, mode="rw") as db: # Import the sequence index. _import_tabular_file(sequence_index, db, constants.SEQUENCE_INDEX_TABLE) diff --git a/augur/io/tabular_file.py b/augur/io/tabular_file.py index 3e7596f6c..b5ba61ea3 100644 --- a/augur/io/tabular_file.py +++ b/augur/io/tabular_file.py @@ -24,13 +24,15 @@ class TabularFile(File): context-specific usage. """ - def __init__(self, path: str, delimiters: Iterable[str] = None, + def __init__(self, path: str, delimiter: str = None, delimiters: Iterable[str] = None, header: bool = False, columns: Iterable[str] = None): """ Parameters ---------- path Path of tabular file. + delimiter + Use this as the delimiter. delimiters List of possible delimiters to use, in order of precedence. header @@ -40,11 +42,16 @@ def __init__(self, path: str, delimiters: Iterable[str] = None, """ super().__init__(path) - if delimiters is None: + if delimiter and delimiters: + raise ValueError("At most one of delimiter and delimiters can be set.") + + if not delimiter and not delimiters: delimiters = DEFAULT_DELIMITERS - self.delimiter = get_delimiter(self.path, delimiters) - """Delimiter of tabular file.""" + if delimiter: + self.delimiter = delimiter + else: + self.delimiter = get_delimiter(self.path, delimiters) if (not header and not columns) or (header and columns): raise ValueError("Tabular file must have either a header row or column names specified, but not both.")