Skip to content

Commit

Permalink
Fix support for VCF sequence indexes
Browse files (browse the repository at this point in the history)
  • Loading branch information
victorlin committed Jun 17, 2023
1 parent e885c95 commit 27afe62
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 5 deletions.
7 changes: 6 additions & 1 deletion augur/filter/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,12 @@ def import_sequence_index(args):

# Load the sequence index, if a path exists.
if sequence_index_path:
sequence_index = TabularFile(sequence_index_path, header=True, delimiters=[SEQUENCE_INDEX_DELIMITER])
try:
sequence_index = TabularFile(sequence_index_path, header=True, delimiters=[SEQUENCE_INDEX_DELIMITER])
except InvalidDelimiter:
# This can happen for single-column files (e.g. VCF sequence indexes).
# If so, use a tab character as an arbitrary delimiter.
sequence_index = TabularFile(sequence_index_path, header=True, delimiter='\t')
with Sqlite3Database(constants.RUNTIME_DB_FILE, mode="rw") as db:
# Import the sequence index.
_import_tabular_file(sequence_index, db, constants.SEQUENCE_INDEX_TABLE)
Expand Down
15 changes: 11 additions & 4 deletions augur/io/tabular_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ class TabularFile(File):
context-specific usage.
"""

def __init__(self, path: str, delimiters: Iterable[str] = None,
def __init__(self, path: str, delimiter: str = None, delimiters: Iterable[str] = None,
header: bool = False, columns: Iterable[str] = None):
"""
Parameters
----------
path
Path of tabular file.
delimiter
Use this as the delimiter.
delimiters
List of possible delimiters to use, in order of precedence.
header
Expand All @@ -40,11 +42,16 @@ def __init__(self, path: str, delimiters: Iterable[str] = None,
"""
super().__init__(path)

if delimiters is None:
if delimiter and delimiters:
raise ValueError("At most one of delimiter and delimiters can be set.")

if not delimiter and not delimiters:
delimiters = DEFAULT_DELIMITERS

self.delimiter = get_delimiter(self.path, delimiters)
"""Delimiter of tabular file."""
if delimiter:
self.delimiter = delimiter
else:
self.delimiter = get_delimiter(self.path, delimiters)

if (not header and not columns) or (header and columns):
raise ValueError("Tabular file must have either a header row or column names specified, but not both.")
Expand Down

0 comments on commit 27afe62

Please sign in to comment.