Skip to content

Commit

Permalink
Add argument to skip strain indexing
Browse files (browse the repository at this point in the history)
  • Loading branch information
victorlin committed Dec 31, 2022
1 parent 0d6ad2e commit 5ec7ca9
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
5 changes: 4 additions & 1 deletion augur/db/import_/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ def register_parser(parent_subparsers):
parser.add_argument('--output-sqlite3', required=True, help="Destination SQLite3 database file.")
parser.add_argument('--metadata-table', default=DEFAULT_IMPORTED_METADATA_TABLE, help="Table name to store metadata in.")
parser.add_argument('--sequences-table', default=DEFAULT_IMPORTED_SEQUENCES_TABLE, help="Table name to store sequences in.")
parser.add_argument('--skip-strain-indexing', action='store_true', help="""
Skip adding an index on strain names. This speeds up data importing and saves some disk space, but
comes at the cost of increased execution time for queries on strain name.""")
return parser


Expand All @@ -23,7 +26,7 @@ def run(args):
if args.output_sqlite3:
import_sqlite3(args.metadata, args.metadata_table,
args.sequences, args.sequences_table,
args.output_sqlite3)
args.output_sqlite3, args.skip_strain_indexing)


def validate_args(args):
Expand Down
8 changes: 5 additions & 3 deletions augur/db/import_/sqlite3.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,25 @@ class Sqlite3ImportError(AugurError):

def import_(metadata_file:str, metadata_table_name:str,
sequences_file:str, sequences_table_name:str,
db_file:str):
db_file:str, skip_strain_indexing:bool):
"""Import data into a SQLite3 database file."""
db_file_existed = os.path.exists(db_file)
with Sqlite3Database(db_file) as database:
try:
if bool(metadata_file and metadata_table_name):
metadata = Metadata(metadata_file)
import_metadata(metadata, metadata_table_name, database)
create_index(metadata.id_column, metadata_table_name, database)
if not skip_strain_indexing:
create_index(metadata.id_column, metadata_table_name, database)

if sequences_file and sequences_table_name:
# Get ID column name from metadata if available.
id_column = metadata.id_column if bool(metadata_file and metadata_table_name) else DEFAULT_SEQUENCES_ID_COLUMN

sequences = Sequences(sequences_file)
import_sequences(sequences, id_column, sequences_table_name, database)
create_index(id_column, sequences_table_name, database)
if not skip_strain_indexing:
create_index(id_column, sequences_table_name, database)
except Sqlite3ImportError as e:
# Delete the database file if it was created for this import.
if not db_file_existed:
Expand Down

0 comments on commit 5ec7ca9

Please sign in to comment.