Skip to content

Commit

Permalink
[wip] Check disk space
Browse files Browse the repository at this point in the history
  • Loading branch information
victorlin committed Jul 7, 2023
1 parent 64f9325 commit b411c58
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 1 deletion.
4 changes: 3 additions & 1 deletion augur/filter/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from tempfile import NamedTemporaryFile
from . import constants
from .dates import parse_dates
from .io import initialize_input_source_table, import_metadata, import_sequence_index, print_debug_report, write_outputs
from .io import check_disk_space, initialize_input_source_table, import_metadata, import_sequence_index, print_debug_report, write_outputs
from .include_exclude_rules import apply_filters, construct_filters
from .report import print_report
from .subsample import apply_subsampling
Expand All @@ -19,6 +19,8 @@ def run(args: Namespace):
if args.debug:
constants.RUNTIME_DEBUG = True

check_disk_space(args.metadata)

initialize_input_source_table()

import_metadata(args.metadata, args.metadata_id_columns, args.metadata_delimiters)
Expand Down
30 changes: 30 additions & 0 deletions augur/filter/io.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import io
import os
from typing import Iterable, Sequence, Set
from tempfile import NamedTemporaryFile

import xopen
from augur.errors import AugurError
from augur.filter.debug import add_debugging
from augur.index import (
Expand All @@ -19,6 +22,33 @@
from . import constants


def check_disk_space(metadata_file: str):
    """Check whether there is enough disk space for augur filter to run, and
    raise an error if there is not enough.

    The size of the metadata is measured by opening it and seeking to the end
    of the stream. If the stream does not support that, a warning is printed
    and no check is performed.

    Parameters
    ----------
    metadata_file
        Path to the metadata file, passed to ``Metadata``.

    Raises
    ------
    AugurError
        If the free disk space is smaller than the expected database size.
    """
    import shutil
    import tempfile

    # The database file is expected to be about 2x the disk space used by
    # uncompressed metadata. This is an estimation based on test runs.
    database_size_factor = 2

    metadata = Metadata(metadata_file, id_columns=("strain", "name"))

    # Keep the try body limited to the operations that can raise
    # io.UnsupportedOperation.
    try:
        # Use threads=0 to force usage of Python libraries which implement seek and tell.
        with metadata.open(threads=0) as f:
            # This works for some streams, including uncompressed and gzip.
            # NOTE(review): if the stream is opened in text mode, tell()
            # returns an opaque position rather than a guaranteed byte count
            # - confirm that this is an acceptable size estimate.
            f.seek(0, os.SEEK_END)
            size = f.tell()
    except io.UnsupportedOperation:
        print_err("WARNING: File size of metadata could not be determined. If the uncompressed data is large, you may run out of disk space.")
        # FIXME: For ZSTD files, try using frame content sizes (if available).
        # See https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
        return

    expected_size = size * database_size_factor

    # Measure free space on the filesystem that will hold the database file,
    # not on the one where this module happens to be installed.
    # NOTE(review): assumes the database is created as a temporary file in the
    # default temporary directory - confirm against the caller.
    free = shutil.disk_usage(tempfile.gettempdir()).free

    if free < expected_size:
        raise AugurError(f"Not enough disk space. augur filter will need to write a database file with expected size {_human_readable_size(expected_size)}.")


def import_priorities_table(path):
"""Import a priorities file into the database."""
try:
Expand Down

0 comments on commit b411c58

Please sign in to comment.