diff --git a/augur/dates/__init__.py b/augur/dates/__init__.py index 2b31e0cb7..a8ccd0c22 100644 --- a/augur/dates/__init__.py +++ b/augur/dates/__init__.py @@ -76,37 +76,6 @@ def numeric_date_type(date): except InvalidDate as error: raise argparse.ArgumentTypeError(str(error)) from error -def is_date_ambiguous(date, ambiguous_by): - """ - Returns whether a given date string in the format of YYYY-MM-DD is ambiguous by a given part of the date (e.g., day, month, year, or any parts). - - Parameters - ---------- - date : str - Date string in the format of YYYY-MM-DD - ambiguous_by : str - Field of the date string to test for ambiguity ("day", "month", "year", "any") - """ - date_components = date.split('-', 2) - - if len(date_components) == 3: - year, month, day = date_components - elif len(date_components) == 2: - year, month = date_components - day = "XX" - else: - year = date_components[0] if date_components[0] else 'X' - month = "XX" - day = "XX" - - # Determine ambiguity hierarchically such that, for example, an ambiguous - # month implicates an ambiguous day even when day information is available. - return any(( - "X" in year, - "X" in month and ambiguous_by in ("any", "month", "day"), - "X" in day and ambiguous_by in ("any", "day") - )) - def get_numerical_date_from_value(value, fmt=None, min_max_year=None, ambiguity_resolver='both'): value = str(value) if re.match(r'^-*\d+\.\d+$', value): diff --git a/tests/dates/test_dates.py b/tests/dates/test_dates.py index 1427ea3fa..c7ae1dcec 100644 --- a/tests/dates/test_dates.py +++ b/tests/dates/test_dates.py @@ -60,35 +60,6 @@ def test_get_numerical_date_from_value_current_day_limit(self): == pytest.approx(2000.138, abs=1e-3) ) - def test_is_date_ambiguous(self): - """is_date_ambiguous should return true for ambiguous dates and false for valid dates.""" - # Test complete date strings with ambiguous values. - assert dates.is_date_ambiguous("2019-0X-0X", "any") - assert dates.is_date_ambiguous("2019-XX-09", "month") - assert dates.is_date_ambiguous("2019-03-XX", "day") - assert dates.is_date_ambiguous("201X-03-09", "year") - assert dates.is_date_ambiguous("20XX-01-09", "month") - assert dates.is_date_ambiguous("2019-XX-03", "day") - assert dates.is_date_ambiguous("20XX-01-03", "day") - - # Test incomplete date strings with ambiguous values. - assert dates.is_date_ambiguous("2019", "any") - assert dates.is_date_ambiguous("201X", "year") - assert dates.is_date_ambiguous("2019-XX", "month") - assert dates.is_date_ambiguous("2019-10", "day") - assert dates.is_date_ambiguous("2019-XX", "any") - assert dates.is_date_ambiguous("2019-XX", "day") - - # Test complete date strings without ambiguous dates for the requested field. - assert not dates.is_date_ambiguous("2019-09-03", "any") - assert not dates.is_date_ambiguous("2019-03-XX", "month") - assert not dates.is_date_ambiguous("2019-09-03", "day") - assert not dates.is_date_ambiguous("2019-XX-XX", "year") - - # Test incomplete date strings without ambiguous dates for the requested fields. - assert not dates.is_date_ambiguous("2019", "year") - assert not dates.is_date_ambiguous("2019-10", "month") - def test_get_numerical_dates_dict_error(self): """Using get_numerical_dates with metadata represented as a dict should raise an error.""" metadata = { diff --git a/tests/filter/__init__.py b/tests/filter/__init__.py new file mode 100644 index 000000000..0d004779d --- /dev/null +++ b/tests/filter/__init__.py @@ -0,0 +1,16 @@ +import argparse +import shlex +from augur.filter import register_arguments + + +def parse_args(args: str): + parser = argparse.ArgumentParser() + register_arguments(parser) + return parser.parse_args(shlex.split(args)) + + +def write_metadata(tmpdir, metadata): + fn = str(tmpdir / "metadata.tsv") + with open(fn, "w") as fh: + fh.write("\n".join(("\t".join(md) for md in metadata))) + return fn diff --git a/tests/filter/test_exclude_ambiguous_dates_by.py b/tests/filter/test_exclude_ambiguous_dates_by.py new file mode 100644 index 000000000..788e0138c --- /dev/null +++ b/tests/filter/test_exclude_ambiguous_dates_by.py @@ -0,0 +1,60 @@ +# This file contains functional tests that would normally be written as +# Cram-style tests. However, pytest is nice here since it is easy to use with +# parameterized inputs/outputs (not straightforward to set up for Cram tests¹). +# ¹ https://github.com/nextstrain/augur/pull/1183#discussion_r1142687476 + +import pytest + +from augur.errors import AugurError +from augur.filter._run import run + +from . import parse_args, write_metadata + + +@pytest.mark.parametrize( + "date, ambiguity", + [ + # Test complete date strings with ambiguous values. + ("2019-0X-0X", "any"), + ("2019-XX-XX", "month"), + ("2019-XX-XX", "day"), + ("2019-03-XX", "day"), + ("201X-XX-XX", "year"), + ("201X-XX-XX", "month"), + ("201X-XX-XX", "day"), + + # Test incomplete date strings with ambiguous values. + ("2019", "month"), + ("2019", "day"), + ("2019", "any"), + ("201X", "year"), + ("201X", "month"), + ("201X", "day"), + ("201X", "any"), + ], +) +def test_date_is_dropped(tmpdir, date, ambiguity): + metadata = write_metadata(tmpdir, (("strain","date"), + ("SEQ1" , date))) + args = parse_args(f'--metadata {metadata} --exclude-ambiguous-dates-by {ambiguity}') + with pytest.raises(AugurError, match="All samples have been dropped"): + run(args) + +@pytest.mark.parametrize( + "date, ambiguity", + [ + # Test complete date strings without the specified level of ambiguity. + ("2019-09-03", "any"), + ("2019-03-XX", "month"), + ("2019-09-03", "day"), + ("2019-XX-XX", "year"), + + # Test incomplete date strings without the specified level of ambiguity. + ("2019", "year"), + ], +) +def test_date_is_not_dropped(tmpdir, date, ambiguity): + metadata = write_metadata(tmpdir, (("strain","date"), + ("SEQ1" , date))) + args = parse_args(f'--metadata {metadata} --exclude-ambiguous-dates-by {ambiguity}') + run(args) diff --git a/tests/filter/test_relative_dates.py b/tests/filter/test_relative_dates.py index 39ea34a04..7d9d1f78f 100644 --- a/tests/filter/test_relative_dates.py +++ b/tests/filter/test_relative_dates.py @@ -4,26 +4,12 @@ # straightforward to set up for Cram tests¹). # ¹ https://github.com/nextstrain/augur/pull/1183#discussion_r1142687476 -import argparse from freezegun import freeze_time import pytest -import shlex -from augur.filter import register_arguments from augur.filter._run import run - -def parse_args(args): - parser = argparse.ArgumentParser() - register_arguments(parser) - return parser.parse_args(shlex.split(args)) - - -def write_metadata(tmpdir, metadata): - fn = str(tmpdir / "metadata.tsv") - with open(fn, "w") as fh: - fh.write("\n".join(("\t".join(md) for md in metadata))) - return fn +from . import parse_args, write_metadata @freeze_time("2020-03-25")