From 06030e8211c2e266abdbb7d61e67a1c5750cfcdc Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 1 Jul 2024 11:22:28 -0700 Subject: [PATCH] format-dates: Return fully masked dates for empty date fields If a date field is empty, it should be completely masked to indicate that the date is unknown. Resolves --- augur/curate/format_dates.py | 12 +++++++++++- .../curate/cram/format-dates/empty-date-field.t | 11 ++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/augur/curate/format_dates.py b/augur/curate/format_dates.py index 4fc94595a..7e39830cd 100644 --- a/augur/curate/format_dates.py +++ b/augur/curate/format_dates.py @@ -117,6 +117,10 @@ def format_date(date_string, expected_formats): >>> expected_formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%dT%H:%M:%SZ', '%m-%d'] + >>> format_date("", expected_formats) + 'XXXX-XX-XX' + >>> format_date(" ", expected_formats) + 'XXXX-XX-XX' >>> format_date("01-01", expected_formats) 'XXXX-XX-XX' >>> format_date("2020", expected_formats) @@ -133,6 +137,10 @@ def format_date(date_string, expected_formats): '2020-01-15' """ + date_string = date_string.strip() + if date_string == '': + return 'XXXX-XX-XX' + for date_format in expected_formats: try: parsed_date = datetime.strptime(date_string, date_format) @@ -180,7 +188,9 @@ def run(args, records): for field in args.date_fields: date_string = record.get(field) - if not date_string: + # TODO: This should raise an error if the expected date field does + # not exist in the record + if date_string is None: continue formatted_date_string = format_date(date_string, args.expected_date_formats) diff --git a/tests/functional/curate/cram/format-dates/empty-date-field.t b/tests/functional/curate/cram/format-dates/empty-date-field.t index 7fde801ca..da33d5d01 100644 --- a/tests/functional/curate/cram/format-dates/empty-date-field.t +++ b/tests/functional/curate/cram/format-dates/empty-date-field.t @@ -2,19 +2,16 @@ Setup $ export 
AUGUR="${AUGUR:-$TESTDIR/../../../../../bin/augur}" -Test empty date value. -This currently has the unexpected behavior of returning the empty string. +Test empty date value, which should be returned as a fully masked date. $ echo '{"record": 1, "date": ""}' \ > | ${AUGUR} curate format-dates \ > --date-fields "date" - {"record": 1, "date": ""} + {"record": 1, "date": "XXXX-XX-XX"} -Test whitespace only date value. -This currently raises an error. +Test whitespace only date value, which should be returned as a fully masked date. $ echo '{"record": 1, "date": " "}' \ > | ${AUGUR} curate format-dates \ > --date-fields "date" - ERROR: Unable to format date string ' ' in field 'date' of record 0. - [2] + {"record": 1, "date": "XXXX-XX-XX"}