diff --git a/tests/functional/curate/cram/rename/drop-columns.t b/tests/functional/curate/cram/rename/drop-columns.t new file mode 100644 index 000000000..d31844ae4 --- /dev/null +++ b/tests/functional/curate/cram/rename/drop-columns.t @@ -0,0 +1,29 @@ + +Setup + + $ export AUGUR="${AUGUR:-$TESTDIR/../../../../../bin/augur}" + + $ cat >records.ndjson <<~~ + > {"strain": "s_1", "country": "c_1", "accession": "a_1"} + > {"strain": "s_2", "country": "c_2", "accession": "a_2"} + > {"strain": "s_3", "country": "c_3", "accession": "a_3"} + > ~~ + +Rename the strain column to accession -- we don't move the strain column to where accession was, +we simply rename it and drop the (old) accession column. The alternative (which I don't like) produces: +{"country": "c_1", "accession": "s_1"} +{"country": "c_2", "accession": "s_2"} +{"country": "c_3", "accession": "s_3"} + + $ $AUGUR curate rename --field-map "strain=accession" --force < <(cat records.ndjson) + {"accession": "s_1", "country": "c_1"} + {"accession": "s_2", "country": "c_2"} + {"accession": "s_3", "country": "c_3"} + +Similarly, renaming accession to strain renames the column "in-place" and drops the (old) strain column + + $ $AUGUR curate rename --field-map "accession=strain" --force < <(cat records.ndjson) + {"country": "c_1", "strain": "a_1"} + {"country": "c_2", "strain": "a_2"} + {"country": "c_3", "strain": "a_3"} + diff --git a/tests/functional/curate/cram/rename/force-behaviour.t b/tests/functional/curate/cram/rename/force-behaviour.t new file mode 100644 index 000000000..5886c599c --- /dev/null +++ b/tests/functional/curate/cram/rename/force-behaviour.t @@ -0,0 +1,18 @@ +Setup + + $ export AUGUR="${AUGUR:-$TESTDIR/../../../../../bin/augur}" + + + $ cat >records.ndjson <<~~ + > {"accession": "record_1", "country": ""} + > {"accession": "record_2", "country": "country_2"} + > ~~ + +Test that --force is required if a key exists, regardless of its associated value +(This was not the behaviour in the 
precursor command `transform-field-names`) + + $ $AUGUR curate rename --field-map "accession=country" < <(cat records.ndjson) 1> out1.ndjson + WARNING: skipping rename of accession because record already has a field named country. + + $ diff records.ndjson out1.ndjson + diff --git a/tests/functional/curate/cram/rename/general.t b/tests/functional/curate/cram/rename/general.t new file mode 100644 index 000000000..f7f4401d7 --- /dev/null +++ b/tests/functional/curate/cram/rename/general.t @@ -0,0 +1,91 @@ +Setup + + $ export AUGUR="${AUGUR:-$TESTDIR/../../../../../bin/augur}" + +The tests here use NDJSON I/O and don't explicitly test TSV I/O as we rely +on the general curate infrastructure to enforce that each row has the same +fields. See for more + + $ cat >records.ndjson <<~~ + > {"accession": "record_1", "country": "country_1"} + > {"accession": "record_2", "country": "country_2"} + > ~~ + +The --field-map argument is required (error text not checked as it includes the entire argparse usage text) + + $ $AUGUR curate rename < <(cat records.ndjson) 2>/dev/null + [2] + +Rename "accession" to "strain" -- the order should be preserved (i.e. strain is first column) + + $ $AUGUR curate rename --field-map "accession=strain" < <(cat records.ndjson) + {"strain": "record_1", "country": "country_1"} + {"strain": "record_2", "country": "country_2"} + + +Rename "accession" to "country" - a single warning is reported as we won't overwrite without --force +and we don't change the data + + $ $AUGUR curate rename --field-map "accession=country" < <(cat records.ndjson) > out2.ndjson + WARNING: skipping rename of accession because record already has a field named country. + + $ diff records.ndjson out2.ndjson + +Rename "accession" to "country" using --force + + $ $AUGUR curate rename --field-map "accession=country" --force < <(cat records.ndjson) + {"country": "record_1"} + {"country": "record_2"} + +Asking to rename multiple columns to the same new name is an error! 
+ + $ $AUGUR curate rename --field-map "accession=foo" "country=foo" < <(cat records.ndjson) + ERROR: Asked to rename multiple fields to 'foo'. + [2] + +Asking to rename a non-existent column raises a warning. Using a warning rather than an error allows the command +to be idempotent. + + $ $AUGUR curate rename --field-map "strain=foo" < <(cat records.ndjson) 1> out3.ndjson + WARNING: Asked to rename field 'strain' (to 'foo') but it doesn't exist in the input data. + + $ diff records.ndjson out3.ndjson + + +Rename will re-order fields to match the first observed record (with any necessary changes applied) +This produces NDJSON output which more closely matches TSV output. + + $ cat >records.unordered.ndjson <<~~ + > {"accession": "record_1", "country": "country_1"} + > {"country": "country_2", "accession": "record_2"} + > ~~ + + $ $AUGUR curate rename --field-map "accession=id" --force < <(cat records.unordered.ndjson) + {"id": "record_1", "country": "country_1"} + {"id": "record_2", "country": "country_2"} + + +Using --field-map without a single '=' char is an error + $ $AUGUR curate rename --field-map "accession" < <(cat records.ndjson) + ERROR: The field-map 'accession' must contain a single '=' character. + [2] + +Using --field-map with more than a single '=' char is an error + $ $AUGUR curate rename --field-map "accession=id=strain" < <(cat records.ndjson) + ERROR: The field-map 'accession=id=strain' must contain a single '=' character. + [2] + +Using --field-map with spaces surrounding field names is OK (as long as you quote the arg appropriately) + $ $AUGUR curate rename --field-map " accession = strain " < <(cat records.ndjson) + {"strain": "record_1", "country": "country_1"} + {"strain": "record_2", "country": "country_2"} + +Using --field-map with an empty "old field" doesn't make sense + $ $AUGUR curate rename --field-map "=accession" < <(cat records.ndjson) + ERROR: The field-map '=accession' doesn't specify a name for the existing field. 
+ [2] + +Using --field-map with an empty "new field" doesn't (yet) make sense + $ $AUGUR curate rename --field-map "accession=" < <(cat records.ndjson) + ERROR: The field-map 'accession=' doesn't specify a name for the new field. + [2]