diff --git a/ingest/rules/nextclade.smk b/ingest/rules/nextclade.smk
index a70d994..ae8ab1f 100644
--- a/ingest/rules/nextclade.smk
+++ b/ingest/rules/nextclade.smk
@@ -61,14 +61,12 @@ rule run_nextclade:
         """
 
 
-rule join_metadata_and_nextclade:
+rule nextclade_metadata:
     input:
         nextclade="results/nextclade.tsv",
-        metadata="data/subset_metadata.tsv",
     output:
-        metadata="results/metadata.tsv",
+        nextclade_metadata=temp("results/nextclade_metadata.tsv"),
     params:
-        metadata_id_field=config["curate"]["output_id_field"],
         nextclade_id_field=config["nextclade"]["id_field"],
         nextclade_field_map=[f"{old}={new}" for old, new in config["nextclade"]["field_map"].items()],
         nextclade_fields=",".join(config["nextclade"]["field_map"].values()),
@@ -80,13 +78,28 @@ rule join_metadata_and_nextclade:
             --field-map {params.nextclade_field_map:q} \
             --output-metadata - \
         | tsv-select --header --fields {params.nextclade_fields:q} \
-        | tsv-join -H \
-            --filter-file - \
-            --key-fields {params.nextclade_id_field} \
-            --data-fields {params.metadata_id_field} \
-            --append-fields '*' \
-            --write-all ? \
-            {input.metadata} \
-        | tsv-select -H --exclude {params.nextclade_id_field} \
-        > {output.metadata}
+        > {output.nextclade_metadata:q}
+        """
+
+
+rule join_metadata_and_nextclade:
+    input:
+        metadata="data/subset_metadata.tsv",
+        nextclade_metadata="results/nextclade_metadata.tsv",
+    output:
+        metadata="results/metadata.tsv",
+    params:
+        metadata_id_field=config["curate"]["output_id_field"],
+        nextclade_id_field=config["nextclade"]["id_field"],
+    shell:
+        r"""
+        augur merge \
+            --metadata \
+                metadata={input.metadata:q} \
+                nextclade={input.nextclade_metadata:q} \
+            --metadata-id-columns \
+                metadata={params.metadata_id_field:q} \
+                nextclade={params.nextclade_id_field:q} \
+            --output-metadata {output.metadata:q} \
+            --no-source-columns
         """