From e74fc6b8e43bd7efc7af7cb079f83809f90d2f88 Mon Sep 17 00:00:00 2001 From: j23414 Date: Tue, 4 Apr 2023 20:39:35 -0700 Subject: [PATCH] fix: refactor to use augur library Refactored to use the augur library to load the metadata file. * Using augur.io.read_metadata simplifies creating the name_lookup dictionary * To keep coding style consistent, split the script into main, parse_args, and set_final_strain_name * To be consistent with augur export v2, write out a minified JSON file --- bin/set_final_strain_name.py | 67 +++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/bin/set_final_strain_name.py b/bin/set_final_strain_name.py index 0036f2a5..a5d6fa1d 100755 --- a/bin/set_final_strain_name.py +++ b/bin/set_final_strain_name.py @@ -1,5 +1,29 @@ -import pandas as pd +#! /usr/bin/env python + import json, argparse +import augur + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Swaps out the strain names in the Auspice JSON with the final strain name", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--input-auspice-json", type=str, required=True, help="input auspice_json" + ) + parser.add_argument("--metadata", type=str, required=True, help="input data") + parser.add_argument( + "--display-strain-name", + type=str, + required=True, + help="field to use as strain name in auspice", + ) + parser.add_argument( + "--output", type=str, metavar="JSON", required=True, help="output Auspice JSON" + ) + return parser.parse_args() + def replace_name_recursive(node, lookup): if node["name"] in lookup: @@ -9,28 +33,29 @@ def replace_name_recursive(node, lookup): for child in node["children"]: replace_name_recursive(child, lookup) -if __name__=="__main__": - parser = argparse.ArgumentParser( - description="Swaps out the strain names in the Auspice JSON with the final strain name", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument('--input-auspice-json', 
type=str, required=True, help="input auspice_json") - parser.add_argument('--metadata', type=str, required=True, help="input data") - parser.add_argument('--display-strain-name', type=str, required=True, help="field to use as strain name in auspice") - parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON") - args = parser.parse_args() +def set_final_strain_name(auspice_json, metadata_file, display_strain_name, output): + with open(auspice_json, "r") as fh: + data = json.load(fh) - metadata = pd.read_csv(args.metadata, sep='\t') - name_lookup = {} - for ri, row in metadata.iterrows(): - strain_id = row['strain'] - name_lookup[strain_id] = args.display_strain_name if pd.isna(row[args.display_strain_name]) else row[args.display_strain_name] + metadata = augur.io.read_metadata(metadata_file) + if display_strain_name not in metadata.columns: + with open(output, "w") as fh: + json.dump(data, fh, allow_nan=False, indent=None, separators=",:") + return - with open(args.input_auspice_json, 'r') as fh: - data = json.load(fh) + name_lookup = metadata[[display_strain_name]].to_dict()[display_strain_name] + replace_name_recursive(data["tree"], name_lookup) + with open(output, "w") as fh: + json.dump(data, fh, allow_nan=False, indent=None, separators=",:") + + +def main(): + args = parse_args() + set_final_strain_name( + args.input_auspice_json, args.metadata, args.display_strain_name, args.output + ) - replace_name_recursive(data['tree'], name_lookup) - with open(args.output, 'w') as fh: - json.dump(data, fh) +if __name__ == "__main__": + main()