From 408cd7d09f4c3c87b8d5c3bba65a2611bd40ce4d Mon Sep 17 00:00:00 2001 From: quentin0515 Date: Thu, 9 Dec 2021 11:28:40 -0800 Subject: [PATCH] dev output vcf --- README.md | 9 +++++++- combine.py | 64 +++++++++++++++++++++++++++++++++++++++++++++++---- run_ttmars.sh | 1 + 3 files changed, 69 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 40a214f..33aeaad 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ TT-Mars: S**t**ructural Varian**t**s Assess**m**ent B**a**sed on Haplotype-**r** `python combine.py output_dir num_X_chr` -## Positional parameters +## Positional arguments 1. `output_dir`: Output directory. 2. `if_hg38`: if reference is hg38 (True/False). @@ -34,6 +34,13 @@ TT-Mars: S**t**ructural Varian**t**s Assess**m**ent B**a**sed on Haplotype-**r** 11. `num_X_chr`: if male sample: 1; if female sample: 2. 12. `wlen_tp`: if TT-Mars validates wrong-length calls as TP (True/False). +## Optional arguments + +1. combine.py: +`-v/--vcf_out`: output results as vcf files, must be used together with `-f/--vcf_file`. +`-f VCF_FILE/--vcf_file VCF_FILE`: input vcf file, use as template. + + ## Example Output ttmars_combined_res.txt: diff --git a/combine.py b/combine.py index b7e0cb8..7b91ccb 100644 --- a/combine.py +++ b/combine.py @@ -1,14 +1,40 @@ import csv import sys -output_dir = sys.argv[1] + "/" -no_X_chr = sys.argv[2] - -if int(no_X_chr) == 1: +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("output_dir", + help="output directory") +parser.add_argument("no_X_chr", + choices=[1, 2], + help="male sample 1, female sample 2", + type=int) +parser.add_argument("-v", + "--vcf_out", + help="output results as vcf files, must be used together with -f/--vcf_file", + action="store_true") +parser.add_argument("-f", + "--vcf_file", + help="input vcf file, use as template") +args = parser.parse_args() + +if bool(args.vcf_out) ^ bool(args.vcf_file): + parser.error('-v/--vcf_out and -f/--vcf_file must be given together') + +output_dir = args.output_dir + "/" + +if int(args.no_X_chr) == 1: if_male = True else: if_male = False +if args.vcf_out: + if_vcf = True + in_vcf_file = args.vcf_file +else: + if_vcf = False + other_sv_res_file = output_dir+"ttmars_res.txt" regdup_res_file = output_dir+"ttmars_regdup_res.txt" @@ -73,6 +99,36 @@ g.write("\n") g.close() +#if output vcf +if if_vcf: + from pysam import VariantFile + vcf_in = VariantFile(in_vcf_file) + vcfh = vcf_in.header + #vcfh.add_meta('INFO', items=[('ID',"TTMars"), ('Number',1), ('Type','String'),('Description','TT-Mars NA12878 results: TP, FA, NA or .')]) + vcf_out_tp = VariantFile(output_dir+"ttmars_tp.vcf", 'w', header=vcfh) + vcf_out_fp = VariantFile(output_dir+"ttmars_fp.vcf", 'w', header=vcfh) + vcf_out_na = VariantFile(output_dir+"ttmars_na.vcf", 'w', header=vcfh) + + for rec in vcf_in.fetch(): + ref_name = rec.chrom + sv_type = rec.info['SVTYPE'] + sv_pos = rec.pos + sv_end = rec.stop + + try: + validation_res = sv_dict[(ref_name, int(sv_pos), int(sv_end), sv_type)][2] + if validation_res == 'True': + vcf_out_tp.write(rec) + elif validation_res == 'False': + vcf_out_fp.write(rec) + except: + vcf_out_na.write(rec) + + vcf_out_tp.close() + vcf_out_fp.close() + vcf_out_na.close() + vcf_in.close() + #remove files import os for name in ['assem1_non_cov_regions.bed', 'assem2_non_cov_regions.bed', diff --git a/run_ttmars.sh b/run_ttmars.sh index e8be85b..8ba2c22 100644 --- a/run_ttmars.sh +++ b/run_ttmars.sh @@ -41,4 +41,5 @@ python reg_dup.py "$output_dir" "$if_hg38" "$centro_file" "$files_dir"/assem1_no python chrx.py "$output_dir" "$if_hg38" "$centro_file" "$files_dir"/assem1_non_cov_regions.bed "$files_dir"/assem2_non_cov_regions.bed "$vcf_file" "$reference" "$asm_h1" "$asm_h2" "$files_dir"/lo_pos_assem1_result_compressed.bed "$files_dir"/lo_pos_assem2_result_compressed.bed "$tr_file" "$pass_only" "$seq_resolved" "$wlen_tp" +#optional arguments: -v/--vcf_out: output results as vcf files, -f VCF_FILE/--vcf_file VCF_FILE: input vcf file, use as template python combine.py "$output_dir" "$num_X_chr" \ No newline at end of file