Skip to content

Commit

Permalink
dev output vcf
Browse files Browse the repository at this point in the history
  • Loading branch information
quentin0515 committed Dec 9, 2021
1 parent 0a25bc0 commit 408cd7d
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 5 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ TT-Mars: S**t**ructural Varian**t**s Assess**m**ent B**a**sed on Haplotype-**r**

`python combine.py output_dir num_X_chr`

## Positional parameters
## Positional arguments

1. `output_dir`: Output directory.
2. `if_hg38`: if reference is hg38 (True/False).
Expand All @@ -34,6 +34,13 @@ TT-Mars: S**t**ructural Varian**t**s Assess**m**ent B**a**sed on Haplotype-**r**
11. `num_X_chr`: if male sample: 1; if female sample: 2.
12. `wlen_tp`: if TT-Mars validates wrong-length calls as TP (True/False).

## Optional arguments

1. combine.py:
`-v/--vcf_out`: output results as vcf files, must be used together with `-f/--vcf_file`.
`-f VCF_FILE/--vcf_file VCF_FILE`: input vcf file, use as template.


## Example Output

ttmars_combined_res.txt:
Expand Down
64 changes: 60 additions & 4 deletions combine.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,40 @@
import csv
import sys

output_dir = sys.argv[1] + "/"
no_X_chr = sys.argv[2]

if int(no_X_chr) == 1:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("output_dir",
help="output directory")
parser.add_argument("no_X_chr",
choices=[1, 2],
help="male sample 1, female sample 2",
type=int)
parser.add_argument("-v",
"--vcf_out",
help="output results as vcf files, must be used together with -f/--vcf_file",
action="store_true")
parser.add_argument("-f",
"--vcf_file",
help="input vcf file, use as template")
args = parser.parse_args()

if bool(args.vcf_out) ^ bool(args.vcf_file):
parser.error('-v/--vcf_out and -f/--vcf_file must be given together')

output_dir = args.output_dir + "/"

if int(args.no_X_chr) == 1:
if_male = True
else:
if_male = False

if args.vcf_out:
if_vcf = True
in_vcf_file = args.vcf_file
else:
if_vcf = False

other_sv_res_file = output_dir+"ttmars_res.txt"
regdup_res_file = output_dir+"ttmars_regdup_res.txt"

Expand Down Expand Up @@ -73,6 +99,36 @@
g.write("\n")
g.close()

#if output vcf
if if_vcf:
from pysam import VariantFile
vcf_in = VariantFile(in_vcf_file)
vcfh = vcf_in.header
#vcfh.add_meta('INFO', items=[('ID',"TTMars"), ('Number',1), ('Type','String'),('Description','TT-Mars NA12878 results: TP, FA, NA or .')])
vcf_out_tp = VariantFile(output_dir+"ttmars_tp.vcf", 'w', header=vcfh)
vcf_out_fp = VariantFile(output_dir+"ttmars_fp.vcf", 'w', header=vcfh)
vcf_out_na = VariantFile(output_dir+"ttmars_na.vcf", 'w', header=vcfh)

for rec in vcf_in.fetch():
ref_name = rec.chrom
sv_type = rec.info['SVTYPE']
sv_pos = rec.pos
sv_end = rec.stop

try:
validation_res = sv_dict[(ref_name, int(sv_pos), int(sv_end), sv_type)][2]
if validation_res == 'True':
vcf_out_tp.write(rec)
elif validation_res == 'False':
vcf_out_fp.write(rec)
except:
vcf_out_na.write(rec)

vcf_out_tp.close()
vcf_out_fp.close()
vcf_out_na.close()
vcf_in.close()

#remove files
import os
for name in ['assem1_non_cov_regions.bed', 'assem2_non_cov_regions.bed',
Expand Down
1 change: 1 addition & 0 deletions run_ttmars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,5 @@ python reg_dup.py "$output_dir" "$if_hg38" "$centro_file" "$files_dir"/assem1_no

python chrx.py "$output_dir" "$if_hg38" "$centro_file" "$files_dir"/assem1_non_cov_regions.bed "$files_dir"/assem2_non_cov_regions.bed "$vcf_file" "$reference" "$asm_h1" "$asm_h2" "$files_dir"/lo_pos_assem1_result_compressed.bed "$files_dir"/lo_pos_assem2_result_compressed.bed "$tr_file" "$pass_only" "$seq_resolved" "$wlen_tp"

#optional arguments: -v/--vcf_out: output results as vcf files, -f VCF_FILE/--vcf_file VCF_FILE: input vcf file, use as template
python combine.py "$output_dir" "$num_X_chr"

0 comments on commit 408cd7d

Please sign in to comment.