-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9ca9b47
commit c668ebc
Showing
18 changed files
with
1,013,257 additions
and
1,305 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# TT-Mars | ||
|
||
TT-Mars: S**t**ructural Varian**t**s Assess**m**ent B**a**sed on Haplotype-**r**esolved A**s**semblies. | ||
A sequence content based structural variants (SVs) validation method which aims to efficiently and accurately assess SV calls through assembly contigs and established alignment information. | ||
|
||
## Requirement | ||
|
||
Python 3 packages: pysam (https://github.com/pysam-developers/pysam). | ||
Softwares: samtools (https://github.com/samtools/samtools), samliftover (https://github.com/mchaisso/mcutils). | ||
|
||
## Positional parameters | ||
|
||
1. Output directory | ||
2. if hg38: if reference is hg38 (True/False). If False, TT-Mars will use hs37d5 as reference | ||
3. centromere file: centromere_positions.bed | ||
4. Assembly alignment bam file: assembly1.bam (haplotype-resolved) | ||
5. Assembly alignment bam file: assembly2.bam (haplotype-resolved) | ||
6. Average read depth: the average read depth of the read file | ||
7. Read file: read.bam (for high-depth-read calls only, need to modify the method to get high-depth regions) | ||
8. A callset file: callset.vcf.gz | ||
9. Referemce genome sequence file: reference_genome.fasta | ||
10. Assembly sequence files: assembly1.fasta/.fa (haplotype-resolved) | ||
11. Assembly sequence files: assembly2.fasta/.fa (haplotype-resolved) | ||
12. liftover file: assembly1_liftover.bed (provided) | ||
13. liftover file: assembly2_liftover.bed (provided) | ||
|
||
## Usage | ||
|
||
python ttmars.py output_dir if_hg38 centromere_positions.bed assembly1.bam assembly2.bam avg_read_depth read.bam callset.vcf.gz assembly1.fasta assembly2.fasta assembly1_liftover.bed assembly2_liftover.bed | ||
|
||
## Example Output | ||
|
||
ttmars_res.txt: | ||
(chr start end type relative_length relative_score validation_result) | ||
chr1 893792 893827 DEL 1.03 3.18 True | ||
|
||
## Available Data | ||
|
||
### Liftover files and assemblies | ||
| Samples | Reference Liftover Hap1 | Reference Liftover Hap2 | Assembly Liftover Hap1 | Assembly Liftover Hap2 | | ||
| :----: | :----: | :----: | :----: | :----: | | ||
| HG00096 | https://figshare.com/ndownloader/files/30817390 | https://figshare.com/ndownloader/files/30817384 | https://figshare.com/ndownloader/files/30817387 | https://figshare.com/ndownloader/files/30817381 | | ||
| HG00171 | https://figshare.com/ndownloader/files/30817402 | https://figshare.com/ndownloader/files/30817396 | https://figshare.com/ndownloader/files/30817399 | https://figshare.com/ndownloader/files/30817393 | | ||
| HG00513 | https://figshare.com/ndownloader/files/30817411 | https://figshare.com/ndownloader/files/30817405 | https://figshare.com/ndownloader/files/30817408 | https://figshare.com/ndownloader/files/30817414 | | ||
| HG00731 | https://figshare.com/ndownloader/files/30817426 | https://figshare.com/ndownloader/files/30817420 | https://figshare.com/ndownloader/files/30817423 | https://figshare.com/ndownloader/files/30817417 | | ||
| HG00732 | https://figshare.com/ndownloader/files/30817435 | https://figshare.com/ndownloader/files/30817429 | https://figshare.com/ndownloader/files/30817432 | https://figshare.com/ndownloader/files/30817438 | | ||
| HG00864 | https://figshare.com/ndownloader/files/30817450 | https://figshare.com/ndownloader/files/30817444 | https://figshare.com/ndownloader/files/30817447 | https://figshare.com/ndownloader/files/30817441 | | ||
| HG01114 | https://figshare.com/ndownloader/files/30817459 | https://figshare.com/ndownloader/files/30817453 | https://figshare.com/ndownloader/files/30817456 | https://figshare.com/ndownloader/files/30817462 | | ||
| HG01505 | https://figshare.com/ndownloader/files/30817471 | https://figshare.com/ndownloader/files/30817465 | https://figshare.com/ndownloader/files/30817468 | https://figshare.com/ndownloader/files/30817474 | | ||
| HG01596 | https://figshare.com/ndownloader/files/30817486 | https://figshare.com/ndownloader/files/30817480 | https://figshare.com/ndownloader/files/30817483 | https://figshare.com/ndownloader/files/30817477 | | ||
| HG03009 | https://figshare.com/ndownloader/files/30817498 | https://figshare.com/ndownloader/files/30817492 | https://figshare.com/ndownloader/files/30817495 | https://figshare.com/ndownloader/files/30817489 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import csv | ||
import sys | ||
|
||
output_dir = sys.argv[1] + "/" | ||
other_sv_res_file = output_dir+"ttmars_res.txt" | ||
regdup_res_file = output_dir+"ttmars_regdup_res.txt" | ||
|
||
with open(other_sv_res_file) as f: | ||
reader = csv.reader(f, delimiter="\t") | ||
other_sv_res = list(reader) | ||
f.close() | ||
|
||
with open(regdup_res_file) as f: | ||
reader = csv.reader(f, delimiter="\t") | ||
regdup_res = list(reader) | ||
f.close() | ||
|
||
sv_dict = {} | ||
for rec in other_sv_res: | ||
ref_name = rec[0] | ||
sv_pos = int(rec[1]) | ||
sv_end = int(rec[2]) | ||
sv_type = rec[3] | ||
sv_dict[(ref_name, int(sv_pos), int(sv_end), sv_type)] = [rec[4], rec[5], rec[6]] | ||
for rec in regdup_res: | ||
ref_name = rec[0] | ||
sv_pos = int(rec[1]) | ||
sv_end = int(rec[2]) | ||
sv_type = rec[3] | ||
if (ref_name, int(sv_pos), int(sv_end), sv_type) in sv_dict: | ||
if rec[6] == 'True' and sv_dict[(ref_name, int(sv_pos), int(sv_end), sv_type)][2] == 'False': | ||
sv_dict[(ref_name, int(sv_pos), int(sv_end), sv_type)] = [rec[4], rec[5], rec[6]] | ||
else: | ||
sv_dict[(ref_name, int(sv_pos), int(sv_end), sv_type)] = [rec[4], rec[5], rec[6]] | ||
|
||
g = open(output_dir + "/ttmars_combined_res.txt", "w") | ||
for key in sv_dict: | ||
res = sv_dict[key] | ||
|
||
g.write(str(key[0]) + "\t") | ||
g.write(str(key[1]) + "\t") | ||
g.write(str(key[2]) + "\t") | ||
g.write(str(key[3]) + "\t") | ||
g.write(str(res[0]) + "\t") | ||
g.write(str(res[1]) + "\t") | ||
g.write(str(res[2])) | ||
g.write("\n") | ||
g.close() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import csv | ||
import sys | ||
interval = 20 | ||
|
||
output_dir = sys.argv[1] + "/" | ||
in_file_name = output_dir + sys.argv[2] | ||
out_file_name = output_dir + sys.argv[3] | ||
|
||
#consider reverse strand also | ||
with open(in_file_name, "r") as file: | ||
reader = csv.reader(file, delimiter="\t") | ||
start_row = next(reader) | ||
chr_name = start_row[4] | ||
contig_name = start_row[0] | ||
chr_pos = int(start_row[5]) | ||
contig_pos = int(start_row[1]) | ||
pre_chr_pos = chr_pos | ||
pre_contig_pos = contig_pos | ||
ctr = 1 | ||
forward_strand = True | ||
out_file = open(out_file_name, "w") | ||
out_file.write(chr_name + "\t" + contig_name + "\t") | ||
out_file.write(str(chr_pos)+":"+str(contig_pos)+":") | ||
|
||
for row in reader: | ||
if (row[4] == chr_name) and (row[0] == contig_name) and (int(row[5]) - pre_chr_pos == interval) and \ | ||
(int(row[1]) - pre_contig_pos == interval) and forward_strand: | ||
pre_chr_pos = int(row[5]) | ||
pre_contig_pos = int(row[1]) | ||
ctr += 1 | ||
elif (row[4] == chr_name) and (row[0] == contig_name) and (int(row[5]) - pre_chr_pos == interval) and \ | ||
(int(row[1]) - pre_contig_pos == -interval) and not forward_strand: | ||
pre_chr_pos = int(row[5]) | ||
pre_contig_pos = int(row[1]) | ||
ctr += 1 | ||
else: | ||
#if different chr or contig | ||
if row[4] != chr_name or row[0] != contig_name: | ||
#print(chr_name, contig_name) | ||
out_file.write(str(ctr)+":") | ||
if forward_strand: | ||
out_file.write("+;") | ||
else: | ||
out_file.write("-;") | ||
out_file.write("\n") | ||
chr_name = row[4] | ||
contig_name = row[0] | ||
chr_pos = int(row[5]) | ||
contig_pos = int(row[1]) | ||
pre_chr_pos = chr_pos | ||
pre_contig_pos = contig_pos | ||
ctr = 1 | ||
forward_strand = True | ||
out_file.write(chr_name + "\t" + contig_name + "\t") | ||
out_file.write(str(chr_pos)+":"+str(contig_pos)+":") | ||
elif int(row[5]) - pre_chr_pos != interval: | ||
out_file.write(str(ctr)+":") | ||
if forward_strand: | ||
out_file.write("+;") | ||
else: | ||
out_file.write("-;") | ||
chr_pos = int(row[5]) | ||
contig_pos = int(row[1]) | ||
pre_chr_pos = chr_pos | ||
pre_contig_pos = contig_pos | ||
out_file.write(str(chr_pos)+":"+str(contig_pos)+":") | ||
ctr = 1 | ||
forward_strand = True | ||
elif (int(row[1]) - pre_contig_pos != interval) and (int(row[1]) - pre_contig_pos != -interval): | ||
out_file.write(str(ctr)+":") | ||
if forward_strand: | ||
out_file.write("+;") | ||
else: | ||
out_file.write("-;") | ||
chr_pos = int(row[5]) | ||
contig_pos = int(row[1]) | ||
pre_chr_pos = chr_pos | ||
pre_contig_pos = contig_pos | ||
out_file.write(str(chr_pos)+":"+str(contig_pos)+":") | ||
ctr = 1 | ||
forward_strand = True | ||
elif (int(row[1]) - pre_contig_pos == interval) and not forward_strand: | ||
out_file.write(str(ctr)+":") | ||
out_file.write("-;") | ||
chr_pos = int(row[5]) | ||
contig_pos = int(row[1]) | ||
pre_chr_pos = chr_pos | ||
pre_contig_pos = contig_pos | ||
out_file.write(str(chr_pos)+":"+str(contig_pos)+":") | ||
ctr = 1 | ||
forward_strand = True | ||
elif (int(row[1]) - pre_contig_pos == -interval) and forward_strand: | ||
out_file.write(str(ctr)+":") | ||
out_file.write("+;") | ||
chr_pos = int(row[5]) | ||
contig_pos = int(row[1]) | ||
pre_chr_pos = chr_pos | ||
pre_contig_pos = contig_pos | ||
out_file.write(str(chr_pos)+":"+str(contig_pos)+":") | ||
ctr = 1 | ||
forward_strand = False | ||
#write last lines | ||
out_file.close() |
Oops, something went wrong.