-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot_vcflen.py
93 lines (78 loc) · 2.5 KB
/
plot_vcflen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import sys
from pysam import VariantFile
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams.update({"font.size": 17})
sns.set_style("whitegrid")
colors = [sns.color_palette("bright")[1]] + [
sns.color_palette("dark")[i] for i in [0, 2, 6, 7, 9]
]
def parse_vcf(vcf_path, tag=""):
vcf = VariantFile(vcf_path)
Vs = []
for rec in vcf.fetch():
if list(rec.filter) != ["PASS"] and list(rec.filter) != []:
continue
if "SVLEN" in rec.info:
l = rec.info["SVLEN"]
else:
l = len(rec.alts[0]) - len(rec.ref)
if type(l) == type(()): # fix for pbsv
l = l[0]
if rec.info["SVTYPE"] == "DEL" and l > 0: # fix for debreak
l = -l
if abs(l) < 30:
continue
if l > 0:
Vs.append([tag, l, "Ins"])
else:
Vs.append([tag, -l, "Del"])
return Vs
def main():
df = pd.DataFrame(columns=["VCF", "Length", "Type"], data=[])
for name, vcf_path in zip(sys.argv[1::2], sys.argv[2::2]):
Vs = parse_vcf(vcf_path, name)
print(name, len(Vs))
print(name, "del", len([v for v in Vs if v[2] == "Del"]))
print(name, "ins", len([v for v in Vs if v[2] == "Ins"]))
new_df = pd.DataFrame(columns=["VCF", "Length", "Type"], data=Vs)
df = pd.concat([df, new_df], ignore_index=True)
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(7, 7))
sns.kdeplot(
data=df.loc[df["Type"] == "Del"],
x="Length",
log_scale=True,
hue="VCF",
palette=colors,
hue_order=["SVDSS", "cuteSV", "pbsv", "sniffles", "SVIM", "debreak"],
legend=False,
ax=ax1,
linewidth=1.3,
)
sns.kdeplot(
data=df.loc[df["Type"] == "Ins"],
x="Length",
log_scale=True,
hue="VCF",
palette=colors,
hue_order=["SVDSS", "cuteSV", "pbsv", "sniffles", "SVIM", "debreak"],
ax=ax2,
linewidth=1.3,
)
ax1.set_title("Deletions")
ax2.set_title("Insertions")
ax1.set_xlabel("")
ax2.set_xlabel("")
ax1.set_ylabel("Density")
plt.xscale("symlog")
ax1.invert_xaxis()
fig.supxlabel("Length")
plt.subplots_adjust(wspace=0, hspace=0)
ax2.set_xticks(ax2.get_xticks()[2::2])
ax1.set_xticks(ax2.get_xticks())
# plt.suptitle("(a) SVs size distribution")
plt.savefig("var-hist.png", dpi=300)
plt.savefig("var-hist.svg", dpi=300)
if __name__ == "__main__":
main()