-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvcf-select
More file actions
executable file
·62 lines (54 loc) · 1.59 KB
/
vcf-select
File metadata and controls
executable file
·62 lines (54 loc) · 1.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#### t.py
#### made by Daniel Minseok Kwon
#### 2020-08-31 12:50:13
#########################
import sys
import os
# Pick the directory holding the shared helper modules (file_util, proc_util)
# according to the name of the host this script is running on.
SVRNAME = os.uname()[1]

# Fallback used when the host name matches neither special case below.
sys_path = "/home/mk446/bin/python_lib"
if SVRNAME == "T7":
    sys_path = "/ms1/bin/python_lib"
elif "MBI" in SVRNAME.upper():
    # Case-insensitive substring match on the host name.
    sys_path = "/Users/pcaso/bin/python_lib"

# Must happen before the project-local imports that follow.
sys.path.append(sys_path)
import file_util
import proc_util
def vcf_select(base_vcf, pos_target_vcf):
    """Copy the records of ``base_vcf`` whose position occurs in ``pos_target_vcf``.

    A record is keyed by its first two tab-separated columns: the first
    column with every ``chr`` substring removed, joined by ``_`` to the
    second column. Lines of ``base_vcf`` that start with ``#`` are always
    copied; other lines are copied only when their key was seen in
    ``pos_target_vcf``. Blank lines in either input are ignored.

    The result is written to
    ``base_vcf + '_' + pos_target_vcf + '.taget.vcf'`` and progress is
    reported on stdout.

    Args:
        base_vcf: Path of the VCF file to filter.
        pos_target_vcf: Path of the VCF file supplying the positions to keep.

    Returns:
        None. The selected records are written to the output file.
    """
    # NOTE: the '.taget' spelling is kept as-is; it determines the output
    # file name, which downstream users of this script may rely on.
    out = base_vcf + '_' + pos_target_vcf + '.taget.vcf'

    # Collect the positions to keep. A set is enough: only membership matters.
    # NOTE(review): file_util.gzopen presumably yields the lines of a plain or
    # gzip-compressed file and file_util.decodeb presumably turns bytes into
    # str -- confirm against file_util.
    targets = set()
    for line in file_util.gzopen(pos_target_vcf):
        line = file_util.decodeb(line)
        # Skip blank lines (they used to raise IndexError) and header lines.
        if not line.strip() or line[0] == '#':
            continue
        arr = line.split('\t')
        targets.add(arr[0].replace('chr', '') + '_' + arr[1])
    print('total target:', len(targets))

    i = 0    # number of data lines of base_vcf examined so far
    cnt = 0  # number of data lines written to the output
    # The context manager closes the output even if a line fails to parse.
    with open(out, 'w') as f:
        print('Saving..', out)
        for line in file_util.gzopen(base_vcf):
            line = file_util.decodeb(line)
            if not line.strip():
                continue
            if line[0] == '#':
                # Header lines are always passed through unchanged.
                f.write(line)
                continue
            arr = line.split('\t')
            if arr[0].replace('chr', '') + '_' + arr[1] in targets:
                f.write(line)
                cnt += 1
            # Periodic progress report, one per million data lines.
            if i % 1000000 == 0:
                print("\tprocessing.....", cnt, 'included.', i, arr[:5])
            i += 1
    print('Saved..', out)
if __name__ == "__main__":
    # Command-line entry point: vcf-select <base vcf> <position target vcf>.
    # The usage banner is printed on every run, as before.
    print('vcf-select [BASE VCF] [POSITION TARGET VCF]')
    # Exit with status 1 and a short message instead of an IndexError
    # traceback when the two required paths are not given.
    if len(sys.argv) < 3:
        sys.exit('error: expected 2 arguments, got %d' % (len(sys.argv) - 1))
    base_vcf = sys.argv[1]
    pos_target_vcf = sys.argv[2]
    vcf_select(base_vcf, pos_target_vcf)