-
Notifications
You must be signed in to change notification settings - Fork 1
/
map_snps_to_genes.py
70 lines (57 loc) · 1.76 KB
/
map_snps_to_genes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
'''
TO DO:
DUE FRIDAY
1. Map SNP ranges to genes
2. Get network data before parsing and figuring it out
3. Start running this file on all chromosomes (larger ones too)
4. Clustering
5. How to represent network? Adjacency matrix?
'''
#Set up the list of SNPs we need
f = open("GWAS.txt", 'r')
gwasSnps = []
line = f.readline()
while (line != ""):
line = f.readline()
tokens = line.split(" ")
gwasSnps.append((tokens[0],tokens[2]))
f.close()
ld = f.open("LD.txt", 'r')
ldcoeffs = {}
allSnps = []
line = ld.readline()
currentSnp = None
while(line != ""):
tokens = line.split(" ")
if not tokens[3] == currentSnp:
currentSnp = tokens[3]
allSnps.add(currentSnp)
ldcoeffs[(currentSnp,tokens[4])] = (tokens[6],tokens[1]) #TODO
line = ld.readline()
ld.close()
snpRanges = []
#Builds snpRanges: list of tuples (snp, minPos, maxPos of linked alleles)
for snpPair in gwasSnps:
snp = snpPair[0]
maxPos = snpPair[1]
minPos = snpPair[1]
lds = ldcoeffs[snp]
for otherSnp in allSnps:
if otherSnp == snp:
continue
else if (otherSnp, snp) in ldcoeffs.keys():
current = ldcoeffs[(otherSnp,snp)]
else:
current = ldcoeffs[(snp,otherSnp)]
if current[0] >= 0.5:
if current[1] > maxPos:
maxPos = current[1]
if current[1] < minPos:
minPos = current[1]
snpRanges.append((snp,minPos,maxPos))
#Write the SNP ranges to a file. Can get rid of this later
snpRangeFile = open("snp_ranges",'w')
for snp in snpRanges:
snpRangeFile.write("{0[0]} {0[1]} {0[2]}".format(snp))
snpRangeFile.close()
#TODO associate genes with SNPs and write to file