-
Notifications
You must be signed in to change notification settings - Fork 4
/
precompute.py
135 lines (107 loc) · 5.26 KB
/
precompute.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python
import os
import sys
from multiprocessing import Pool
def lockAndWriteFile(start, answerTuple):
    """Append one row, ``start,<flattened answerTuple>``, to ``results.csv``.

    ``answerTuple`` is rendered with ``str()`` and every ``(``, ``)``, ``[``
    and ``]`` character is stripped, so nested tuples/lists collapse into a
    single comma-separated run of values.

    Despite the name, no lock is taken: the file is simply opened in append
    mode, and the write is retried until it succeeds.  Only I/O errors
    trigger a retry; any other exception (for example a failing ``__str__``
    on the answer) propagates instead of spinning forever.
    """
    import time

    # Build the row once, outside the retry loop: formatting cannot be fixed
    # by retrying, so a failure here should surface immediately.
    data = str(answerTuple)
    for bracket in '()[]':
        data = data.replace(bracket, '')
    row = str(start) + ',' + data + '\n'

    while True:
        try:
            # `with` guarantees the handle is closed even if write() raises.
            with open('results.csv', 'a') as f:
                f.write(row)
            return
        except (IOError, OSError):
            # Best-effort retry, as in the original; back off briefly so a
            # persistent failure does not peg a CPU core.
            time.sleep(0.01)
def outputThreemerNormalization(request):
    """Run ThreeMerDetector over a hard-coded chr2 region at many widths.

    ``request`` is an indexable ``(specimen, nProcessors, PID)``.  The work is
    split between processes by *width*: this worker handles every
    ``nProcessors``-th entry of the width list, starting at index ``PID``
    (PID is expected to be non-negative).  Every chunk start in the region is
    visited by every worker.

    Each result returned by ``ThreeMerDetector.calculateOutputPixels`` is
    appended to ``results.csv`` through ``lockAndWriteFile``.
    """
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "SkittleTree.settings")
    # Project modules are imported only after the settings module is named
    # (presumably they require Django to be configured -- confirm).
    from SkittleCore.Graphs import ThreeMerDetector
    from SkittleCore.Graphs.SkittleGraphTransforms import getChunkStart

    specimen = request[0]
    nProcessors = request[1]
    pid = request[2]

    chromosomes = ['chr2']
    # Region of interest, sampled every 2**16 positions.  The original author
    # left an alternative region in a comment: range(16518287, 20987087, 2**16)
    chunks = [getChunkStart(x) for x in range(7077889, 11610965, 2 ** 16)]
    # Widths 10, 30, ..., 490; this worker's share is every nProcessors-th one.
    widths = range(10, 500, 20)
    myWidths = widths[pid::nProcessors]

    for chromosome in chromosomes:
        for start in chunks:
            for width in myWidths:
                state = makeRequestPacket(specimen, chromosome, start)
                state.width = width
                state.requestedGraph = 't'
                # Single pre-formatted argument: prints the same text on
                # Python 2 and is also valid Python 3 syntax.
                print("Computing:  %s %s %s %s" % (
                    state.specimen, state.chromosome, state.start, state.width))
                answerTuple = ThreeMerDetector.calculateOutputPixels(state)
                lockAndWriteFile(state.start, answerTuple)
                print("Done computing  %s %s %s %s" % (
                    state.specimen, state.chromosome, state.start, state.width))
def precomputeRepeatMap(request):
    """Precompute the default graph for this worker's share of every chunk.

    ``request`` is an indexable ``(specimen, nProcessors, PID)``.  For each
    chromosome related to the specimen, chunk starts are generated every
    2**16 positions beginning at 1, and this worker handles every
    ``nProcessors``-th chunk starting at index ``PID`` (PID is expected to be
    non-negative).  Each chunk is submitted to
    ``GraphRequestHandler.handleRequest`` using the packet defaults from
    ``makeRequestPacket``.
    """
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "SkittleTree.settings")
    # Project modules are imported only after the settings module is named
    # (presumably they require Django to be configured -- confirm).
    from SkittleCore import GraphRequestHandler
    from DNAStorage import StorageRequestHandler

    specimen = request[0]
    nProcessors = request[1]
    pid = request[2]

    dbSpecimen = StorageRequestHandler.GetSpecimen(specimen)
    chromosomes = StorageRequestHandler.GetRelatedChromosomes(dbSpecimen)
    for chromosome in chromosomes:
        length = StorageRequestHandler.GetChromosomeLength(specimen, chromosome)
        chunkStarts = range(1, length + 1, 2 ** 16)
        # Strided slice == "indices PID, PID + nProcessors, ..." of the original.
        for start in chunkStarts[pid::nProcessors]:
            state = makeRequestPacket(specimen, chromosome, start)
            # Single pre-formatted argument: prints the same text on
            # Python 2 and is also valid Python 3 syntax.
            print("Computing:  %s %s %s" % (
                state.specimen, state.chromosome, state.start))
            GraphRequestHandler.handleRequest(state)
            print("Done computing  %s %s %s" % (
                state.specimen, state.chromosome, state.start))
def makeRequestPacket(specimen, chromosome, start, graphSymbol='m', scale=1):
    """Create and populate a ``models.RequestPacket`` for one graph request.

    The packet's width is initialised from ``RepeatMap.skixelsPerSample``;
    a caller wanting another width assigns ``width`` on the returned packet.
    """
    from SkittleCore import models
    from SkittleCore.Graphs import RepeatMap

    packet = models.RequestPacket()

    # Where the data lives.
    packet.specimen = specimen
    packet.chromosome = chromosome
    packet.start = start

    # How it should be rendered.
    packet.scale = scale
    packet.width = RepeatMap.skixelsPerSample
    packet.requestedGraph = graphSymbol

    return packet
def startThreemer(specimen, nProcessors):
    """Run ``outputThreemerNormalization`` on a pool of ``nProcessors`` workers.

    One ``(specimen, nProcessors, PID)`` request is created per worker, with
    PID running from 0 to ``nProcessors - 1``.
    """
    requests = [(specimen, nProcessors, PID) for PID in range(nProcessors)]
    processors = Pool(nProcessors)
    try:
        processors.map(outputThreemerNormalization, requests)
    finally:
        # map() has returned or raised, so no further work is wanted: stop
        # the workers and reap them instead of leaking the pool.
        processors.terminate()
        processors.join()
def startRepeatMap(specimen, nProcessors):
    """Run ``precomputeRepeatMap`` on a pool of ``nProcessors`` workers.

    One ``(specimen, nProcessors, PID)`` request is created per worker, with
    PID running from 0 to ``nProcessors - 1``.
    """
    requests = [(specimen, nProcessors, PID) for PID in range(nProcessors)]
    processors = Pool(nProcessors)
    try:
        processors.map(precomputeRepeatMap, requests)
    finally:
        # map() has returned or raised, so no further work is wanted: stop
        # the workers and reap them instead of leaking the pool.
        processors.terminate()
        processors.join()
def benchmarkHere(request):
    """Worker entry point used by allGraphs; hands ``request`` to precomputeAnyGraph."""
    precomputeAnyGraph(request)
def precomputeAnyGraph(request):
    """Compute one graph type at one scale for this worker's share of chunks.

    ``request`` is an indexable
    ``(nProcessors, PID, specimen, graphSymbol, scale)``.  For each chromosome
    related to the specimen, only the LAST SEVEN chunk starts (step 2**16,
    beginning at 1) are considered, and this worker handles every
    ``nProcessors``-th of those starting at index ``PID`` (PID is expected to
    be non-negative).
    """
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "SkittleTree.settings")
    # Project modules are imported only after the settings module is named
    # (presumably they require Django to be configured -- confirm).
    from SkittleCore import GraphRequestHandler
    from DNAStorage import StorageRequestHandler

    nProcessors = request[0]
    pid = request[1]
    specimen = request[2]
    graphSymbol = request[3]
    scale = request[4]

    # NOTE(review): precomputeRepeatMap passes the result of GetSpecimen() to
    # GetRelatedChromosomes, whereas the raw specimen value is passed here --
    # confirm the storage API accepts both.
    chromosomes = StorageRequestHandler.GetRelatedChromosomes(specimen)
    for chromosome in chromosomes:
        length = StorageRequestHandler.GetChromosomeLength(specimen, chromosome)
        # NOTE(review): the [-7:] looks like a benchmarking shortcut that keeps
        # runs short -- confirm before relying on this for a full precompute.
        chunkStarts = range(1, length + 1, 2 ** 16)[-7:]
        # Divide the jobs between workers by PID modulo nProcessors.
        for start in chunkStarts[pid::nProcessors]:
            state = makeRequestPacket(specimen, chromosome, start, graphSymbol, scale)
            # Single pre-formatted argument: prints the same text on
            # Python 2 and is also valid Python 3 syntax.
            print("Computing:  %s %s %s" % (
                state.specimen, state.chromosome, state.start))
            GraphRequestHandler.handleRequest(state)
            print("Done computing  %s %s %s" % (
                state.specimen, state.chromosome, state.start))
def allGraphs(specimen, nProcessors):
    """Run ``benchmarkHere`` for every graph symbol at every tested scale.

    A single pool of ``nProcessors`` workers is reused for all combinations.
    For each (graphSymbol, scale) pair one
    ``(nProcessors, PID, specimen, graphSymbol, scale)`` request is mapped per
    worker, and the pairs are processed one after another.
    """
    processors = Pool(nProcessors)
    try:
        for graphSymbol in ['n', 'm', 'r', 'o', 'b', 'h', 't']:
            for scale in [1, 16]:  # scales we'd like to test
                requests = [(nProcessors, PID, specimen, graphSymbol, scale)
                            for PID in range(nProcessors)]
                processors.map(benchmarkHere, requests)
    finally:
        # All map() calls have returned, or one raised: stop the workers and
        # reap them instead of leaking the pool.
        processors.terminate()
        processors.join()
if __name__ == "__main__":
    # Usage: precompute.py <specimen> [nProcessors]
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit("Usage: %s <specimen> [nProcessors]" % sys.argv[0])

    # The worker count is optional and defaults to 3.
    nProcessors = int(sys.argv[2]) if len(sys.argv) >= 3 else 3
    specimen = sys.argv[1]

    # Single pre-formatted argument: prints the same text on Python 2 and is
    # also valid Python 3 syntax.
    print("Specimen:  %s Processors:  %s" % (specimen, nProcessors))

    # Alternative entry points; swap one in to run a different job:
    # startThreemer(specimen, nProcessors)
    # startRepeatMap(specimen, nProcessors)
    allGraphs(specimen, nProcessors)