@@ -77,6 +77,37 @@ __device__ Overlap create_simple_overlap(const Anchor& start, const Anchor& end,
     return overlap;
 }
 
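+// Host-side counterpart of the __device__ create_simple_overlap above: builds an
+// Overlap from the first and last anchors of a chain, flipping the target
+// coordinates when the chain maps to the reverse strand. Assumes <algorithm> and
+// <cassert> are available for std::min/std::max and assert.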
+Overlap create_simple_overlap_cpu(const Anchor& start, const Anchor& end, const int32_t num_anchors)
+{
+    Overlap overlap;
+    overlap.num_residues_ = num_anchors;
+
+    overlap.query_read_id_  = start.query_read_id_;
+    overlap.target_read_id_ = start.target_read_id_;
+    assert(start.query_read_id_ == end.query_read_id_ && start.target_read_id_ == end.target_read_id_);
+
+    // use std::min/std::max rather than the CUDA min/max intrinsics, since this runs on the host
+    overlap.query_start_position_in_read_ = std::min(start.query_position_in_read_, end.query_position_in_read_);
+    overlap.query_end_position_in_read_   = std::max(start.query_position_in_read_, end.query_position_in_read_);
+    bool is_negative_strand               = end.target_position_in_read_ < start.target_position_in_read_;
+    if (is_negative_strand)
+    {
+        overlap.relative_strand                = RelativeStrand::Reverse;
+        overlap.target_start_position_in_read_ = end.target_position_in_read_;
+        overlap.target_end_position_in_read_   = start.target_position_in_read_;
+    }
+    else
+    {
+        overlap.relative_strand                = RelativeStrand::Forward;
+        overlap.target_start_position_in_read_ = start.target_position_in_read_;
+        overlap.target_end_position_in_read_   = end.target_position_in_read_;
+    }
+    return overlap;
+}
+
+// TODO VI: this may have some thread-overwrite issues, as well as some problems
+// with uninitialized variables in the Overlap struct.
+// This also puts an upper bound on how many anchors we actually process: if
+// num_anchors is greater than 1792 * 32, anchors beyond that bound are never processed.
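+// (1792 * 32 = 57344, presumably the launch's grid size times its block size;
+// a grid-stride loop inside the kernel would be one way to lift this cap.)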
 __global__ void backtrace_anchors_to_overlaps(const Anchor* anchors,
                                               Overlap* overlaps,
                                               double* scores,
@@ -125,6 +156,82 @@ __global__ void backtrace_anchors_to_overlaps(const Anchor* anchors,
     }
 }
 
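+// Debug variant of backtrace_anchors_to_overlaps: walks all anchors serially, so it
+// is presumably meant to be launched with a single thread (e.g. <<<1, 1>>>) to
+// sidestep the thread-overwrite concerns noted above.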
+__global__ void backtrace_anchors_to_overlaps_debug(const Anchor* anchors,
+                                                    Overlap* overlaps,
+                                                    double* scores,
+                                                    bool* max_select_mask,
+                                                    int32_t* predecessors,
+                                                    const int32_t n_anchors,
+                                                    const int32_t min_score)
+{
+    int32_t end = n_anchors - 1;
+    for (int32_t i = end; i >= 0; i--)
+    {
+        if (scores[i] >= min_score)
+        {
+            int32_t index                = i;
+            int32_t first_index          = index;
+            int32_t num_anchors_in_chain = 0;
+            Anchor final_anchor          = anchors[i];
+
+            // walk the predecessor chain back to its head, unmasking interior anchors
+            while (index != -1)
+            {
+                first_index  = index;
+                int32_t pred = predecessors[index];
+                if (pred != -1)
+                {
+                    max_select_mask[pred] = false;
+                }
+                num_anchors_in_chain++;
+                index = pred;
+            }
+            Anchor first_anchor = anchors[first_index];
+            overlaps[i]         = create_simple_overlap(first_anchor, final_anchor, num_anchors_in_chain);
+        }
+        else
+        {
+            max_select_mask[i] = false;
+        }
+    }
+}
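+
+// Host reference implementation of the backtrace above; useful for validating
+// kernel output on the CPU.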
+void backtrace_anchors_to_overlaps_cpu(const Anchor* anchors,
+                                       Overlap* overlaps,
+                                       double* scores,
+                                       bool* max_select_mask,
+                                       int32_t* predecessors,
+                                       const int32_t n_anchors,
+                                       const int32_t min_score)
+{
+    int32_t end = n_anchors - 1;
+    for (int32_t i = end; i >= 0; i--)
+    {
+        if (scores[i] >= min_score)
+        {
+            int32_t index                = i;
+            int32_t first_index          = index;
+            int32_t num_anchors_in_chain = 0;
+            Anchor final_anchor          = anchors[i];
+
+            while (index != -1)
+            {
+                first_index  = index;
+                int32_t pred = predecessors[index];
+                if (pred != -1)
+                {
+                    max_select_mask[pred] = false;
+                }
+                num_anchors_in_chain++;
+                index = pred;
+            }
+            Anchor first_anchor = anchors[first_index];
+            overlaps[i]         = create_simple_overlap_cpu(first_anchor, final_anchor, num_anchors_in_chain);
+        }
+        else
+        {
+            max_select_mask[i] = false;
+        }
+    }
+}
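+
+// Hypothetical usage (buffer names h_* are illustrative, not part of this change):
+// copy scores and predecessors back to host h_* buffers, call
+// backtrace_anchors_to_overlaps_cpu on them, and compare the resulting overlaps
+// against the kernel's output.
+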
 __global__ void convert_offsets_to_ends(std::int32_t* starts, std::int32_t* lengths, std::int32_t* ends, std::int32_t n_starts)
 {
     std::int32_t d_tid = blockIdx.x * blockDim.x + threadIdx.x;
@@ -153,9 +260,6 @@ __global__ void calculate_tiles_per_read(const std::int32_t* lengths,
     }
 }
 
-// TODO VI: threads may be overwriting each other.
-// because the number of queries is greater than number of tiles, each thread here has to write the query starts
-// for multiple tiles
 __global__ void calculate_tile_starts(const std::int32_t* query_starts,
                                       const std::int32_t* tiles_per_query,
                                       std::int32_t* tile_starts,
@@ -164,27 +268,16 @@ __global__ void calculate_tile_starts(const std::int32_t* query_starts,
                                       const std::int32_t* tiles_per_query_up_to_point)
 {
     int32_t d_thread_id = blockIdx.x * blockDim.x + threadIdx.x;
-    int32_t stride = blockDim.x * gridDim.x;
     if (d_thread_id < num_queries)
     {
         // for each tile, we look up the query it corresponds to and offset it by which
         // tile within the query we're at, multiplied by the tile size
-        // TODO VI: this memory access pattern seems a little off? Thread i and thread i+1 would overwrite eachother, no?
         for (int i = 0; i < tiles_per_query[d_thread_id]; i++)
         {
             // finds the offset into the ragged array and offsets it to find the start of the "next" sub-array
             tile_starts[tiles_per_query_up_to_point[d_thread_id] + i] = query_starts[d_thread_id] + (i * tile_size);
         }
     }
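+    // e.g. with tile_size = 4, query_starts = {0, 10}, tiles_per_query = {2, 1} and
+    // tiles_per_query_up_to_point = {0, 2} (illustrative values), this yields
+    // tile_starts = {0, 4, 10}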
-    // int counter = 0;
-    // for (int i =0; i < num_queries; i++)
-    // {
-    //     for (int j = 0; j < tiles_per_query[i]; j++)
-    //     {
-    //         tile_starts[counter] = query_starts[i] + (j * tile_size);
-    //         counter++;
-    //     }
-    // }
 }
 
 void encode_anchor_query_locations(const Anchor* anchors,
@@ -454,4 +547,4 @@ void encode_overlap_query_target_pairs(Overlap* overlaps,
 } // namespace chainerutils
 } // namespace cudamapper
 } // namespace genomeworks
-} // namespace claraparabricks
+} // namespace claraparabricks