edawson · edawson · Oct 20, 2020 · Oct 13, 2020 · Oct 13, 2020 · Oct 16, 2020
diff --git a/cudamapper/src/chainer_utils.cu b/cudamapper/src/chainer_utils.cu
@@ -77,6 +77,37 @@ __device__ Overlap create_simple_overlap(const Anchor& start, const Anchor& end,
     return overlap;
 }
 
+Overlap create_simple_overlap_cpu(const Anchor& start, const Anchor& end, const int32_t num_anchors)
+{
+    Overlap overlap;
+    overlap.num_residues_ = num_anchors;
+
+    overlap.query_read_id_  = start.query_read_id_;
+    overlap.target_read_id_ = start.target_read_id_;
+    assert(start.query_read_id_ == end.query_read_id_ && start.target_read_id_ == end.target_read_id_);
+
+    overlap.query_start_position_in_read_ = min(start.query_position_in_read_, end.query_position_in_read_);
+    overlap.query_end_position_in_read_   = max(start.query_position_in_read_, end.query_position_in_read_);
+    bool is_negative_strand               = end.target_position_in_read_ < start.target_position_in_read_;
+    if (is_negative_strand)
+    {
+        overlap.relative_strand                = RelativeStrand::Reverse;
+        overlap.target_start_position_in_read_ = end.target_position_in_read_;
+        overlap.target_end_position_in_read_   = start.target_position_in_read_;
+    }
+    else
+    {
+        overlap.relative_strand                = RelativeStrand::Forward;
+        overlap.target_start_position_in_read_ = start.target_position_in_read_;
+        overlap.target_end_position_in_read_   = end.target_position_in_read_;
+    }
+    return overlap;
+}
+
+// TODO VI: this may have some thread overwrite issues, as well as some problems
+// uninitialized variables in overlap struct.
+// This also has an upper bound on how many anchors we actually process. If num_anchors is greater
+// than 1792 * 32, we never actually process that anchor
 __global__ void backtrace_anchors_to_overlaps(const Anchor* anchors,
                                               Overlap* overlaps,
                                               double* scores,
@@ -125,6 +156,82 @@ __global__ void backtrace_anchors_to_overlaps(const Anchor* anchors,
     }
 }
 
+__global__ void backtrace_anchors_to_overlaps_debug(const Anchor* anchors,
+                                              Overlap* overlaps,
+                                              double* scores,
+                                              bool* max_select_mask,
+                                              int32_t* predecessors,
+                                              const int32_t n_anchors,
+                                              const int32_t min_score)
+{
+    int32_t end = n_anchors - 1;
+    for (int32_t i = end; i >= 0; i--)
+    {
+        if (scores[i] >= min_score)
+        {
+            int32_t index                = i;
+            int32_t first_index          = index;
+            int32_t num_anchors_in_chain = 0;
+            Anchor final_anchor          = anchors[i];
+
+            while (index != -1)
+            {
+                first_index  = index;
+                int32_t pred = predecessors[index];
+                if (pred != -1)
+                {
+                    max_select_mask[pred] = false;
+                }
+                num_anchors_in_chain++;
+                index = predecessors[index];
+            }
+            Anchor first_anchor            = anchors[first_index];
+            overlaps[i] = create_simple_overlap(first_anchor, final_anchor, num_anchors_in_chain);
+        }
+        else
+        {
+            max_select_mask[i] = false;
+        }
+    }
+}
+void backtrace_anchors_to_overlaps_cpu(const Anchor* anchors,
+                                       Overlap* overlaps,
+                                       double* scores,
+                                       bool* max_select_mask,
+                                       int32_t* predecessors,
+                                       const int32_t n_anchors,
+                                       const int32_t min_score)
+{
+    int32_t end = n_anchors - 1;
+    for (int32_t i = end; i >= 0; i--)
+    {
+        if (scores[i] >= min_score)
+        {
+            int32_t index                = i;
+            int32_t first_index          = index;
+            int32_t num_anchors_in_chain = 0;
+            Anchor final_anchor          = anchors[i];
+
+            while (index != -1)
+            {
+                first_index  = index;
+                int32_t pred = predecessors[index];
+                if (pred != -1)
+                {
+                    max_select_mask[pred] = false;
+                }
+                num_anchors_in_chain++;
+                index = predecessors[index];
+            }
+            Anchor first_anchor            = anchors[first_index];
+            overlaps[i] = create_simple_overlap_cpu(first_anchor, final_anchor, num_anchors_in_chain);
+        }
+        else
+        {
+            max_select_mask[i] = false;
+        }
+    }
+}
 __global__ void convert_offsets_to_ends(std::int32_t* starts, std::int32_t* lengths, std::int32_t* ends, std::int32_t n_starts)
 {
     std::int32_t d_tid = blockIdx.x * blockDim.x + threadIdx.x;
@@ -153,9 +260,6 @@ __global__ void calculate_tiles_per_read(const std::int32_t* lengths,
     }
 }
 
-// TODO VI: threads may be overwriting each other.
-// because the number of queries is greater than number of tiles, each thread here has to write the query starts
-// for multiple tiles
 __global__ void calculate_tile_starts(const std::int32_t* query_starts,
                                       const std::int32_t* tiles_per_query,
                                       std::int32_t* tile_starts,
@@ -164,27 +268,16 @@ __global__ void calculate_tile_starts(const std::int32_t* query_starts,
                                       const std::int32_t* tiles_per_query_up_to_point)
 {
     int32_t d_thread_id = blockIdx.x * blockDim.x + threadIdx.x;
-    int32_t stride      = blockDim.x * gridDim.x;
     if (d_thread_id < num_queries)
     {
         // for each tile, we look up the query it corresponds to and offset it by the which tile in the query
         // we're at multiplied by the total size of the tile
-        // TODO VI: this memory access pattern seems a little off? Thread i and thread i+1 would overwrite eachother, no?
         for (int i = 0; i < tiles_per_query[d_thread_id]; i++)
         {
             //finds the offset in the ragged array and offsets to find start of "next" sub array
             tile_starts[tiles_per_query_up_to_point[d_thread_id] + i] = query_starts[d_thread_id] + (i * tile_size);
         }
     }
-    //int counter = 0;
-    //for (int i =0; i < num_queries; i++)
-    //{
-    //    for (int j = 0; j < tiles_per_query[i]; j++)
-    //    {
-    //        tile_starts[counter] = query_starts[i] + (j * tile_size);
-    //        counter++;
-    //    }
-    //}
 }
 
 void encode_anchor_query_locations(const Anchor* anchors,
@@ -454,4 +547,4 @@ void encode_overlap_query_target_pairs(Overlap* overlaps,
 } // namespace chainerutils
 } // namespace cudamapper
 } // namespace genomeworks
-} // namespace claraparabricks
+} // namespace claraparabricks
diff --git a/cudamapper/src/chainer_utils.cuh b/cudamapper/src/chainer_utils.cuh
@@ -123,6 +123,16 @@ struct TileResults
     }
 };
 
+__device__ __forceinline__ Anchor empty_anchor()
+{
+    Anchor empty;
+    empty.query_read_id_ = UINT32_MAX;
+    empty.target_read_id_          = UINT32_MAX;
+    empty.query_position_in_read_  = UINT32_MAX;
+    empty.target_position_in_read_  = UINT32_MAX;
+    return empty;
+}
+
 __device__ bool
 operator==(const QueryTargetPair& a, const QueryTargetPair& b);
 
@@ -134,6 +144,22 @@ __global__ void backtrace_anchors_to_overlaps(const Anchor* anchors,
                                               const int32_t n_anchors,
                                               const int32_t min_score);
 
+__global__ void backtrace_anchors_to_overlaps_debug(const Anchor* anchors,
+                                              Overlap* overlaps,
+                                              double* scores,
+                                              bool* max_select_mask,
+                                              int32_t* predecessors,
+                                              const int32_t n_anchors,
+                                              const int32_t min_score);
+
+void backtrace_anchors_to_overlaps_cpu(const Anchor* anchors,
+                                       Overlap* overlaps,
+                                       double* scores,
+                                       bool* max_select_mask,
+                                       int32_t* predecessors,
+                                       const int32_t n_anchors,
+                                       const int32_t min_score);
+
 __global__ void convert_offsets_to_ends(std::int32_t* starts, std::int32_t* lengths, std::int32_t* ends, std::int32_t n_starts);
 
 __global__ void calculate_tile_starts(const std::int32_t* query_starts,

diff --git a/cudamapper/src/overlapper.cpp b/cudamapper/src/overlapper.cpp
@@ -262,9 +262,10 @@ void filter_self_mappings(std::vector<Overlap>& overlaps,
                           const double max_percent_overlap)
 {
 
+    // TODO This is causing the segfault on some reads. Fix looking up overlaps with uninitialzed values
     auto remove_self_helper = [&query_parser, &target_parser, &max_percent_overlap](const Overlap& o) {
-        claraparabricks::genomeworks::io::FastaSequence query  = query_parser.get_sequence_by_id(o.query_read_id_);
-        claraparabricks::genomeworks::io::FastaSequence target = target_parser.get_sequence_by_id(o.target_read_id_);
+        const claraparabricks::genomeworks::io::FastaSequence& query  = query_parser.get_sequence_by_id(o.query_read_id_);
+        const claraparabricks::genomeworks::io::FastaSequence& target = target_parser.get_sequence_by_id(o.target_read_id_);
         if (query.name != target.name)
             return false;
         std::size_t read_len        = query.seq.size();