StarRocks · kangkaisen · Jul 4, 2022 · Jun 30, 2022 · Jul 1, 2022 · Jul 1, 2022
@@ -50,60 +50,60 @@ void DataSegment::init(const std::vector<ExprContext*>* sort_exprs, const ChunkP
 Status DataSegment::get_filter_array(std::vector<DataSegment>& data_segments, size_t rows_to_sort,
                                      std::vector<std::vector<uint8_t>>& filter_array,
                                      const std::vector<int>& sort_order_flags, const std::vector<int>& null_first_flags,
-                                     uint32_t& least_num, uint32_t& middle_num) {
+                                     uint32_t& smaller_num, uint32_t& include_num) {
     size_t dats_segment_size = data_segments.size();
     std::vector<CompareVector> compare_results_array(dats_segment_size);
 
-    // first compare with last row of this chunk.
+    // First compare the chunk with last row of this segment.
     {
         get_compare_results_colwise(rows_to_sort - 1, order_by_columns, compare_results_array, data_segments,
                                     sort_order_flags, null_first_flags);
     }
 
-    // but we only have one compare.
-    // compare with first row of this DataSegment,
-    // then we set BEFORE_LAST_RESULT and IN_LAST_RESULT at filter_array.
+    // Since the first and the last of segment is the same value,
+    // we can get both `SMALLER_THAN_MIN_OF_SEGMENT` and `INCLUDE_IN_SEGMENT` parts
+    // with only one comparation
     if (rows_to_sort == 1) {
-        least_num = 0, middle_num = 0;
+        smaller_num = 0, include_num = 0;
         filter_array.resize(dats_segment_size);
         for (size_t i = 0; i < dats_segment_size; ++i) {
             size_t rows = data_segments[i].chunk->num_rows();
             filter_array[i].resize(rows);
 
             for (size_t j = 0; j < rows; ++j) {
                 if (compare_results_array[i][j] < 0) {
-                    filter_array[i][j] = DataSegment::BEFORE_LAST_RESULT;
-                    ++least_num;
+                    filter_array[i][j] = DataSegment::SMALLER_THAN_MIN_OF_SEGMENT;
+                    ++smaller_num;
                 } else {
-                    filter_array[i][j] = DataSegment::IN_LAST_RESULT;
-                    ++middle_num;
+                    filter_array[i][j] = DataSegment::INCLUDE_IN_SEGMENT;
+                    ++include_num;
                 }
             }
         }
     } else {
         std::vector<size_t> first_size_array;
         first_size_array.resize(dats_segment_size);
 
-        middle_num = 0;
+        include_num = 0;
         filter_array.resize(dats_segment_size);
         for (size_t i = 0; i < dats_segment_size; ++i) {
             DataSegment& segment = data_segments[i];
             size_t rows = segment.chunk->num_rows();
             filter_array[i].resize(rows);
 
-            size_t local_first_size = middle_num;
+            size_t local_first_size = include_num;
             for (size_t j = 0; j < rows; ++j) {
                 if (compare_results_array[i][j] <= 0) {
-                    filter_array[i][j] = DataSegment::IN_LAST_RESULT;
-                    ++middle_num;
+                    filter_array[i][j] = DataSegment::INCLUDE_IN_SEGMENT;
+                    ++include_num;
                 }
             }
 
-            // obtain number of rows for second compare.
-            first_size_array[i] = middle_num - local_first_size;
+            // Obtain number of rows for second compare.
+            first_size_array[i] = include_num - local_first_size;
         }
 
-        // second compare with first row of this chunk, use rows from first compare.
+        // Second compare with first row of this chunk, use rows from first compare.
         {
             for (size_t i = 0; i < dats_segment_size; i++) {
                 for (auto& cmp : compare_results_array[i]) {
@@ -116,19 +116,19 @@ Status DataSegment::get_filter_array(std::vector<DataSegment>& data_segments, si
                                         null_first_flags);
         }
 
-        least_num = 0;
+        smaller_num = 0;
         for (size_t i = 0; i < dats_segment_size; ++i) {
             DataSegment& segment = data_segments[i];
             size_t rows = segment.chunk->num_rows();
 
             for (size_t j = 0; j < rows; ++j) {
                 if (compare_results_array[i][j] < 0) {
-                    filter_array[i][j] = DataSegment::BEFORE_LAST_RESULT;
-                    ++least_num;
+                    filter_array[i][j] = DataSegment::SMALLER_THAN_MIN_OF_SEGMENT;
+                    ++smaller_num;
                 }
             }
         }
-        middle_num -= least_num;
+        include_num -= smaller_num;
     }
 
     return Status::OK();

@@ -13,8 +13,9 @@
 namespace starrocks::vectorized {
 
 struct DataSegment {
-    static const uint8_t BEFORE_LAST_RESULT = 2;
-    static const uint8_t IN_LAST_RESULT = 1;
+    static const uint8_t SMALLER_THAN_MIN_OF_SEGMENT = 2;
+    static const uint8_t INCLUDE_IN_SEGMENT = 1;
+    static const uint8_t LARGER_THAN_MAX_OF_SEGMENT = 0;
 
     ChunkPtr chunk;
     Columns order_by_columns;
@@ -27,15 +28,13 @@ struct DataSegment {
 
     void init(const std::vector<ExprContext*>* sort_exprs, const ChunkPtr& cnk);
 
-    // there is two compares in the method,
+    // There is two compares in the method,
     // the first is:
-    //     compare every row in every DataSegment of data_segments with rows_to_sort - 1 row of this DataSegment,
-    //     obtain every row compare result in compare_results_array, if < 0, use it to set IN at filter_array.
+    //     compare every row in every DataSegment of data_segments with `rows_to_sort - 1` row of this DataSegment,
+    //     obtain every row compare result in compare_results_array, if <= 0, mark it with `INCLUDE_IN_SEGMENT`.
     // the second is:
-    //     compare every row in compare_results_array that less than 0, use it to compare with first row of this DataSegment,
-    //     as the first step, we set BEFORE_LAST_RESULT at filter_array.
-    //
-    // Actually, we Count the results in the first compare for the second compare.
+    //     compare every row in compare_results_array that <= 0 (i.e. `INCLUDE_IN_SEGMENT` part) with the first row of this DataSegment,
+    //     if < 0, then mark it with `SMALLER_THAN_MIN_OF_SEGMENT`
     Status get_filter_array(std::vector<DataSegment>& data_segments, size_t rows_to_sort,
                             std::vector<std::vector<uint8_t>>& filter_array, const std::vector<int>& sort_order_flags,
                             const std::vector<int>& null_first_flags, uint32_t& least_num, uint32_t& middle_num);