forked from apache/doris
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[fix](merge-on-write) segcompaction should process delete bitmap if n…
…ecessary (apache#38369) Issue Number: close #xxx When loading data into a unique key table with a sequence column, some rows in the current load job might be marked as deleted due to a lower sequence value. If there are many segments in such a load job, segcompaction might be triggered; it currently doesn't process the delete bitmap, which causes a data correctness issue. For example: 1. we have 4 segments in the current load job initially, and due to the seq column, some rows are marked as deleted 2. after segcompaction, if we don't process the delete bitmap, its content still corresponds to the old segment layout, and rows 7, 14 and 15 are not correctly marked as deleted on the newly generated segment 1. 3. in this PR, we convert the old delete bitmap to fit the new segment layout; it uses a similar approach to base/cumulative compaction to convert delete bitmaps from the old layout to the new one, but the rowid conversion is simpler 
- Loading branch information
1 parent
610f694
commit 115393e
Showing
10 changed files
with
1,100 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#pragma once | ||
|
||
#include <map> | ||
#include <vector> | ||
|
||
#include "olap/olap_common.h" | ||
#include "olap/utils.h" | ||
|
||
namespace doris { | ||
|
||
// Simple verion of rowid conversion, for segcompaction | ||
// convert rows from several segments to rows in 1 segment | ||
class SimpleRowIdConversion { | ||
public: | ||
SimpleRowIdConversion(const RowsetId& rowset_id) : _rowst_id(rowset_id) {}; | ||
~SimpleRowIdConversion() = default; | ||
|
||
// resize segment rowid map to its rows num | ||
void reset_segment_map(const std::map<uint32_t, uint32_t>& num_rows) { | ||
_cur_dst_segment_rowid = 0; | ||
for (auto seg_rows : num_rows) { | ||
_segments_rowid_map.emplace(seg_rows.first, | ||
std::vector<uint32_t>(seg_rows.second, UINT32_MAX)); | ||
} | ||
} | ||
|
||
// add row id to the map | ||
void add(const std::vector<RowLocation>& rss_row_ids) { | ||
for (auto& item : rss_row_ids) { | ||
if (item.row_id == -1) { | ||
continue; | ||
} | ||
DCHECK(_segments_rowid_map.find(item.segment_id) != _segments_rowid_map.end() && | ||
_segments_rowid_map[item.segment_id].size() > item.row_id); | ||
_segments_rowid_map[item.segment_id][item.row_id] = _cur_dst_segment_rowid++; | ||
} | ||
} | ||
|
||
// get destination RowLocation | ||
// return non-zero if the src RowLocation does not exist | ||
int get(const RowLocation& src) const { | ||
auto it = _segments_rowid_map.find(src.segment_id); | ||
if (it == _segments_rowid_map.end()) { | ||
return -1; | ||
} | ||
const auto& rowid_map = it->second; | ||
if (src.row_id >= rowid_map.size() || UINT32_MAX == rowid_map[src.row_id]) { | ||
return -1; | ||
} | ||
|
||
return rowid_map[src.row_id]; | ||
} | ||
|
||
private: | ||
// key: index indicates src segment. | ||
// value: index indicates row id of source segment, value indicates row id of destination | ||
// segment. UINT32_MAX indicates current row not exist. | ||
std::map<uint32_t, std::vector<uint32_t>> _segments_rowid_map; | ||
|
||
// dst rowset id | ||
RowsetId _rowst_id; | ||
|
||
// current rowid of dst segment | ||
std::uint32_t _cur_dst_segment_rowid = 0; | ||
}; | ||
|
||
} // namespace doris |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.