Skip to content

Commit 10a3ce3

Browse files
authored
BTreeIndex Stats iterator interface (#1949)
1 parent b051ac8 commit 10a3ce3

14 files changed

+504
-143
lines changed

ydb/core/tablet_flat/flat_part_btree_index_iter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ class TPartBtreeIndexIt : public IIndexIter {
264264
return State.back().EndRowId;
265265
}
266266

267-
bool HasKeyCells() const override {
267+
TPos GetKeyCellsCount() const override {
268268
Y_ABORT_UNLESS(IsLeaf());
269269
return State.back().BeginKey.Count();
270270
}

ydb/core/tablet_flat/flat_part_index_iter.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@
44
#include "flat_page_index.h"
55
#include "flat_part_index_iter_iface.h"
66
#include "flat_table_part.h"
7+
#include "flat_stat_part_group_iter_iface.h"
78
#include <ydb/library/yverify_stream/yverify_stream.h>
89

910

1011
namespace NKikimr::NTable {
1112

12-
class TPartIndexIt : public IIndexIter {
13+
class TPartIndexIt : public IIndexIter, public IStatsPartGroupIterator {
1314
public:
1415
using TCells = NPage::TCells;
1516
using TRecord = NPage::TIndex::TRecord;
@@ -26,6 +27,10 @@ class TPartIndexIt : public IIndexIter {
2627
, EndRowId(groupId.IsMain() && part->Stat.Rows ? part->Stat.Rows : Max<TRowId>())
2728
{ }
2829

30+
EReady Start() override {
31+
return Seek(0);
32+
}
33+
2934
EReady Seek(TRowId rowId) override {
3035
auto index = TryGetIndex();
3136
if (!index) {
@@ -123,10 +128,10 @@ class TPartIndexIt : public IIndexIter {
123128
: EndRowId;
124129
}
125130

126-
bool HasKeyCells() const override {
131+
TPos GetKeyCellsCount() const override {
127132
Y_ABORT_UNLESS(Index);
128133
Y_ABORT_UNLESS(Iter);
129-
return true;
134+
return GroupInfo.KeyTypes.size();
130135
}
131136

132137
TCell GetKeyCell(TPos index) const override {

ydb/core/tablet_flat/flat_part_index_iter_iface.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ namespace NKikimr::NTable {
2222
virtual TRowId GetRowId() const = 0;
2323
virtual TRowId GetNextRowId() const = 0;
2424

25-
virtual bool HasKeyCells() const = 0;
25+
virtual TPos GetKeyCellsCount() const = 0;
2626
virtual TCell GetKeyCell(TPos index) const = 0;
2727

2828
virtual ~IIndexIter() = default;

ydb/core/tablet_flat/flat_part_iter_multi.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -544,8 +544,8 @@ namespace NTable {
544544
return Exhausted();
545545
};
546546
case EReady::Data: {
547-
Y_DEBUG_ABORT_UNLESS(Index.HasKeyCells(), "Non-first page is expected to have key cells");
548-
if (Index.HasKeyCells()) {
547+
Y_DEBUG_ABORT_UNLESS(Index.GetKeyCellsCount(), "Non-first page is expected to have key cells");
548+
if (Index.GetKeyCellsCount()) {
549549
if (!checkIndex()) {
550550
// First row for the next RowId
551551
MaxVersion = TRowVersion::Max();
@@ -632,8 +632,8 @@ namespace NTable {
632632
}
633633

634634
// We need exact match on rowId, bail on larger values
635-
Y_DEBUG_ABORT_UNLESS(Index.GetRowId() == 0 || Index.HasKeyCells(), "Non-first page is expected to have key cells");
636-
if (Index.HasKeyCells()) {
635+
Y_DEBUG_ABORT_UNLESS(Index.GetRowId() == 0 || Index.GetKeyCellsCount(), "Non-first page is expected to have key cells");
636+
if (Index.GetKeyCellsCount()) {
637637
TRowId indexRowId = Index.GetKeyCell(0).AsValue<TRowId>();
638638
if (rowId < indexRowId) {
639639
// We cannot compute MaxVersion anyway as indexRowId row may be presented on the previous page
@@ -675,8 +675,8 @@ namespace NTable {
675675
return Terminate(ready);
676676
}
677677

678-
Y_DEBUG_ABORT_UNLESS(Index.HasKeyCells(), "Non-first page is expected to have key cells");
679-
if (Y_LIKELY(Index.HasKeyCells())) {
678+
Y_DEBUG_ABORT_UNLESS(Index.GetKeyCellsCount(), "Non-first page is expected to have key cells");
679+
if (Y_LIKELY(Index.GetKeyCellsCount())) {
680680
if (!checkIndex()) {
681681
// First row for the nextRowId
682682
MaxVersion = TRowVersion::Max();
@@ -706,7 +706,7 @@ namespace NTable {
706706
Data = Page->Begin();
707707
Y_ABORT_UNLESS(Data);
708708

709-
if (Index.HasKeyCells()) {
709+
if (Index.GetKeyCellsCount()) {
710710
// Must have rowId as we have checked index
711711
Y_ABORT_UNLESS(checkData() && RowVersion <= rowVersion, "Index and Data are out of sync");
712712

ydb/core/tablet_flat/flat_stat_part.h

Lines changed: 77 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
#pragma once
22

33
#include "flat_part_iface.h"
4-
#include "flat_part_index_iter.h"
54
#include "flat_part_laid.h"
65
#include "flat_page_frames.h"
7-
#include "util_basics.h"
6+
#include "flat_stat_part_group_iter_iface.h"
87

98
#include <library/cpp/containers/stack_vector/stack_vec.h>
109
#include <util/draft/holder_vector.h>
@@ -31,101 +30,108 @@ struct TPartDataStats {
3130
};
3231

3332
// Iterates over part index and calculates total row count and data size
34-
// NOTE: we don't know row count for the last page so we also ignore its size
35-
// This shouldn't be a problem for big parts with many pages
36-
// This iterator skipps pages that are screened. Currently the logic is simple:
33+
// This iterator skips pages that are screened. Currently the logic is simple:
3734
// if page start key is screened then we assume that the whole previous page is screened
3835
// if page start key is not screened then the whole previous page is added to stats
39-
class TScreenedPartIndexIterator {
36+
class TStatsScreenedPartIterator {
37+
using TGroupId = NPage::TGroupId;
38+
using TFrames = NPage::TFrames;
39+
4040
public:
41-
TScreenedPartIndexIterator(TPartView partView, IPages* env, TIntrusiveConstPtr<TKeyCellDefaults> keyColumns,
42-
TIntrusiveConstPtr<NPage::TFrames> small, TIntrusiveConstPtr<NPage::TFrames> large)
41+
TStatsScreenedPartIterator(TPartView partView, IPages* env, TIntrusiveConstPtr<TKeyCellDefaults> keyDefaults,
42+
TIntrusiveConstPtr<TFrames> small, TIntrusiveConstPtr<TFrames> large)
4343
: Part(std::move(partView.Part))
44-
, Pos(Part.Get(), env, {})
45-
, KeyColumns(std::move(keyColumns))
44+
, KeyDefaults(std::move(keyDefaults))
45+
, Groups(::Reserve(Part->GroupsCount))
46+
, HistoricGroups(::Reserve(Part->HistoricGroupsCount))
4647
, Screen(std::move(partView.Screen))
4748
, Small(std::move(small))
4849
, Large(std::move(large))
4950
, CurrentHole(TScreen::Iter(Screen, CurrentHoleIdx, 0, 1))
5051
{
51-
AltGroups.reserve(Part->GroupsCount - 1);
52-
for (ui32 group : xrange(size_t(1), Part->GroupsCount)) {
53-
AltGroups.emplace_back(Part.Get(), env, NPage::TGroupId(group));
52+
for (ui32 groupIndex : xrange(Part->GroupsCount)) {
53+
Groups.push_back(CreateStatsPartGroupIterator(Part.Get(), env, TGroupId(groupIndex)));
5454
}
55-
for (ui32 group : xrange(Part->HistoricGroupsCount)) {
56-
HistoryGroups.emplace_back(Part.Get(), env, NPage::TGroupId(group, true));
55+
for (ui32 groupIndex : xrange(Part->HistoricGroupsCount)) {
56+
HistoricGroups.push_back(CreateStatsPartGroupIterator(Part.Get(), env, TGroupId(groupIndex, true)));
5757
}
5858
}
5959

6060
EReady Start() {
61-
auto ready = Pos.Seek(0);
62-
if (ready != EReady::Page) {
63-
FillKey();
64-
}
61+
auto ready = EReady::Data;
6562

66-
for (auto& g : AltGroups) {
67-
if (g.Pos.Seek(0) == EReady::Page) {
63+
for (auto& iter : Groups) {
64+
if (iter->Start() == EReady::Page) {
6865
ready = EReady::Page;
6966
}
7067
}
71-
for (auto& g : HistoryGroups) {
72-
if (g.Pos.Seek(0) == EReady::Page) {
68+
for (auto& iter : HistoricGroups) {
69+
if (iter->Start() == EReady::Page) {
7370
ready = EReady::Page;
7471
}
7572
}
7673

74+
if (ready != EReady::Page) {
75+
FillKey();
76+
}
77+
7778
return ready;
7879
}
7980

8081
bool IsValid() const {
81-
return Pos.IsValid();
82+
return Groups[0]->IsValid();
8283
}
8384

8485
EReady Next(TPartDataStats& stats) {
8586
Y_ABORT_UNLESS(IsValid());
8687

87-
auto curPageId = Pos.GetPageId();
88-
LastRowId = Pos.GetRowId();
89-
auto ready = Pos.Next();
88+
auto curPageId = Groups[0]->GetPageId();
89+
LastRowId = Groups[0]->GetRowId();
90+
auto ready = Groups[0]->Next();
9091
if (ready == EReady::Page) {
9192
return ready;
9293
}
93-
ui64 rowCount = IncludedRows(GetLastRowId(), GetCurrentRowId());
94-
stats.RowCount += rowCount;
9594

96-
if (rowCount) AddPageSize(stats.DataSize, curPageId);
97-
TRowId nextRowId = ready == EReady::Data ? Pos.GetRowId() : Max<TRowId>();
98-
for (auto& g : AltGroups) {
99-
while (g.Pos.IsValid() && g.Pos.GetRowId() < nextRowId) {
95+
ui64 rowCount = CountUnscreenedRows(GetLastRowId(), GetCurrentRowId());
96+
stats.RowCount += rowCount;
97+
if (rowCount) {
98+
AddPageSize(stats.DataSize, curPageId, TGroupId(0));
99+
}
100+
101+
TRowId nextRowId = ready == EReady::Data ? Groups[0]->GetRowId() : Max<TRowId>();
102+
for (auto groupIndex : xrange<ui32>(1, Groups.size())) {
103+
while (Groups[groupIndex]->IsValid() && Groups[groupIndex]->GetRowId() < nextRowId) {
100104
// eagerly include all data up to the next row id
101-
if (rowCount) AddPageSize(stats.DataSize, g.Pos.GetPageId(), g.GroupId);
102-
if (g.Pos.Next() == EReady::Page) {
105+
if (rowCount) {
106+
AddPageSize(stats.DataSize, Groups[groupIndex]->GetPageId(), TGroupId(groupIndex));
107+
}
108+
if (Groups[groupIndex]->Next() == EReady::Page) {
103109
ready = EReady::Page;
104110
break;
105111
}
106112
}
107113
}
108114

109-
// Include mvcc data
110-
if (!HistoryGroups.empty()) {
111-
auto& h = HistoryGroups[0];
112-
const auto& hscheme = Part->Scheme->HistoryGroup;
113-
Y_DEBUG_ABORT_UNLESS(hscheme.ColsKeyIdx.size() == 3);
114-
while (h.Pos.IsValid() && h.Pos.GetRecord()->Cell(hscheme.ColsKeyIdx[0]).AsValue<TRowId>() < nextRowId) {
115+
if (HistoricGroups) {
116+
Y_DEBUG_ABORT_UNLESS(Part->Scheme->HistoryGroup.ColsKeyIdx.size() == 3);
117+
while (HistoricGroups[0]->IsValid() && (!HistoricGroups[0]->GetKeyCellsCount() || HistoricGroups[0]->GetKeyCell(0).AsValue<TRowId>() < nextRowId)) {
115118
// eagerly include all history up to the next row id
116-
if (rowCount) AddPageSize(stats.DataSize, h.Pos.GetPageId(), h.GroupId);
117-
if (h.Pos.Next() == EReady::Page) {
119+
if (rowCount) {
120+
AddPageSize(stats.DataSize, HistoricGroups[0]->GetPageId(), TGroupId(0, true));
121+
}
122+
if (HistoricGroups[0]->Next() == EReady::Page) {
118123
ready = EReady::Page;
119124
break;
120125
}
121126
}
122-
TRowId nextHistoryRowId = h.Pos.IsValid() ? h.Pos.GetRowId() : Max<TRowId>();
123-
for (size_t index = 1; index < HistoryGroups.size(); ++index) {
124-
auto& g = HistoryGroups[index];
125-
while (g.Pos.IsValid() && g.Pos.GetRowId() < nextHistoryRowId) {
127+
TRowId nextHistoryRowId = HistoricGroups[0]->IsValid() ? HistoricGroups[0]->GetRowId() : Max<TRowId>();
128+
// Bound by HistoricGroups (not Groups): the loop body indexes HistoricGroups[groupIndex]
for (auto groupIndex : xrange<ui32>(1, HistoricGroups.size())) {
129+
while (HistoricGroups[groupIndex]->IsValid() && HistoricGroups[groupIndex]->GetRowId() < nextHistoryRowId) {
126130
// eagerly include all data up to the next row id
127-
if (rowCount) AddPageSize(stats.DataSize, g.Pos.GetPageId(), g.GroupId);
128-
if (g.Pos.Next() == EReady::Page) {
131+
if (rowCount) {
132+
AddPageSize(stats.DataSize, HistoricGroups[groupIndex]->GetPageId(), TGroupId(groupIndex, true));
133+
}
134+
if (HistoricGroups[groupIndex]->Next() == EReady::Page) {
129135
ready = EReady::Page;
130136
break;
131137
}
@@ -143,12 +149,13 @@ class TScreenedPartIndexIterator {
143149
}
144150

145151
FillKey();
152+
146153
return ready;
147154
}
148155

149156
TDbTupleRef GetCurrentKey() const {
150-
Y_ABORT_UNLESS(KeyColumns->BasicTypes().size() == CurrentKey.size());
151-
return TDbTupleRef(KeyColumns->BasicTypes().data(), CurrentKey.data(), CurrentKey.size());
157+
Y_ABORT_UNLESS(KeyDefaults->BasicTypes().size() == CurrentKey.size());
158+
return TDbTupleRef(KeyDefaults->BasicTypes().data(), CurrentKey.data(), CurrentKey.size());
152159
}
153160

154161
private:
@@ -158,23 +165,22 @@ class TScreenedPartIndexIterator {
158165

159166
ui64 GetCurrentRowId() const {
160167
if (IsValid()) {
161-
return Pos.GetRowId();
168+
return Groups[0]->GetRowId();
162169
}
163-
if (TRowId endRowId = Pos.GetEndRowId(); endRowId != Max<TRowId>()) {
170+
if (TRowId endRowId = Groups[0]->GetEndRowId(); endRowId != Max<TRowId>()) {
164171
// This would include the last page rows when known
165172
return endRowId;
166173
}
167174
return LastRowId;
168175
}
169176

170-
private:
171-
void AddPageSize(TPartDataSize& stats, TPageId pageId, NPage::TGroupId groupId = { }) const {
177+
void AddPageSize(TPartDataSize& stats, TPageId pageId, TGroupId groupId) const {
178+
// TODO: move to IStatsPartGroupIterator
172179
ui64 size = Part->GetPageSize(pageId, groupId);
173180
ui8 channel = Part->GetPageChannel(pageId, groupId);
174181
stats.Add(size, channel);
175182
}
176183

177-
private:
178184
void FillKey() {
179185
CurrentKey.clear();
180186

@@ -183,18 +189,19 @@ class TScreenedPartIndexIterator {
183189

184190
ui32 keyIdx = 0;
185191
// Add columns that are present in the part
186-
for (;keyIdx < Part->Scheme->Groups[0].KeyTypes.size(); ++keyIdx) {
187-
CurrentKey.push_back(Pos.GetRecord()->Cell(Part->Scheme->Groups[0].ColsKeyIdx[keyIdx]));
192+
if (ui32 keyCellsCount = Groups[0]->GetKeyCellsCount()) {
193+
for (;keyIdx < keyCellsCount; ++keyIdx) {
194+
CurrentKey.push_back(Groups[0]->GetKeyCell(keyIdx));
195+
}
188196
}
189197

190198
// Extend with default values if needed
191-
for (;keyIdx < KeyColumns->Defs.size(); ++keyIdx) {
192-
CurrentKey.push_back(KeyColumns->Defs[keyIdx]);
199+
for (;keyIdx < KeyDefaults->Defs.size(); ++keyIdx) {
200+
CurrentKey.push_back(KeyDefaults->Defs[keyIdx]);
193201
}
194202
}
195203

196-
private:
197-
ui64 IncludedRows(TRowId beginRowId, TRowId endRowId) noexcept {
204+
ui64 CountUnscreenedRows(TRowId beginRowId, TRowId endRowId) noexcept {
198205
if (!Screen) {
199206
// Include all rows
200207
return endRowId - beginRowId;
@@ -222,8 +229,7 @@ class TScreenedPartIndexIterator {
222229
return rowCount;
223230
}
224231

225-
private:
226-
void AddBlobsSize(TPartDataSize& stats, const NPage::TFrames* frames, ELargeObj lob, ui32 &prevPage) noexcept {
232+
void AddBlobsSize(TPartDataSize& stats, const TFrames* frames, ELargeObj lob, ui32 &prevPage) noexcept {
227233
const auto row = GetLastRowId();
228234
const auto end = GetCurrentRowId();
229235

@@ -235,35 +241,24 @@ class TScreenedPartIndexIterator {
235241
stats.Add(rel.Size, channel);
236242
++prevPage;
237243
} else if (!rel.IsHead()) {
238-
Y_ABORT("Got unaligned NPage::TFrames head record");
244+
Y_ABORT("Got unaligned TFrames head record");
239245
} else {
240246
break;
241247
}
242248
}
243249
}
244250

245-
private:
246-
struct TGroupState {
247-
TPartIndexIt Pos;
248-
const NPage::TGroupId GroupId;
249-
250-
TGroupState(const TPart* part, IPages* env, NPage::TGroupId groupId)
251-
: Pos(part, env, groupId)
252-
, GroupId(groupId)
253-
{ }
254-
};
255-
256251
private:
257252
TIntrusiveConstPtr<TPart> Part;
258-
TPartIndexIt Pos;
259-
TIntrusiveConstPtr<TKeyCellDefaults> KeyColumns;
253+
TIntrusiveConstPtr<TKeyCellDefaults> KeyDefaults;
260254
TSmallVec<TCell> CurrentKey;
261255
ui64 LastRowId = 0;
262-
TSmallVec<TGroupState> AltGroups;
263-
TSmallVec<TGroupState> HistoryGroups;
256+
257+
TVector<THolder<IStatsPartGroupIterator>> Groups;
258+
TVector<THolder<IStatsPartGroupIterator>> HistoricGroups;
264259
TIntrusiveConstPtr<TScreen> Screen;
265-
TIntrusiveConstPtr<NPage::TFrames> Small; /* Inverted index for small blobs */
266-
TIntrusiveConstPtr<NPage::TFrames> Large; /* Inverted index for large blobs */
260+
TIntrusiveConstPtr<TFrames> Small; /* Inverted index for small blobs */
261+
TIntrusiveConstPtr<TFrames> Large; /* Inverted index for large blobs */
267262
size_t CurrentHoleIdx = 0;
268263
TScreen::THole CurrentHole;
269264
ui32 PrevSmallPage = 0;

0 commit comments

Comments
 (0)