1
1
#pragma once
2
2
3
3
#include " flat_part_iface.h"
4
- #include " flat_part_index_iter.h"
5
4
#include " flat_part_laid.h"
6
5
#include " flat_page_frames.h"
7
- #include " util_basics .h"
6
+ #include " flat_stat_part_group_iter_iface .h"
8
7
9
8
#include < library/cpp/containers/stack_vector/stack_vec.h>
10
9
#include < util/draft/holder_vector.h>
@@ -31,101 +30,108 @@ struct TPartDataStats {
31
30
};
32
31
33
32
// Iterates over part index and calculates total row count and data size
34
- // NOTE: we don't know row count for the last page so we also ignore its size
35
- // This shouldn't be a problem for big parts with many pages
36
- // This iterator skipps pages that are screened. Currently the logic is simple:
33
+ // This iterator skips pages that are screened. Currently the logic is simple:
37
34
// if page start key is screened then we assume that the whole previous page is screened
38
35
// if page start key is not screened then the whole previous page is added to stats
39
- class TScreenedPartIndexIterator {
36
+ class TStatsScreenedPartIterator {
37
+ using TGroupId = NPage::TGroupId;
38
+ using TFrames = NPage::TFrames;
39
+
40
40
public:
41
- TScreenedPartIndexIterator (TPartView partView, IPages* env, TIntrusiveConstPtr<TKeyCellDefaults> keyColumns ,
42
- TIntrusiveConstPtr<NPage:: TFrames> small, TIntrusiveConstPtr<NPage:: TFrames> large)
41
+ TStatsScreenedPartIterator (TPartView partView, IPages* env, TIntrusiveConstPtr<TKeyCellDefaults> keyDefaults ,
42
+ TIntrusiveConstPtr<TFrames> small, TIntrusiveConstPtr<TFrames> large)
43
43
: Part(std::move(partView.Part))
44
- , Pos(Part.Get(), env, {})
45
- , KeyColumns(std::move(keyColumns))
44
+ , KeyDefaults(std::move(keyDefaults))
45
+ , Groups(::Reserve(Part->GroupsCount))
46
+ , HistoricGroups(::Reserve(Part->HistoricGroupsCount))
46
47
, Screen(std::move(partView.Screen))
47
48
, Small(std::move(small))
48
49
, Large(std::move(large))
49
50
, CurrentHole(TScreen::Iter(Screen, CurrentHoleIdx, 0 , 1 ))
50
51
{
51
- AltGroups.reserve (Part->GroupsCount - 1 );
52
- for (ui32 group : xrange (size_t (1 ), Part->GroupsCount )) {
53
- AltGroups.emplace_back (Part.Get (), env, NPage::TGroupId (group));
52
+ for (ui32 groupIndex : xrange (Part->GroupsCount )) {
53
+ Groups.push_back (CreateStatsPartGroupIterator (Part.Get (), env, TGroupId (groupIndex)));
54
54
}
55
- for (ui32 group : xrange (Part->HistoricGroupsCount )) {
56
- HistoryGroups. emplace_back ( Part.Get (), env, NPage:: TGroupId (group , true ));
55
+ for (ui32 groupIndex : xrange (Part->HistoricGroupsCount )) {
56
+ HistoricGroups. push_back ( CreateStatsPartGroupIterator ( Part.Get (), env, TGroupId (groupIndex , true ) ));
57
57
}
58
58
}
59
59
60
60
EReady Start () {
61
- auto ready = Pos.Seek (0 );
62
- if (ready != EReady::Page) {
63
- FillKey ();
64
- }
61
+ auto ready = EReady::Data;
65
62
66
- for (auto & g : AltGroups ) {
67
- if (g. Pos . Seek ( 0 ) == EReady::Page) {
63
+ for (auto & iter : Groups ) {
64
+ if (iter-> Start ( ) == EReady::Page) {
68
65
ready = EReady::Page;
69
66
}
70
67
}
71
- for (auto & g : HistoryGroups ) {
72
- if (g. Pos . Seek ( 0 ) == EReady::Page) {
68
+ for (auto & iter : HistoricGroups ) {
69
+ if (iter-> Start ( ) == EReady::Page) {
73
70
ready = EReady::Page;
74
71
}
75
72
}
76
73
74
+ if (ready != EReady::Page) {
75
+ FillKey ();
76
+ }
77
+
77
78
return ready;
78
79
}
79
80
80
81
bool IsValid () const {
81
- return Pos. IsValid ();
82
+ return Groups[ 0 ]-> IsValid ();
82
83
}
83
84
84
85
EReady Next (TPartDataStats& stats) {
85
86
Y_ABORT_UNLESS (IsValid ());
86
87
87
- auto curPageId = Pos. GetPageId ();
88
- LastRowId = Pos. GetRowId ();
89
- auto ready = Pos. Next ();
88
+ auto curPageId = Groups[ 0 ]-> GetPageId ();
89
+ LastRowId = Groups[ 0 ]-> GetRowId ();
90
+ auto ready = Groups[ 0 ]-> Next ();
90
91
if (ready == EReady::Page) {
91
92
return ready;
92
93
}
93
- ui64 rowCount = IncludedRows (GetLastRowId (), GetCurrentRowId ());
94
- stats.RowCount += rowCount;
95
94
96
- if (rowCount) AddPageSize (stats.DataSize , curPageId);
97
- TRowId nextRowId = ready == EReady::Data ? Pos.GetRowId () : Max<TRowId>();
98
- for (auto & g : AltGroups) {
99
- while (g.Pos .IsValid () && g.Pos .GetRowId () < nextRowId) {
95
+ ui64 rowCount = CountUnscreenedRows (GetLastRowId (), GetCurrentRowId ());
96
+ stats.RowCount += rowCount;
97
+ if (rowCount) {
98
+ AddPageSize (stats.DataSize , curPageId, TGroupId (0 ));
99
+ }
100
+
101
+ TRowId nextRowId = ready == EReady::Data ? Groups[0 ]->GetRowId () : Max<TRowId>();
102
+ for (auto groupIndex : xrange<ui32>(1 , Groups.size ())) {
103
+ while (Groups[groupIndex]->IsValid () && Groups[groupIndex]->GetRowId () < nextRowId) {
100
104
// eagerly include all data up to the next row id
101
- if (rowCount) AddPageSize (stats.DataSize , g.Pos .GetPageId (), g.GroupId );
102
- if (g.Pos .Next () == EReady::Page) {
105
+ if (rowCount) {
106
+ AddPageSize (stats.DataSize , Groups[groupIndex]->GetPageId (), TGroupId (groupIndex));
107
+ }
108
+ if (Groups[groupIndex]->Next () == EReady::Page) {
103
109
ready = EReady::Page;
104
110
break ;
105
111
}
106
112
}
107
113
}
108
114
109
- // Include mvcc data
110
- if (!HistoryGroups.empty ()) {
111
- auto & h = HistoryGroups[0 ];
112
- const auto & hscheme = Part->Scheme ->HistoryGroup ;
113
- Y_DEBUG_ABORT_UNLESS (hscheme.ColsKeyIdx .size () == 3 );
114
- while (h.Pos .IsValid () && h.Pos .GetRecord ()->Cell (hscheme.ColsKeyIdx [0 ]).AsValue <TRowId>() < nextRowId) {
115
+ if (HistoricGroups) {
116
+ Y_DEBUG_ABORT_UNLESS (Part->Scheme ->HistoryGroup .ColsKeyIdx .size () == 3 );
117
+ while (HistoricGroups[0 ]->IsValid () && (!HistoricGroups[0 ]->GetKeyCellsCount () || HistoricGroups[0 ]->GetKeyCell (0 ).AsValue <TRowId>() < nextRowId)) {
115
118
// eagerly include all history up to the next row id
116
- if (rowCount) AddPageSize (stats.DataSize , h.Pos .GetPageId (), h.GroupId );
117
- if (h.Pos .Next () == EReady::Page) {
119
+ if (rowCount) {
120
+ AddPageSize (stats.DataSize , HistoricGroups[0 ]->GetPageId (), TGroupId (0 , true ));
121
+ }
122
+ if (HistoricGroups[0 ]->Next () == EReady::Page) {
118
123
ready = EReady::Page;
119
124
break ;
120
125
}
121
126
}
122
- TRowId nextHistoryRowId = h.Pos .IsValid () ? h.Pos .GetRowId () : Max<TRowId>();
123
- for (size_t index = 1 ; index < HistoryGroups.size (); ++index) {
124
- auto & g = HistoryGroups[index];
125
- while (g.Pos .IsValid () && g.Pos .GetRowId () < nextHistoryRowId) {
127
+ TRowId nextHistoryRowId = HistoricGroups[0 ]->IsValid () ? HistoricGroups[0 ]->GetRowId () : Max<TRowId>();
128
+ for (auto groupIndex : xrange<ui32>(1 , HistoricGroups.size ())) { // bound by HistoricGroups: it is the container indexed in this loop
129
+ while (HistoricGroups[groupIndex]->IsValid () && HistoricGroups[groupIndex]->GetRowId () < nextHistoryRowId) {
126
130
// eagerly include all data up to the next row id
127
- if (rowCount) AddPageSize (stats.DataSize , g.Pos .GetPageId (), g.GroupId );
128
- if (g.Pos .Next () == EReady::Page) {
131
+ if (rowCount) {
132
+ AddPageSize (stats.DataSize , HistoricGroups[groupIndex]->GetPageId (), TGroupId (groupIndex, true ));
133
+ }
134
+ if (HistoricGroups[groupIndex]->Next () == EReady::Page) {
129
135
ready = EReady::Page;
130
136
break ;
131
137
}
@@ -143,12 +149,13 @@ class TScreenedPartIndexIterator {
143
149
}
144
150
145
151
FillKey ();
152
+
146
153
return ready;
147
154
}
148
155
149
156
TDbTupleRef GetCurrentKey () const {
150
- Y_ABORT_UNLESS (KeyColumns ->BasicTypes ().size () == CurrentKey.size ());
151
- return TDbTupleRef (KeyColumns ->BasicTypes ().data (), CurrentKey.data (), CurrentKey.size ());
157
+ Y_ABORT_UNLESS (KeyDefaults ->BasicTypes ().size () == CurrentKey.size ());
158
+ return TDbTupleRef (KeyDefaults ->BasicTypes ().data (), CurrentKey.data (), CurrentKey.size ());
152
159
}
153
160
154
161
private:
@@ -158,23 +165,22 @@ class TScreenedPartIndexIterator {
158
165
159
166
ui64 GetCurrentRowId () const {
160
167
if (IsValid ()) {
161
- return Pos. GetRowId ();
168
+ return Groups[ 0 ]-> GetRowId ();
162
169
}
163
- if (TRowId endRowId = Pos. GetEndRowId (); endRowId != Max<TRowId>()) {
170
+ if (TRowId endRowId = Groups[ 0 ]-> GetEndRowId (); endRowId != Max<TRowId>()) {
164
171
// This would include the last page rows when known
165
172
return endRowId;
166
173
}
167
174
return LastRowId;
168
175
}
169
176
170
- private:
171
- void AddPageSize (TPartDataSize& stats, TPageId pageId, NPage::TGroupId groupId = { }) const {
177
+ void AddPageSize (TPartDataSize& stats, TPageId pageId, TGroupId groupId) const {
178
+ // TODO: move to IStatsPartGroupIterator
172
179
ui64 size = Part->GetPageSize (pageId, groupId);
173
180
ui8 channel = Part->GetPageChannel (pageId, groupId);
174
181
stats.Add (size, channel);
175
182
}
176
183
177
- private:
178
184
void FillKey () {
179
185
CurrentKey.clear ();
180
186
@@ -183,18 +189,19 @@ class TScreenedPartIndexIterator {
183
189
184
190
ui32 keyIdx = 0 ;
185
191
// Add columns that are present in the part
186
- for (;keyIdx < Part->Scheme ->Groups [0 ].KeyTypes .size (); ++keyIdx) {
187
- CurrentKey.push_back (Pos.GetRecord ()->Cell (Part->Scheme ->Groups [0 ].ColsKeyIdx [keyIdx]));
192
+ if (ui32 keyCellsCount = Groups[0 ]->GetKeyCellsCount ()) {
193
+ for (;keyIdx < keyCellsCount; ++keyIdx) {
194
+ CurrentKey.push_back (Groups[0 ]->GetKeyCell (keyIdx));
195
+ }
188
196
}
189
197
190
198
// Extend with default values if needed
191
- for (;keyIdx < KeyColumns ->Defs .size (); ++keyIdx) {
192
- CurrentKey.push_back (KeyColumns ->Defs [keyIdx]);
199
+ for (;keyIdx < KeyDefaults ->Defs .size (); ++keyIdx) {
200
+ CurrentKey.push_back (KeyDefaults ->Defs [keyIdx]);
193
201
}
194
202
}
195
203
196
- private:
197
- ui64 IncludedRows (TRowId beginRowId, TRowId endRowId) noexcept {
204
+ ui64 CountUnscreenedRows (TRowId beginRowId, TRowId endRowId) noexcept {
198
205
if (!Screen) {
199
206
// Include all rows
200
207
return endRowId - beginRowId;
@@ -222,8 +229,7 @@ class TScreenedPartIndexIterator {
222
229
return rowCount;
223
230
}
224
231
225
- private:
226
- void AddBlobsSize (TPartDataSize& stats, const NPage::TFrames* frames, ELargeObj lob, ui32 &prevPage) noexcept {
232
+ void AddBlobsSize (TPartDataSize& stats, const TFrames* frames, ELargeObj lob, ui32 &prevPage) noexcept {
227
233
const auto row = GetLastRowId ();
228
234
const auto end = GetCurrentRowId ();
229
235
@@ -235,35 +241,24 @@ class TScreenedPartIndexIterator {
235
241
stats.Add (rel.Size , channel);
236
242
++prevPage;
237
243
} else if (!rel.IsHead ()) {
238
- Y_ABORT (" Got unaligned NPage:: TFrames head record" );
244
+ Y_ABORT (" Got unaligned TFrames head record" );
239
245
} else {
240
246
break ;
241
247
}
242
248
}
243
249
}
244
250
245
- private:
246
- struct TGroupState {
247
- TPartIndexIt Pos;
248
- const NPage::TGroupId GroupId;
249
-
250
- TGroupState (const TPart* part, IPages* env, NPage::TGroupId groupId)
251
- : Pos(part, env, groupId)
252
- , GroupId(groupId)
253
- { }
254
- };
255
-
256
251
private:
257
252
TIntrusiveConstPtr<TPart> Part;
258
- TPartIndexIt Pos;
259
- TIntrusiveConstPtr<TKeyCellDefaults> KeyColumns;
253
+ TIntrusiveConstPtr<TKeyCellDefaults> KeyDefaults;
260
254
TSmallVec<TCell> CurrentKey;
261
255
ui64 LastRowId = 0 ;
262
- TSmallVec<TGroupState> AltGroups;
263
- TSmallVec<TGroupState> HistoryGroups;
256
+
257
+ TVector<THolder<IStatsPartGroupIterator>> Groups;
258
+ TVector<THolder<IStatsPartGroupIterator>> HistoricGroups;
264
259
TIntrusiveConstPtr<TScreen> Screen;
265
- TIntrusiveConstPtr<NPage:: TFrames> Small; /* Inverted index for small blobs */
266
- TIntrusiveConstPtr<NPage:: TFrames> Large; /* Inverted index for large blobs */
260
+ TIntrusiveConstPtr<TFrames> Small; /* Inverted index for small blobs */
261
+ TIntrusiveConstPtr<TFrames> Large; /* Inverted index for large blobs */
267
262
size_t CurrentHoleIdx = 0 ;
268
263
TScreen::THole CurrentHole;
269
264
ui32 PrevSmallPage = 0 ;
0 commit comments