11#pragma once
22
33#include " flat_part_iface.h"
4- #include " flat_part_index_iter.h"
54#include " flat_part_laid.h"
65#include " flat_page_frames.h"
7- #include " util_basics .h"
6+ #include " flat_stat_part_group_iter_iface .h"
87
98#include < library/cpp/containers/stack_vector/stack_vec.h>
109#include < util/draft/holder_vector.h>
@@ -31,101 +30,108 @@ struct TPartDataStats {
3130};
3231
3332// Iterates over part index and calculates total row count and data size
34- // NOTE: we don't know row count for the last page so we also ignore its size
35- // This shouldn't be a problem for big parts with many pages
36- // This iterator skipps pages that are screened. Currently the logic is simple:
33+ // This iterator skips pages that are screened. Currently the logic is simple:
3734// if page start key is screened then we assume that the whole previous page is screened
3835// if page start key is not screened then the whole previous page is added to stats
39- class TScreenedPartIndexIterator {
36+ class TStatsScreenedPartIterator {
37+ using TGroupId = NPage::TGroupId;
38+ using TFrames = NPage::TFrames;
39+
4040public:
41- TScreenedPartIndexIterator (TPartView partView, IPages* env, TIntrusiveConstPtr<TKeyCellDefaults> keyColumns ,
42- TIntrusiveConstPtr<NPage:: TFrames> small, TIntrusiveConstPtr<NPage:: TFrames> large)
41+ TStatsScreenedPartIterator (TPartView partView, IPages* env, TIntrusiveConstPtr<TKeyCellDefaults> keyDefaults ,
42+ TIntrusiveConstPtr<TFrames> small, TIntrusiveConstPtr<TFrames> large)
4343 : Part(std::move(partView.Part))
44- , Pos(Part.Get(), env, {})
45- , KeyColumns(std::move(keyColumns))
44+ , KeyDefaults(std::move(keyDefaults))
45+ , Groups(::Reserve(Part->GroupsCount))
46+ , HistoricGroups(::Reserve(Part->HistoricGroupsCount))
4647 , Screen(std::move(partView.Screen))
4748 , Small(std::move(small))
4849 , Large(std::move(large))
4950 , CurrentHole(TScreen::Iter(Screen, CurrentHoleIdx, 0 , 1 ))
5051 {
51- AltGroups.reserve (Part->GroupsCount - 1 );
52- for (ui32 group : xrange (size_t (1 ), Part->GroupsCount )) {
53- AltGroups.emplace_back (Part.Get (), env, NPage::TGroupId (group));
52+ for (ui32 groupIndex : xrange (Part->GroupsCount )) {
53+ Groups.push_back (CreateStatsPartGroupIterator (Part.Get (), env, TGroupId (groupIndex)));
5454 }
55- for (ui32 group : xrange (Part->HistoricGroupsCount )) {
56- HistoryGroups. emplace_back ( Part.Get (), env, NPage:: TGroupId (group , true ));
55+ for (ui32 groupIndex : xrange (Part->HistoricGroupsCount )) {
56+ HistoricGroups. push_back ( CreateStatsPartGroupIterator ( Part.Get (), env, TGroupId (groupIndex , true ) ));
5757 }
5858 }
5959
6060 EReady Start () {
61- auto ready = Pos.Seek (0 );
62- if (ready != EReady::Page) {
63- FillKey ();
64- }
61+ auto ready = EReady::Data;
6562
66- for (auto & g : AltGroups ) {
67- if (g. Pos . Seek ( 0 ) == EReady::Page) {
63+ for (auto & iter : Groups ) {
64+ if (iter-> Start ( ) == EReady::Page) {
6865 ready = EReady::Page;
6966 }
7067 }
71- for (auto & g : HistoryGroups ) {
72- if (g. Pos . Seek ( 0 ) == EReady::Page) {
68+ for (auto & iter : HistoricGroups ) {
69+ if (iter-> Start ( ) == EReady::Page) {
7370 ready = EReady::Page;
7471 }
7572 }
7673
74+ if (ready != EReady::Page) {
75+ FillKey ();
76+ }
77+
7778 return ready;
7879 }
7980
8081 bool IsValid () const {
81- return Pos. IsValid ();
82+ return Groups[ 0 ]-> IsValid ();
8283 }
8384
8485 EReady Next (TPartDataStats& stats) {
8586 Y_ABORT_UNLESS (IsValid ());
8687
87- auto curPageId = Pos. GetPageId ();
88- LastRowId = Pos. GetRowId ();
89- auto ready = Pos. Next ();
88+ auto curPageId = Groups[ 0 ]-> GetPageId ();
89+ LastRowId = Groups[ 0 ]-> GetRowId ();
90+ auto ready = Groups[ 0 ]-> Next ();
9091 if (ready == EReady::Page) {
9192 return ready;
9293 }
93- ui64 rowCount = IncludedRows (GetLastRowId (), GetCurrentRowId ());
94- stats.RowCount += rowCount;
9594
96- if (rowCount) AddPageSize (stats.DataSize , curPageId);
97- TRowId nextRowId = ready == EReady::Data ? Pos.GetRowId () : Max<TRowId>();
98- for (auto & g : AltGroups) {
99- while (g.Pos .IsValid () && g.Pos .GetRowId () < nextRowId) {
95+ ui64 rowCount = CountUnscreenedRows (GetLastRowId (), GetCurrentRowId ());
96+ stats.RowCount += rowCount;
97+ if (rowCount) {
98+ AddPageSize (stats.DataSize , curPageId, TGroupId (0 ));
99+ }
100+
101+ TRowId nextRowId = ready == EReady::Data ? Groups[0 ]->GetRowId () : Max<TRowId>();
102+ for (auto groupIndex : xrange<ui32>(1 , Groups.size ())) {
103+ while (Groups[groupIndex]->IsValid () && Groups[groupIndex]->GetRowId () < nextRowId) {
100104 // eagerly include all data up to the next row id
101- if (rowCount) AddPageSize (stats.DataSize , g.Pos .GetPageId (), g.GroupId );
102- if (g.Pos .Next () == EReady::Page) {
105+ if (rowCount) {
106+ AddPageSize (stats.DataSize , Groups[groupIndex]->GetPageId (), TGroupId (groupIndex));
107+ }
108+ if (Groups[groupIndex]->Next () == EReady::Page) {
103109 ready = EReady::Page;
104110 break ;
105111 }
106112 }
107113 }
108114
109- // Include mvcc data
110- if (!HistoryGroups.empty ()) {
111- auto & h = HistoryGroups[0 ];
112- const auto & hscheme = Part->Scheme ->HistoryGroup ;
113- Y_DEBUG_ABORT_UNLESS (hscheme.ColsKeyIdx .size () == 3 );
114- while (h.Pos .IsValid () && h.Pos .GetRecord ()->Cell (hscheme.ColsKeyIdx [0 ]).AsValue <TRowId>() < nextRowId) {
115+ if (HistoricGroups) {
116+ Y_DEBUG_ABORT_UNLESS (Part->Scheme ->HistoryGroup .ColsKeyIdx .size () == 3 );
117+ while (HistoricGroups[0 ]->IsValid () && (!HistoricGroups[0 ]->GetKeyCellsCount () || HistoricGroups[0 ]->GetKeyCell (0 ).AsValue <TRowId>() < nextRowId)) {
115118 // eagerly include all history up to the next row id
116- if (rowCount) AddPageSize (stats.DataSize , h.Pos .GetPageId (), h.GroupId );
117- if (h.Pos .Next () == EReady::Page) {
119+ if (rowCount) {
120+ AddPageSize (stats.DataSize , HistoricGroups[0 ]->GetPageId (), TGroupId (0 , true ));
121+ }
122+ if (HistoricGroups[0 ]->Next () == EReady::Page) {
118123 ready = EReady::Page;
119124 break ;
120125 }
121126 }
122- TRowId nextHistoryRowId = h.Pos .IsValid () ? h.Pos .GetRowId () : Max<TRowId>();
123- for (size_t index = 1 ; index < HistoryGroups.size (); ++index) {
124- auto & g = HistoryGroups[index];
125- while (g.Pos .IsValid () && g.Pos .GetRowId () < nextHistoryRowId) {
127+ TRowId nextHistoryRowId = HistoricGroups[0 ]->IsValid () ? HistoricGroups[0 ]->GetRowId () : Max<TRowId>();
128+ for (auto groupIndex : xrange<ui32>(1 , HistoricGroups.size ())) { // bound by the vector that is indexed in the loop body
129+ while (HistoricGroups[groupIndex]->IsValid () && HistoricGroups[groupIndex]->GetRowId () < nextHistoryRowId) {
126130 // eagerly include all data up to the next row id
127- if (rowCount) AddPageSize (stats.DataSize , g.Pos .GetPageId (), g.GroupId );
128- if (g.Pos .Next () == EReady::Page) {
131+ if (rowCount) {
132+ AddPageSize (stats.DataSize , HistoricGroups[groupIndex]->GetPageId (), TGroupId (groupIndex, true ));
133+ }
134+ if (HistoricGroups[groupIndex]->Next () == EReady::Page) {
129135 ready = EReady::Page;
130136 break ;
131137 }
@@ -143,12 +149,13 @@ class TScreenedPartIndexIterator {
143149 }
144150
145151 FillKey ();
152+
146153 return ready;
147154 }
148155
149156 TDbTupleRef GetCurrentKey () const {
150- Y_ABORT_UNLESS (KeyColumns ->BasicTypes ().size () == CurrentKey.size ());
151- return TDbTupleRef (KeyColumns ->BasicTypes ().data (), CurrentKey.data (), CurrentKey.size ());
157+ Y_ABORT_UNLESS (KeyDefaults ->BasicTypes ().size () == CurrentKey.size ());
158+ return TDbTupleRef (KeyDefaults ->BasicTypes ().data (), CurrentKey.data (), CurrentKey.size ());
152159 }
153160
154161private:
@@ -158,23 +165,22 @@ class TScreenedPartIndexIterator {
158165
159166 ui64 GetCurrentRowId () const {
160167 if (IsValid ()) {
161- return Pos. GetRowId ();
168+ return Groups[ 0 ]-> GetRowId ();
162169 }
163- if (TRowId endRowId = Pos. GetEndRowId (); endRowId != Max<TRowId>()) {
170+ if (TRowId endRowId = Groups[ 0 ]-> GetEndRowId (); endRowId != Max<TRowId>()) {
164171 // This would include the last page rows when known
165172 return endRowId;
166173 }
167174 return LastRowId;
168175 }
169176
170- private:
171- void AddPageSize (TPartDataSize& stats, TPageId pageId, NPage::TGroupId groupId = { }) const {
177+ void AddPageSize (TPartDataSize& stats, TPageId pageId, TGroupId groupId) const {
178+ // TODO: move to IStatsPartGroupIterator
172179 ui64 size = Part->GetPageSize (pageId, groupId);
173180 ui8 channel = Part->GetPageChannel (pageId, groupId);
174181 stats.Add (size, channel);
175182 }
176183
177- private:
178184 void FillKey () {
179185 CurrentKey.clear ();
180186
@@ -183,18 +189,19 @@ class TScreenedPartIndexIterator {
183189
184190 ui32 keyIdx = 0 ;
185191 // Add columns that are present in the part
186- for (;keyIdx < Part->Scheme ->Groups [0 ].KeyTypes .size (); ++keyIdx) {
187- CurrentKey.push_back (Pos.GetRecord ()->Cell (Part->Scheme ->Groups [0 ].ColsKeyIdx [keyIdx]));
192+ if (ui32 keyCellsCount = Groups[0 ]->GetKeyCellsCount ()) {
193+ for (;keyIdx < keyCellsCount; ++keyIdx) {
194+ CurrentKey.push_back (Groups[0 ]->GetKeyCell (keyIdx));
195+ }
188196 }
189197
190198 // Extend with default values if needed
191- for (;keyIdx < KeyColumns ->Defs .size (); ++keyIdx) {
192- CurrentKey.push_back (KeyColumns ->Defs [keyIdx]);
199+ for (;keyIdx < KeyDefaults ->Defs .size (); ++keyIdx) {
200+ CurrentKey.push_back (KeyDefaults ->Defs [keyIdx]);
193201 }
194202 }
195203
196- private:
197- ui64 IncludedRows (TRowId beginRowId, TRowId endRowId) noexcept {
204+ ui64 CountUnscreenedRows (TRowId beginRowId, TRowId endRowId) noexcept {
198205 if (!Screen) {
199206 // Include all rows
200207 return endRowId - beginRowId;
@@ -222,8 +229,7 @@ class TScreenedPartIndexIterator {
222229 return rowCount;
223230 }
224231
225- private:
226- void AddBlobsSize (TPartDataSize& stats, const NPage::TFrames* frames, ELargeObj lob, ui32 &prevPage) noexcept {
232+ void AddBlobsSize (TPartDataSize& stats, const TFrames* frames, ELargeObj lob, ui32 &prevPage) noexcept {
227233 const auto row = GetLastRowId ();
228234 const auto end = GetCurrentRowId ();
229235
@@ -235,35 +241,24 @@ class TScreenedPartIndexIterator {
235241 stats.Add (rel.Size , channel);
236242 ++prevPage;
237243 } else if (!rel.IsHead ()) {
238- Y_ABORT (" Got unaligned NPage:: TFrames head record" );
244+ Y_ABORT (" Got unaligned TFrames head record" );
239245 } else {
240246 break ;
241247 }
242248 }
243249 }
244250
245- private:
246- struct TGroupState {
247- TPartIndexIt Pos;
248- const NPage::TGroupId GroupId;
249-
250- TGroupState (const TPart* part, IPages* env, NPage::TGroupId groupId)
251- : Pos(part, env, groupId)
252- , GroupId(groupId)
253- { }
254- };
255-
256251private:
257252 TIntrusiveConstPtr<TPart> Part;
258- TPartIndexIt Pos;
259- TIntrusiveConstPtr<TKeyCellDefaults> KeyColumns;
253+ TIntrusiveConstPtr<TKeyCellDefaults> KeyDefaults;
260254 TSmallVec<TCell> CurrentKey;
261255 ui64 LastRowId = 0 ;
262- TSmallVec<TGroupState> AltGroups;
263- TSmallVec<TGroupState> HistoryGroups;
256+
257+ TVector<THolder<IStatsPartGroupIterator>> Groups;
258+ TVector<THolder<IStatsPartGroupIterator>> HistoricGroups;
264259 TIntrusiveConstPtr<TScreen> Screen;
265- TIntrusiveConstPtr<NPage:: TFrames> Small; /* Inverted index for small blobs */
266- TIntrusiveConstPtr<NPage:: TFrames> Large; /* Inverted index for large blobs */
260+ TIntrusiveConstPtr<TFrames> Small; /* Inverted index for small blobs */
261+ TIntrusiveConstPtr<TFrames> Large; /* Inverted index for large blobs */
267262 size_t CurrentHoleIdx = 0 ;
268263 TScreen::THole CurrentHole;
269264 ui32 PrevSmallPage = 0 ;
0 commit comments