Skip to content

Commit 3f2835a

Browse files
authored
Merge df01bd9 into e837989
2 parents e837989 + df01bd9 commit 3f2835a

File tree

8 files changed

+110
-44
lines changed

8 files changed

+110
-44
lines changed

ydb/core/tablet_flat/flat_page_btree_index.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include <ydb/core/base/defs.h>
4+
#include <ydb/core/scheme/scheme_tablecell.h>
45
#include <util/generic/bitmap.h>
56
#include "flat_page_base.h"
67
#include "flat_page_label.h"
@@ -282,6 +283,23 @@ namespace NKikimr::NTable::NPage {
282283
return Count() > 0;
283284
}
284285

286+
// Writes the cells of this key to `out` as a parenthesized, comma-separated
// list: "(v0, v1, ...)". An iterable with zero cells is written as "()".
//
// out         - stream that receives the text
// keyDefaults - supplies the per-column type (via BasicTypes()) that is
//               passed to DbgPrintValue for formatting each cell
void Describe(IOutputStream& out, const TKeyCellDefaults& keyDefaults) const
287+
{
288+
out << '(';
289+
290+
auto iter = Iter();
291+
for (TPos pos : xrange(iter.Count())) {
292+
if (pos != 0) {
293+
out << ", ";
294+
}
295+
TString value;
296+
// NOTE(review): BasicTypes() is indexed by pos without a bounds check;
// presumably the key never has more cells than keyDefaults has types - confirm.
DbgPrintValue(value, iter.Next(), keyDefaults.BasicTypes()[pos]);
297+
out << value;
298+
}
299+
300+
out << ')';
301+
}
302+
285303
private:
286304
const TIsNullBitmap* IsNullBitmap;
287305
TColumns Columns;

ydb/core/tablet_flat/flat_part_slice.cpp

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ namespace {
1212

1313
void PrintCells(IOutputStream& out, TArrayRef<const TCell> cells, const TCellDefaults& cellDefaults)
1414
{
15-
out << '{';
15+
out << '(';
1616
size_t pos = 0;
1717
for (const TCell& cell : cells) {
1818
if (pos != 0) {
@@ -22,7 +22,7 @@ void PrintCells(IOutputStream& out, TArrayRef<const TCell> cells, const TCellDef
2222
DbgPrintValue(value, cell, cellDefaults.Types[pos++]);
2323
out << value;
2424
}
25-
out << '}';
25+
out << ')';
2626
}
2727

2828
bool ValidateSlices(TConstArrayRef<TSlice> slices) noexcept
@@ -183,7 +183,7 @@ void TSlice::Describe(IOutputStream& out) const
183183
{
184184
out << (FirstInclusive ? '[' : '(');
185185
out << FirstRowId;
186-
out << ',';
186+
out << ", ";
187187
if (LastRowId != Max<TRowId>()) {
188188
out << LastRowId;
189189
} else {
@@ -192,6 +192,15 @@ void TSlice::Describe(IOutputStream& out) const
192192
out << (LastInclusive ? ']' : ')');
193193
}
194194

195+
// Writes this slice to `out` as "{rows: <row range> keys: <key bounds>}".
// The row range is produced by the single-argument Describe(out) overload and
// the key bounds by TBounds::Describe(out, keyDefaults).
void TSlice::Describe(IOutputStream& out, const TKeyCellDefaults& keyDefaults) const
196+
{
197+
out << "{rows: ";
198+
Describe(out);
199+
out << " keys: ";
200+
// Qualified call: selects the TBounds implementation rather than one of the
// TSlice::Describe overloads.
TBounds::Describe(out, keyDefaults);
201+
out << "}";
202+
}
203+
195204
void TSlices::Describe(IOutputStream& out) const
196205
{
197206
bool first = true;
@@ -206,6 +215,20 @@ void TSlices::Describe(IOutputStream& out) const
206215
out << (first ? "}" : " }");
207216
}
208217

218+
// Writes every slice of this collection to `out` as "{ s0, s1, ... }", each
// element formatted by its own Describe(out, keyDefaults).
// An empty collection is written as "{ }".
void TSlices::Describe(IOutputStream& out, const TKeyCellDefaults& keyDefaults) const
219+
{
220+
bool first = true;
221+
out << "{ ";
222+
for (const auto& bounds : *this) {
223+
if (first)
224+
first = false;
225+
else
226+
out << ", ";
227+
bounds.Describe(out, keyDefaults);
228+
}
229+
// `first` is still true only when the loop never ran; the opening "{ " already
// ends with a space, so the empty case closes without adding another one.
out << (first ? "}" : " }");
230+
}
231+
209232
void TSlices::Validate() const
210233
{
211234
TRowId lastEnd = 0;

ydb/core/tablet_flat/flat_part_slice.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ namespace NTable {
133133
}
134134

135135
void Describe(IOutputStream& out) const;
136+
void Describe(IOutputStream& out, const TKeyCellDefaults& keyDefaults) const;
136137

137138
/**
138139
* Returns true if first row of a is less than first row of b
@@ -328,6 +329,8 @@ namespace NTable {
328329

329330
void Describe(IOutputStream& out) const;
330331

332+
void Describe(IOutputStream& out, const TKeyCellDefaults& keyDefaults) const;
333+
331334
/**
332335
* Validate slices are correct, crash otherwise
333336
*/

ydb/core/tablet_flat/flat_part_writer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -969,7 +969,7 @@ namespace NTable {
969969

970970
TPos it;
971971
for (it = 0; it < Key.size(); it++) {
972-
if (int cmp = CompareTypedCells(PrevPageLastKey[it], Key[it], layout.KeyTypes[it])) {
972+
if (CompareTypedCells(PrevPageLastKey[it], Key[it], layout.KeyTypes[it]) != 0) {
973973
break;
974974
}
975975
}

ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp

Lines changed: 50 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class TTableHistogramBuilderBtreeIndex {
4848
{
4949
}
5050

51-
TString ToString() const {
51+
TString ToString(const TKeyCellDefaults &keyDefaults) const {
5252
return TStringBuilder()
5353
<< "Part: " << Part->Label.ToString()
5454
<< " PageId: " << PageId
@@ -57,8 +57,8 @@ class TTableHistogramBuilderBtreeIndex {
5757
<< " EndRowId: " << EndRowId
5858
<< " BeginDataSize: " << BeginDataSize
5959
<< " EndDataSize: " << EndDataSize
60-
<< " BeginKey: " << BeginKey.Count()
61-
<< " EndKey: " << EndKey.Count()
60+
<< " BeginKey: " << NFmt::Do(BeginKey, keyDefaults)
61+
<< " EndKey: " << NFmt::Do(EndKey, keyDefaults)
6262
<< " State: " << (ui32)State;
6363
}
6464

@@ -130,15 +130,17 @@ class TTableHistogramBuilderBtreeIndex {
130130
};
131131

132132
// Begin or end event of a node. In this diff the separately stored Key member
// is removed: the event keeps only the node pointer and the IsBegin flag, and
// the key is obtained from the node through GetKey().
struct TEvent {
133-
TCellsIterable Key;
134-
bool IsBegin;
135133
TNodeState* Node;
134+
bool IsBegin;
136135

137-
TString ToString() const {
136+
TString ToString(const TKeyCellDefaults &keyDefaults) const {
138137
return TStringBuilder()
139-
<< Node->ToString()
140-
<< " IsBegin: " << IsBegin
141-
<< " Key: " << Key.Count();
138+
<< "IsBegin: " << IsBegin
139+
<< " " << Node->ToString(keyDefaults);
140+
}
141+
142+
// Returns the node's BeginKey for a begin event and its EndKey for an end event.
const TCellsIterable& GetKey() const {
143+
return IsBegin ? Node->BeginKey : Node->EndKey;
142144
}
143145
};
144146

@@ -149,7 +151,7 @@ class TTableHistogramBuilderBtreeIndex {
149151
return Compare(a, b) > 0;
150152
}
151153

152-
i8 Compare(const TEvent& a, const TEvent& b) const {
154+
int Compare(const TEvent& a, const TEvent& b) const {
153155
// events go in order:
154156
// - Key = {}, IsBegin = true
155157
// - ...
@@ -161,13 +163,16 @@ class TTableHistogramBuilderBtreeIndex {
161163
// - ...
162164
// - Key = {}, IsBegin = false
163165

164-
if (a.Key && b.Key) { // compare by keys
165-
auto cmp = CompareKeys(a.Key, b.Key, KeyDefaults);
166+
// end goes before begin in order to
167+
// close the previous node before opening the next one
168+
169+
if (a.GetKey() && b.GetKey()) { // compare by keys
170+
auto cmp = CompareKeys(a.GetKey(), b.GetKey(), KeyDefaults);
166171
if (cmp != 0) {
167172
return cmp;
168173
}
169174
// keys are the same, compare by begin flag, end events first:
170-
return Compare(a.IsBegin ? 1 : -1, b.IsBegin ? 1 : -1);
175+
return Compare(a.IsBegin ? +1 : -1, b.IsBegin ? +1 : -1);
171176
}
172177

173178
// category = -1 for Key = { }, IsBegin = true
@@ -177,14 +182,14 @@ class TTableHistogramBuilderBtreeIndex {
177182
}
178183

179184
private:
180-
// Classifies an event for ordering: 0 when the event's key tests true
// (presumably a non-empty key - confirm against TCellsIterable), otherwise
// -1 for a begin event and +1 for an end event.
// In this diff the return type changes from i8 to int and the key is read
// through GetKey() instead of the removed Key member.
static i8 GetCategory(const TEvent& a) {
181-
if (a.Key) {
185+
static int GetCategory(const TEvent& a) {
186+
if (a.GetKey()) {
182187
return 0;
183188
}
184189
return a.IsBegin ? -1 : +1;
185190
}
186191

187-
static i8 Compare(i8 a, i8 b) {
192+
static int Compare(int a, int b) {
188193
if (a < b) return -1;
189194
if (a > b) return +1;
190195
return 0;
@@ -226,6 +231,9 @@ class TTableHistogramBuilderBtreeIndex {
226231

227232
for (auto index : xrange(Subset.Flatten.size())) {
228233
auto& part = Subset.Flatten[index];
234+
if (part.Slices) {
235+
LOG_BUILD_STATS("slicing part " << part->Label << ": " << NFmt::Do(*part.Slices, KeyDefaults));
236+
}
229237
auto& meta = part->IndexPages.GetBTree({});
230238
TCellsIterable beginKey = EmptyKey;
231239
if (part.Slices && part.Slices->front().FirstKey.GetCells()) {
@@ -235,7 +243,7 @@ class TTableHistogramBuilderBtreeIndex {
235243
if (part.Slices && part.Slices->back().LastKey.GetCells()) {
236244
endKey = MakeCellsIterableKey(part.Part.Get(), part.Slices->back().LastKey);
237245
}
238-
LoadedStateNodes.emplace_back(part.Part.Get(), meta.GetPageId(), meta.LevelCount, 0, meta.GetRowCount(), 0, meta.GetDataSize(), beginKey, endKey);
246+
LoadedStateNodes.emplace_back(part.Part.Get(), meta.GetPageId(), meta.LevelCount, 0, meta.GetRowCount(), 0, meta.GetTotalDataSize(), beginKey, endKey);
239247
ready &= SlicePart(*part.Slices, LoadedStateNodes.back());
240248
}
241249

@@ -261,13 +269,13 @@ class TTableHistogramBuilderBtreeIndex {
261269

262270
if (it == slices.end() || node.EndRowId <= it->BeginRowId() || it->EndRowId() <= node.BeginRowId) {
263271
// skip the node
264-
LOG_BUILD_STATS("slicing node " << node.ToString() << " => skip");
272+
LOG_BUILD_STATS("slicing node " << node.ToString(KeyDefaults) << " => skip");
265273
return true;
266274
}
267275

268276
if (it->BeginRowId() <= node.BeginRowId && node.EndRowId <= it->EndRowId()) {
269277
// take the node
270-
LOG_BUILD_STATS("slicing node " << node.ToString() << " => take");
278+
LOG_BUILD_STATS("slicing node " << node.ToString(KeyDefaults) << " => take");
271279
AddFutureEvents(node);
272280
return true;
273281
}
@@ -278,17 +286,17 @@ class TTableHistogramBuilderBtreeIndex {
278286
// can't split, decide by node.EndRowId - 1
279287
// TODO: decide by non-empty slice and node intersection, but this requires size calculation changes too
280288
if (it->Has(node.EndRowId - 1)) {
281-
LOG_BUILD_STATS("slicing node " << node.ToString() << " => take root");
289+
LOG_BUILD_STATS("slicing node " << node.ToString(KeyDefaults) << " => take leaf");
282290
AddFutureEvents(node);
283291
} else {
284-
LOG_BUILD_STATS("slicing node " << node.ToString() << " => skip root");
292+
LOG_BUILD_STATS("slicing node " << node.ToString(KeyDefaults) << " => skip leaf");
285293
}
286294
return true;
287295
}
288296

289297
bool ready = true;
290298

291-
LOG_BUILD_STATS("slicing node " << node.ToString() << " => split");
299+
LOG_BUILD_STATS("slicing node " << node.ToString(KeyDefaults) << " => split");
292300
const auto addNode = [&](TNodeState& child) {
293301
ready &= SlicePart(slices, child);
294302
};
@@ -341,10 +349,10 @@ class TTableHistogramBuilderBtreeIndex {
341349
<< " openedSortedByRowCount: " << openedSortedByRowCount.size()
342350
<< " openedSortedByDataSize: " << openedSortedByDataSize.size()
343351
<< " FutureEvents: " << FutureEvents.size()
344-
<< " currentKeyPointer: " << currentKeyPointer.ToString());
352+
<< " currentKeyPointer: " << currentKeyPointer.ToString(KeyDefaults));
345353

346354
auto processEvent = [&](const TEvent& event) {
347-
LOG_BUILD_STATS("processing event " << event.ToString());
355+
LOG_BUILD_STATS("processing event " << event.ToString(KeyDefaults));
348356
Y_DEBUG_ABORT_UNLESS(NodeEventKeyGreater.Compare(event, currentKeyPointer) <= 0, "Can't process future events");
349357
if (event.IsBegin) {
350358
if (event.Node->Open(openedRowCount, openedDataSize)) {
@@ -370,7 +378,7 @@ class TTableHistogramBuilderBtreeIndex {
370378
// TODO: skip all closed nodes and don't process them here
371379
// TODO: don't compare each node key and replace it with parentNode.Seek(currentKeyPointer)
372380
auto cmp = NodeEventKeyGreater.Compare(event, currentKeyPointer);
373-
LOG_BUILD_STATS("adding event " << (i32)cmp << " " << event.ToString());
381+
LOG_BUILD_STATS("adding event " << (i32)cmp << " " << event.ToString(KeyDefaults));
374382
if (cmp <= 0) { // event happened
375383
processEvent(event);
376384
if (cmp == 0) {
@@ -381,8 +389,8 @@ class TTableHistogramBuilderBtreeIndex {
381389
}
382390
};
383391
const auto addNode = [&](TNodeState& node) {
384-
addEvent(TEvent{node.BeginKey, true, &node});
385-
addEvent(TEvent{node.EndKey, false, &node});
392+
addEvent(TEvent{&node, true});
393+
addEvent(TEvent{&node, false});
386394
};
387395

388396
// may safely skip current key pointer and go further only if at the next iteration
@@ -395,7 +403,7 @@ class TTableHistogramBuilderBtreeIndex {
395403
openedSortedByRowCount.pop();
396404

397405
LOG_BUILD_STATS("loading node by row count trigger"
398-
<< node->ToString()
406+
<< node->ToString(KeyDefaults)
399407
<< " closedRowCount: " << closedRowCount
400408
<< " openedRowCount: " << openedRowCount
401409
<< " nextHistogramRowCount: " << nextHistogramRowCount);
@@ -413,7 +421,7 @@ class TTableHistogramBuilderBtreeIndex {
413421
openedSortedByDataSize.pop();
414422

415423
LOG_BUILD_STATS("loading node by data size trigger"
416-
<< node->ToString()
424+
<< node->ToString(KeyDefaults)
417425
<< " closedDataSize: " << closedDataSize
418426
<< " openedDataSize: " << openedDataSize
419427
<< " nextHistogramDataSize: " << nextHistogramDataSize);
@@ -439,7 +447,7 @@ class TTableHistogramBuilderBtreeIndex {
439447
<< " openedSortedByRowCount: " << openedSortedByRowCount.size()
440448
<< " openedSortedByDataSize: " << openedSortedByDataSize.size()
441449
<< " FutureEvents: " << FutureEvents.size()
442-
<< " currentKeyPointer: " << currentKeyPointer.ToString());
450+
<< " currentKeyPointer: " << currentKeyPointer.ToString(KeyDefaults));
443451

444452
// add current key pointer to a histogram if we either:
445453
// - failed to split opened nodes and may exceed a next histogram bucket value (plus its gaps)
@@ -449,7 +457,7 @@ class TTableHistogramBuilderBtreeIndex {
449457
// - minus size of all nodes that start at current key pointer
450458
// - plus half of the size of all other opened nodes (as their exact position is unknown)
451459
// also check that the current key pointer value is greater than the last value present in the histogram
452-
if (currentKeyPointer.Key) {
460+
if (currentKeyPointer.GetKey()) {
453461
if (nextHistogramRowCount != Max<ui64>()) {
454462
if (closedRowCount + openedRowCount > nextHistogramRowCount + RowCountResolutionGap || closedRowCount > nextHistogramRowCount - RowCountResolutionGap) {
455463
ui64 currentKeyRowCountOpens = 0;
@@ -461,7 +469,7 @@ class TTableHistogramBuilderBtreeIndex {
461469
Y_ABORT_UNLESS(currentKeyRowCountOpens <= openedRowCount);
462470
ui64 currentKeyPointerRowCount = closedRowCount + (openedRowCount - currentKeyRowCountOpens) / 2;
463471
if ((stats.RowCountHistogram.empty() ? 0 : stats.RowCountHistogram.back().Value) < currentKeyPointerRowCount && currentKeyPointerRowCount < stats.RowCount) {
464-
AddKey(stats.RowCountHistogram, currentKeyPointer.Key, currentKeyPointerRowCount);
472+
AddKey(stats.RowCountHistogram, currentKeyPointer.GetKey(), currentKeyPointerRowCount);
465473
nextHistogramRowCount = Max(currentKeyPointerRowCount + 1, nextHistogramRowCount + RowCountResolution);
466474
if (nextHistogramRowCount + RowCountResolutionGap > stats.RowCount) {
467475
nextHistogramRowCount = Max<ui64>();
@@ -480,7 +488,7 @@ class TTableHistogramBuilderBtreeIndex {
480488
Y_ABORT_UNLESS(currentKeyDataSizeOpens <= openedDataSize);
481489
ui64 currentKeyPointerDataSize = closedDataSize + (openedDataSize - currentKeyDataSizeOpens) / 2;
482490
if ((stats.DataSizeHistogram.empty() ? 0 : stats.DataSizeHistogram.back().Value) < currentKeyPointerDataSize && currentKeyPointerDataSize < stats.DataSize.Size) {
483-
AddKey(stats.DataSizeHistogram, currentKeyPointer.Key, currentKeyPointerDataSize);
491+
AddKey(stats.DataSizeHistogram, currentKeyPointer.GetKey(), currentKeyPointerDataSize);
484492
nextHistogramDataSize = Max(currentKeyPointerDataSize + 1, nextHistogramDataSize + DataSizeResolution);
485493
if (nextHistogramDataSize + DataSizeResolutionGap > stats.DataSize.Size) {
486494
nextHistogramDataSize = Max<ui64>();
@@ -507,7 +515,7 @@ class TTableHistogramBuilderBtreeIndex {
507515
return true;
508516
}
509517

510-
void AddKey(THistogram& histogram, TCellsIterable& key, ui64 value) {
518+
void AddKey(THistogram& histogram, const TCellsIterable& key, ui64 value) {
511519
TVector<TCell> keyCells;
512520

513521
// add columns that are present in the part:
@@ -555,8 +563,14 @@ class TTableHistogramBuilderBtreeIndex {
555563
}
556564

557565
// Pushes the begin event and the end event of `node` into FutureEvents.
// The version added by this diff first compares the two events with
// NodeEventKeyGreater and logs the comparison result together with the node.
void AddFutureEvents(TNodeState& node) {
558-
FutureEvents.push(TEvent{node.BeginKey, true, &node});
559-
FutureEvents.push(TEvent{node.EndKey, false, &node});
566+
auto cmp = NodeEventKeyGreater.Compare(TEvent{&node, true}, TEvent{&node, false});
567+
LOG_BUILD_STATS("adding node future events " << (i32)cmp << " " << node.ToString(KeyDefaults));
568+
// Sanity check: for a node with more than one row the begin event is expected
// to order strictly before the end event (cmp < 0).
// NOTE(review): judging by its name the macro only aborts in debug builds,
// in which case this branch does nothing in release builds - confirm.
if (node.GetRowCount() > 1 && cmp >= 0) {
569+
Y_DEBUG_ABORT_UNLESS(cmp < 0);
570+
}
571+
572+
FutureEvents.push(TEvent{&node, true});
573+
FutureEvents.push(TEvent{&node, false});
560574
}
561575

562576
private:

ydb/core/tablet_flat/ut/ut_stat.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) {
461461
for (auto &part : subset.Flatten) {
462462
TTestEnv env;
463463
auto index = CreateIndexIter(part.Part.Get(), &env, {});
464-
Cerr << " " << index->GetEndRowId() << " rows, "
464+
Cerr << " " << part->Label << " " << index->GetEndRowId() << " rows, "
465465
<< IndexTools::CountMainPages(*part.Part) << " pages, "
466466
<< (part->IndexPages.HasBTree() ? part->IndexPages.GetBTree({}).LevelCount : -1) << " levels: ";
467467
for (ui32 sample : xrange(1u, samples + 1)) {
@@ -477,7 +477,7 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) {
477477
}
478478
Cerr << ") ";
479479
}
480-
// Cerr << DumpPart(*part.As<TPartStore>(), 2) << Endl;
480+
Cerr << DumpPart(*part.As<TPartStore>(), 2) << Endl;
481481
Cerr << Endl;
482482
}
483483
}

0 commit comments

Comments
 (0)