Skip to content

Commit 03cfdfd

Browse files
fix blob range construction and index control (#13131)
1 parent 0fe447b commit 03cfdfd

File tree

23 files changed

+169
-48
lines changed

23 files changed

+169
-48
lines changed

ydb/core/kqp/ut/olap/indexes_ut.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) {
2323
csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1));
2424
csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30);
2525
csController->SetOverrideMemoryLimitForPortionReading(1e+10);
26+
csController->SetOverrideBlobSplitSettings(NOlap::NSplitter::TSplitSettings());
2627

2728
TLocalHelper(kikimr).CreateTestOlapTable();
2829
auto tableClient = kikimr.GetTableClient();
@@ -103,6 +104,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) {
103104
csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1));
104105
csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1));
105106
csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30);
107+
csController->SetOverrideBlobSplitSettings(NOlap::NSplitter::TSplitSettings());
106108

107109
TLocalHelper(kikimr).CreateTestOlapTableWithoutStore();
108110
auto tableClient = kikimr.GetTableClient();
@@ -347,6 +349,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) {
347349
auto csController = NYDBTest::TControllers::RegisterCSControllerGuard<NOlap::TWaitCompactionController>();
348350
csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30);
349351
csController->SetOverrideMemoryLimitForPortionReading(1e+10);
352+
csController->SetOverrideBlobSplitSettings(NOlap::NSplitter::TSplitSettings());
350353
TLocalHelper(*Kikimr).CreateTestOlapTable();
351354
auto tableClient = Kikimr->GetTableClient();
352355

@@ -367,7 +370,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) {
367370
auto alterQuery =
368371
TStringBuilder() << Sprintf(
369372
R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_ngramm_uid, TYPE=BLOOM_NGRAMM_FILTER,
370-
FEATURES=`{"column_name" : "resource_id", "ngramm_size" : 3, "hashes_count" : 2, "filter_size_bytes" : 64024}`);
373+
FEATURES=`{"column_name" : "resource_id", "ngramm_size" : 3, "hashes_count" : 2, "filter_size_bytes" : 512, "records_count" : 1024}`);
371374
)",
372375
StorageId.data());
373376
auto session = tableClient.CreateSession().GetValueSync().GetSession();

ydb/core/kqp/ut/olap/sys_view_ut.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) {
115115
ui64 bytesPK1;
116116
{
117117
auto csController = NYDBTest::TControllers::RegisterCSControllerGuard<NOlap::TWaitCompactionController>();
118+
csController->SetOverrideBlobSplitSettings(NOlap::NSplitter::TSplitSettings());
118119
auto settings = TKikimrSettings()
119120
.SetWithSampleTables(false);
120121
TKikimrRunner kikimr(settings);
@@ -127,6 +128,7 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) {
127128
}
128129

129130
auto csController = NYDBTest::TControllers::RegisterCSControllerGuard<NOlap::TWaitCompactionController>();
131+
csController->SetOverrideBlobSplitSettings(NOlap::NSplitter::TSplitSettings());
130132
ui64 rawBytesUnpack1PK = 0;
131133
ui64 bytesUnpack1PK = 0;
132134
ui64 rawBytesPackAndUnpack2PK;

ydb/core/protos/flat_scheme_op.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ message TRequestedBloomNGrammFilter {
396396
optional uint32 FilterSizeBytes = 2;
397397
optional uint32 HashesCount = 3;
398398
optional string ColumnName = 4;
399+
optional uint32 RecordsCount = 5;
399400
}
400401

401402
message TRequestedMaxIndex {
@@ -432,6 +433,7 @@ message TBloomNGrammFilter {
432433
optional uint32 FilterSizeBytes = 2;
433434
optional uint32 HashesCount = 3;
434435
optional uint32 ColumnId = 4;
436+
optional uint32 RecordsCount = 5;
435437
}
436438

437439
message TMaxIndex {

ydb/core/tx/columnshard/blobs_action/abstract/storage.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include "storage.h"
22

3+
#include <ydb/core/tx/columnshard/hooks/abstract/abstract.h>
4+
35
namespace NKikimr::NOlap {
46

57
bool TCommonBlobsTracker::IsBlobInUsage(const NOlap::TUnifiedBlobId& blobId) const {
@@ -42,4 +44,8 @@ void IBlobsStorageOperator::Stop() {
4244
Stopped = true;
4345
}
4446

47+
const NSplitter::TSplitSettings& IBlobsStorageOperator::GetBlobSplitSettings() const {
48+
return NYDBTest::TControllers::GetColumnShardController()->GetBlobSplitSettings(DoGetBlobSplitSettings());
4549
}
50+
51+
} // namespace NKikimr::NOlap

ydb/core/tx/columnshard/blobs_action/abstract/storage.h

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,28 @@
11
#pragma once
2+
#include "gc.h"
3+
#include "read.h"
24
#include "remove.h"
35
#include "write.h"
4-
#include "read.h"
5-
#include "gc.h"
66

77
#include <ydb/core/tx/columnshard/blobs_action/blob_manager_db.h>
8-
#include <ydb/core/tx/columnshard/blobs_action/counters/storage.h>
98
#include <ydb/core/tx/columnshard/blobs_action/counters/remove_gc.h>
9+
#include <ydb/core/tx/columnshard/blobs_action/counters/storage.h>
1010
#include <ydb/core/tx/columnshard/data_sharing/manager/shared_blobs.h>
11+
#include <ydb/core/tx/tiering/abstract/manager.h>
1112

1213
#include <ydb/library/accessor/accessor.h>
13-
#include <ydb/core/tx/tiering/abstract/manager.h>
1414

1515
namespace NKikimr::NOlap {
1616

1717
class TCommonBlobsTracker: public IBlobInUseTracker {
1818
private:
1919
// List of blobs that are used by in-flight requests
2020
THashMap<TUnifiedBlobId, i64> BlobsUseCount;
21+
2122
protected:
2223
virtual bool DoUseBlob(const TUnifiedBlobId& blobId) override;
2324
virtual bool DoFreeBlob(const TUnifiedBlobId& blobId) override;
25+
2426
public:
2527
virtual bool IsBlobInUsage(const NOlap::TUnifiedBlobId& blobId) const override;
2628
virtual void OnBlobFree(const TUnifiedBlobId& blobId) = 0;
@@ -34,8 +36,10 @@ class IBlobsStorageOperator {
3436
YDB_READONLY(bool, Stopped, false);
3537
std::shared_ptr<NBlobOperations::TStorageCounters> Counters;
3638
YDB_ACCESSOR_DEF(std::shared_ptr<NDataSharing::TStorageSharedBlobsManager>, SharedBlobs);
39+
3740
protected:
38-
virtual std::shared_ptr<IBlobsDeclareRemovingAction> DoStartDeclareRemovingAction(const std::shared_ptr<NBlobOperations::TRemoveDeclareCounters>& counters) = 0;
41+
virtual std::shared_ptr<IBlobsDeclareRemovingAction> DoStartDeclareRemovingAction(
42+
const std::shared_ptr<NBlobOperations::TRemoveDeclareCounters>& counters) = 0;
3943
virtual std::shared_ptr<IBlobsWritingAction> DoStartWritingAction() = 0;
4044
virtual std::shared_ptr<IBlobsReadingAction> DoStartReadingAction() = 0;
4145
virtual bool DoLoad(IBlobManagerDb& dbBlobs) = 0;
@@ -67,16 +71,13 @@ class IBlobsStorageOperator {
6771
IBlobsStorageOperator(const TString& storageId, const std::shared_ptr<NDataSharing::TStorageSharedBlobsManager>& sharedBlobs)
6872
: SelfTabletId(sharedBlobs->GetSelfTabletId())
6973
, StorageId(storageId)
70-
, SharedBlobs(sharedBlobs)
71-
{
74+
, SharedBlobs(sharedBlobs) {
7275
Counters = std::make_shared<NBlobOperations::TStorageCounters>(storageId);
7376
}
7477

7578
void Stop();
7679

77-
const NSplitter::TSplitSettings& GetBlobSplitSettings() const {
78-
return DoGetBlobSplitSettings();
79-
}
80+
const NSplitter::TSplitSettings& GetBlobSplitSettings() const;
8081

8182
virtual TTabletsByBlob GetBlobsToDelete() const = 0;
8283
virtual bool HasToDelete(const TUnifiedBlobId& blobId, const TTabletId initiatorTabletId) const = 0;
@@ -120,7 +121,8 @@ class IBlobsStorageOperator {
120121
}
121122

122123
[[nodiscard]] std::shared_ptr<IBlobsGCAction> CreateGC() {
123-
NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("storage_id", GetStorageId())("tablet_id", GetSelfTabletId());
124+
NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)(
125+
"storage_id", GetStorageId())("tablet_id", GetSelfTabletId());
124126
if (CurrentGCAction && CurrentGCAction->IsInProgress()) {
125127
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_BLOBS)("event", "gc_in_progress");
126128
return nullptr;
@@ -137,4 +139,4 @@ class IBlobsStorageOperator {
137139
virtual bool IsReady() const = 0;
138140
};
139141

140-
}
142+
} // namespace NKikimr::NOlap

ydb/core/tx/columnshard/blobs_action/local/storage.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,4 @@ class TOperator: public IBlobsStorageOperator {
5353
}
5454
};
5555

56-
}
56+
} // namespace NKikimr::NOlap::NBlobOperations::NLocal

ydb/core/tx/columnshard/engines/scheme/index_info.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,8 @@ NKikimr::TConclusionStatus TIndexInfo::AppendIndex(const THashMap<ui32, std::vec
419419
std::shared_ptr<IPortionDataChunk> chunk = index->BuildIndex(originalData, recordsCount, *this);
420420
auto opStorage = operators->GetOperatorVerified(index->GetStorageId());
421421
if ((i64)chunk->GetPackedSize() > opStorage->GetBlobSplitSettings().GetMaxBlobSize()) {
422-
return TConclusionStatus::Fail("blob size for secondary data (" + ::ToString(indexId) + ") bigger than limit (" +
422+
return TConclusionStatus::Fail("blob size for secondary data (" + ::ToString(indexId) + ":" + ::ToString(chunk->GetPackedSize()) + ":" +
423+
::ToString(recordsCount) + ") bigger than limit (" +
423424
::ToString(opStorage->GetBlobSplitSettings().GetMaxBlobSize()) + ")");
424425
}
425426
if (index->GetStorageId() == IStoragesManager::LocalMetadataStorageId) {

ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
#include <ydb/core/formats/arrow/accessor/plain/accessor.h>
44
#include <ydb/core/formats/arrow/arrow_helpers.h>
55
#include <ydb/core/formats/arrow/serializer/native.h>
6-
#include <ydb/core/tx/columnshard/engines/scheme/index_info.h>
76
#include <ydb/core/tx/columnshard/engines/portions/write_with_blobs.h>
7+
#include <ydb/core/tx/columnshard/engines/scheme/index_info.h>
88
#include <ydb/core/tx/columnshard/engines/storage/chunks/column.h>
9+
#include <ydb/core/tx/columnshard/hooks/abstract/abstract.h>
910
#include <ydb/core/tx/columnshard/splitter/batch_slice.h>
1011

1112
#include <ydb/library/formats/arrow/simple_arrays_cache.h>
@@ -60,7 +61,7 @@ TConclusion<std::shared_ptr<NArrow::TGeneralContainer>> ISnapshotSchema::Normali
6061
}
6162
if (restoreColumnIds.contains(columnId)) {
6263
AFL_VERIFY(!!GetExternalDefaultValueVerified(columnId) || GetIndexInfo().IsNullableVerified(columnId))("column_name",
63-
GetIndexInfo().GetColumnName(columnId, false))("id", columnId);
64+
GetIndexInfo().GetColumnName(columnId, false))("id", columnId);
6465
result->AddField(resultField, GetColumnLoaderVerified(columnId)->BuildDefaultAccessor(batch->num_rows())).Validate();
6566
}
6667
}
@@ -324,7 +325,8 @@ TConclusion<TWritePortionInfoWithBlobsResult> ISnapshotSchema::PrepareForWrite(c
324325

325326
TGeneralSerializedSlice slice(chunks, schemaDetails, splitterCounters);
326327
std::vector<TSplittedBlob> blobs;
327-
if (!slice.GroupBlobs(blobs, NSplitter::TEntityGroups(NSplitter::TSplitSettings(), NBlobOperations::TGlobal::DefaultStorageId))) {
328+
if (!slice.GroupBlobs(blobs, NSplitter::TEntityGroups(NYDBTest::TControllers::GetColumnShardController()->GetBlobSplitSettings(),
329+
NBlobOperations::TGlobal::DefaultStorageId))) {
328330
return TConclusionStatus::Fail("cannot split data for appropriate blobs size");
329331
}
330332
auto constructor =

ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,32 @@ class TFixStringBitsStorage {
3838
return (bytesCount + ((bitsCount % 8) ? 1 : 0)) * 8;
3939
}
4040

41+
TString DebugString() const {
42+
TStringBuilder sb;
43+
ui32 count1 = 0;
44+
ui32 count0 = 0;
45+
for (ui32 i = 0; i < GetSizeBits(); ++i) {
46+
if (Get(i)) {
47+
// sb << 1 << " ";
48+
++count1;
49+
} else {
50+
// sb << 0 << " ";
51+
++count0;
52+
}
53+
// if (i % 20 == 0) {
54+
// sb << i << " ";
55+
// }
56+
}
57+
sb << GetSizeBits() << "=" << count0 << "[0]+" << count1 << "[1]";
58+
return sb;
59+
}
60+
4161
template <class TBitsVector>
4262
TFixStringBitsStorage(const TBitsVector& bitsVector)
4363
: TFixStringBitsStorage(TSizeDetector<TBitsVector>::GetSize(bitsVector)) {
4464
ui32 byteIdx = 0;
4565
ui8 byteCurrent = 0;
46-
ui8 shiftCurrent = 0;
66+
ui8 shiftCurrent = 1;
4767
for (ui32 i = 0; i < TSizeDetector<TBitsVector>::GetSize(bitsVector); ++i) {
4868
if (i && i % 8 == 0) {
4969
Data[byteIdx] = (char)byteCurrent;

ydb/core/tx/columnshard/engines/storage/indexes/bloom_ngramm/checker.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,15 @@ bool TFilterChecker::DoCheckImpl(const std::vector<TString>& blobs) const {
2121
for (auto&& blob : blobs) {
2222
TFixStringBitsStorage bits(blob);
2323
bool found = true;
24+
TStringBuilder sb;
2425
for (auto&& i : HashValues) {
26+
sb << i % bits.GetSizeBits() << ",";
2527
if (!bits.Get(i % bits.GetSizeBits())) {
2628
found = false;
2729
break;
2830
}
2931
}
32+
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("size", bits.GetSizeBits())("found", found)("hashes", sb)("details", bits.DebugString());
3033
if (found) {
3134
// AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("size", bArray.length())("data", bArray.ToString())("index_id", GetIndexId());
3235
return true;

0 commit comments

Comments
 (0)