Skip to content

Commit 63b0998

Browse files
batch portions on write (#13182)
1 parent d591f19 commit 63b0998

File tree

31 files changed

+862
-209
lines changed

31 files changed

+862
-209
lines changed

ydb/core/formats/arrow/reader/position.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,33 @@ class TIntervalPositions {
467467
public:
468468
using const_iterator = std::vector<TIntervalPosition>::const_iterator;
469469

470+
void Merge(const TIntervalPositions& from) {
471+
auto itSelf = Positions.begin();
472+
auto itFrom = from.Positions.begin();
473+
while (itSelf != Positions.end() && itFrom != from.Positions.end()) {
474+
if (*itSelf < *itFrom) {
475+
Positions.emplace_back(*itSelf);
476+
++itSelf;
477+
} else if (*itFrom < *itSelf) {
478+
Positions.emplace_back(*itFrom);
479+
++itFrom;
480+
} else {
481+
Positions.emplace_back(*itFrom);
482+
++itSelf;
483+
++itFrom;
484+
}
485+
}
486+
if (itSelf == Positions.end()) {
487+
Positions.insert(Positions.end(), itFrom, from.Positions.end());
488+
} else {
489+
Positions.insert(Positions.end(), itSelf, Positions.end());
490+
}
491+
}
492+
493+
// Number of stored positions, narrowed to ui32.
ui32 GetPointsCount() const {
    const auto pointsCount = Positions.size();
    return static_cast<ui32>(pointsCount);
}
496+
470497
// Returns true when the container holds no positions.
bool IsEmpty() const {
471498
return Positions.empty();
472499
}

ydb/core/kqp/ut/olap/helpers/typed_local.cpp

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1-
#include "typed_local.h"
2-
#include "query_executor.h"
31
#include "get_value.h"
2+
#include "query_executor.h"
3+
#include "typed_local.h"
4+
5+
#include <ydb/core/formats/arrow/arrow_helpers.h>
6+
#include <ydb/core/grpc_services/base/base.h>
7+
#include <ydb/core/grpc_services/local_rpc/local_rpc.h>
8+
9+
#include <ydb/public/api/protos/ydb_table.pb.h>
410

511
namespace NKikimr::NKqp {
612

@@ -37,8 +43,8 @@ TString TTypedLocalHelper::GetMultiColumnTestTableSchema(ui32 reps) const {
3743
return result;
3844
}
3945

40-
void TTypedLocalHelper::CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount, ui32 tableShardsCount) {
41-
CreateSchemaOlapTablesWithStore(GetMultiColumnTestTableSchema(reps), {TableName}, "olapStore", storeShardsCount, tableShardsCount);
46+
// Creates the table named TableName inside the store "olapStore", using the
// schema produced by GetMultiColumnTestTableSchema(reps) and the given shard
// counts for the store and the table.
void TTypedLocalHelper::CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount, ui32 tableShardsCount) {
47+
CreateSchemaOlapTablesWithStore(GetMultiColumnTestTableSchema(reps), { TableName }, "olapStore", storeShardsCount, tableShardsCount);
4248
}
4349

4450
void TTypedLocalHelper::ExecuteSchemeQuery(const TString& alterQuery, const NYdb::EStatus expectedStatus /*= EStatus::SUCCESS*/) const {
@@ -69,7 +75,8 @@ void TTypedLocalHelper::PrintCount() {
6975
}
7076

7177
NKikimr::NKqp::TTypedLocalHelper::TDistribution TTypedLocalHelper::GetDistribution(const bool verbose /*= false*/) {
72-
const TString selectQuery = "PRAGMA Kikimr.OptUseFinalizeByKey='true';SELECT COUNT(*) as c, field FROM `" + TablePath + "` GROUP BY field ORDER BY field";
78+
const TString selectQuery =
79+
"PRAGMA Kikimr.OptUseFinalizeByKey='true';SELECT COUNT(*) as c, field FROM `" + TablePath + "` GROUP BY field ORDER BY field";
7380

7481
auto tableClient = KikimrRunner.GetTableClient();
7582
auto rows = ExecuteScanQuery(tableClient, selectQuery, verbose);
@@ -101,7 +108,8 @@ NKikimr::NKqp::TTypedLocalHelper::TDistribution TTypedLocalHelper::GetDistributi
101108
return TDistribution(count, *minCount, *maxCount, groups.size());
102109
}
103110

104-
void TTypedLocalHelper::GetVolumes(ui64& rawBytes, ui64& bytes, const bool verbose /*= false*/, const std::vector<TString> columnNames /*= {}*/) {
111+
void TTypedLocalHelper::GetVolumes(
112+
ui64& rawBytes, ui64& bytes, const bool verbose /*= false*/, const std::vector<TString> columnNames /*= {}*/) {
105113
TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_stats` WHERE Activity == 1";
106114
if (columnNames.size()) {
107115
selectQuery += " AND EntityName IN ('" + JoinSeq("','", columnNames) + "')";
@@ -160,7 +168,9 @@ void TTypedLocalHelper::GetCount(ui64& count) {
160168

161169
void TTypedLocalHelper::FillPKOnly(const double pkKff /*= 0*/, const ui32 numRows /*= 800000*/) const {
162170
std::vector<NArrow::NConstruction::IArrayBuilder::TPtr> builders;
163-
builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor<NArrow::NConstruction::TIntSeqFiller<arrow::Int64Type>>::BuildNotNullable("pk_int", numRows * pkKff));
171+
builders.emplace_back(
172+
NArrow::NConstruction::TSimpleArrayConstructor<NArrow::NConstruction::TIntSeqFiller<arrow::Int64Type>>::BuildNotNullable(
173+
"pk_int", numRows * pkKff));
164174
NArrow::NConstruction::TRecordBatchConstructor batchBuilder(builders);
165175
std::shared_ptr<arrow::RecordBatch> batch = batchBuilder.BuildBatch(numRows);
166176
TBase::SendDataViaActorSystem(TablePath, batch);
@@ -181,4 +191,47 @@ void TTypedLocalHelper::GetStats(std::vector<NJson::TJsonValue>& stats, const bo
181191
}
182192
}
183193

184-
}
194+
// Sends `batch` to `testTable` as an asynchronous BulkUpsert through the local RPC
// of the test runtime and returns without waiting for the response.
//
// The batch must be non-null and non-empty; it is serialized without compression
// together with its schema. `Responses` is incremented for the request and
// decremented by the completion callback, which also asserts that the operation
// status equals `expectedStatus` (issues are dumped to Cerr for any non-SUCCESS status).
//
// The callback holds a raw pointer to `Responses`, so this guard must stay alive
// until every pending response has arrived (see WaitWritings).
void TTypedLocalHelper::TWritingGuard::SendDataViaActorSystem(TString testTable, std::shared_ptr<arrow::RecordBatch> batch,
    const Ydb::StatusIds_StatusCode expectedStatus /*= Ydb::StatusIds::SUCCESS*/) const {
    auto* runtime = KikimrRunner.GetTestServer().GetRuntime();

    UNIT_ASSERT(batch);
    UNIT_ASSERT(batch->num_rows());
    auto data = NArrow::SerializeBatchNoCompression(batch);
    UNIT_ASSERT(!data.empty());
    TString serializedSchema = NArrow::SerializeSchema(*batch->schema());
    UNIT_ASSERT(serializedSchema);

    Ydb::Table::BulkUpsertRequest request;
    request.mutable_arrow_batch_settings()->set_schema(serializedSchema);
    request.set_data(data);
    request.set_table(testTable);

    using TEvBulkUpsertRequest = NGRpcService::TGrpcRequestOperationCall<Ydb::Table::BulkUpsertRequest, Ydb::Table::BulkUpsertResponse>;
    auto future = NRpcService::DoLocalRpc<TEvBulkUpsertRequest>(std::move(request), "", "", runtime->GetActorSystem(0));

    // Count the request before subscribing so the callback always sees its own increment.
    Responses.fetch_add(1);
    auto* responsesLocal = &Responses;
    future.Subscribe([responsesLocal, expectedStatus](const NThreading::TFuture<Ydb::Table::BulkUpsertResponse>& f) {
        // Decrement ahead of the assertion below; presumably so that a failing
        // assertion cannot leave the pending counter stuck above zero.
        responsesLocal->fetch_sub(1);

        // Bind by reference: no need to copy the response protobuf just to inspect it.
        const auto& response = f.GetValueSync();
        const auto& op = response.operation();
        if (op.status() != Ydb::StatusIds::SUCCESS) {
            for (const auto& issue : op.issues()) {
                Cerr << issue.message() << " ";
            }
            Cerr << "\n";
        }
        UNIT_ASSERT_VALUES_EQUAL(op.status(), expectedStatus);
    });
}
226+
227+
void TTypedLocalHelper::TWritingGuard::WaitWritings() {
228+
auto* runtime = KikimrRunner.GetTestServer().GetRuntime();
229+
TDispatchOptions options;
230+
options.CustomFinalCondition = [&]() {
231+
return Responses.load() == 0;
232+
};
233+
234+
runtime->DispatchEvents(options);
235+
}
236+
237+
} // namespace NKikimr::NKqp

ydb/core/kqp/ut/olap/helpers/typed_local.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,44 @@ class TTypedLocalHelper: public Tests::NCS::THelper {
3737
SetShardingMethod("HASH_FUNCTION_CONSISTENCY_64");
3838
}
3939

40+
class TWritingGuard {
41+
private:
42+
TKikimrRunner& KikimrRunner;
43+
const TString TablePath;
44+
mutable std::atomic<size_t> Responses = 0;
45+
void SendDataViaActorSystem(TString testTable, std::shared_ptr<arrow::RecordBatch> batch,
46+
const Ydb::StatusIds_StatusCode expectedStatus = Ydb::StatusIds::SUCCESS) const;
47+
48+
void WaitWritings();
49+
50+
public:
51+
TWritingGuard(TKikimrRunner& kikimrRunner, const TString& tablePath)
52+
: KikimrRunner(kikimrRunner)
53+
, TablePath(tablePath)
54+
{
55+
}
56+
57+
template <class TFiller>
58+
void FillTable(const TFiller& fillPolicy, const double pkKff = 0, const ui32 numRows = 800000) const {
59+
std::vector<NArrow::NConstruction::IArrayBuilder::TPtr> builders;
60+
builders.emplace_back(
61+
NArrow::NConstruction::TSimpleArrayConstructor<NArrow::NConstruction::TIntSeqFiller<arrow::Int64Type>>::BuildNotNullable(
62+
"pk_int", numRows * pkKff));
63+
builders.emplace_back(std::make_shared<NArrow::NConstruction::TSimpleArrayConstructor<TFiller>>("field", fillPolicy));
64+
NArrow::NConstruction::TRecordBatchConstructor batchBuilder(builders);
65+
std::shared_ptr<arrow::RecordBatch> batch = batchBuilder.BuildBatch(numRows);
66+
SendDataViaActorSystem(TablePath, batch, Ydb::StatusIds::SUCCESS);
67+
}
68+
69+
void Finalize() {
70+
WaitWritings();
71+
}
72+
};
73+
74+
// Creates a TWritingGuard bound to this helper's KikimrRunner and to `tablePath`.
TWritingGuard StartWriting(const TString& tablePath) {
75+
return TWritingGuard(KikimrRunner, tablePath);
76+
}
77+
4078
void ExecuteSchemeQuery(const TString& alterQuery, const NYdb::EStatus expectedStatus = NYdb::EStatus::SUCCESS) const;
4179

4280
TString GetQueryResult(const TString& request) const;

ydb/core/kqp/ut/olap/write_ut.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,40 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) {
194194
UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("count")), 800000);
195195
}
196196

197+
// Issues three bulk writes one second apart while the column shard writing
// buffer duration is set to 15000 ms, then checks the per-value row counts.
//
// Each write carries 800000 rows with a different pk argument (0, 0.5 and 0.75 of
// numRows), so the pk ranges presumably overlap and later writes replace earlier
// rows with the same key — the expected counts (400000 / 200000 / 800000) match
// that reading; confirm against TIntSeqFiller semantics.
Y_UNIT_TEST(MultiWriteInTime) {
    auto settings = TKikimrSettings().SetWithSampleTables(false);
    settings.AppConfig.MutableColumnShardConfig()->SetWritingBufferDurationMs(15000);
    TKikimrRunner kikimr(settings);
    Tests::NCommon::TLoggerInit(kikimr).Initialize();
    TTypedLocalHelper helper("Utf8", kikimr);
    helper.CreateTestOlapTable();
    auto writeGuard = helper.StartWriting("/Root/olapStore/olapTable");
    writeGuard.FillTable(NArrow::NConstruction::TStringPoolFiller(1, 1, "aaa", 1), 0, 800000);
    Sleep(TDuration::Seconds(1));
    writeGuard.FillTable(NArrow::NConstruction::TStringPoolFiller(1, 1, "bbb", 1), 0.5, 800000);
    Sleep(TDuration::Seconds(1));
    writeGuard.FillTable(NArrow::NConstruction::TStringPoolFiller(1, 1, "ccc", 1), 0.75, 800000);
    Sleep(TDuration::Seconds(1));
    // All three requests must be answered before the table is queried.
    writeGuard.Finalize();

    auto selectQuery = TString(R"(
SELECT
field, count(*) as count,
FROM `/Root/olapStore/olapTable`
GROUP BY field
ORDER BY field
)");

    auto tableClient = kikimr.GetTableClient();
    auto rows = ExecuteScanQuery(tableClient, selectQuery);
    // Fail cleanly on an unexpected group count instead of indexing out of bounds below.
    UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3u);
    UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("count")), 400000);
    UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("field")), "aaa");
    UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("count")), 200000);
    UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[1].at("field")), "bbb");
    UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[2].at("count")), 800000);
    UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[2].at("field")), "ccc");
}
230+
197231
Y_UNIT_TEST(WriteDeleteCleanGC) {
198232
auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard<NKikimr::NOlap::TWaitCompactionController>();
199233
csController->SetSmallSizeDetector(1000000);

ydb/core/protos/config.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1789,6 +1789,7 @@ message TColumnShardConfig {
17891789
optional uint32 RestoreDataOnWriteTimeoutSeconds = 30;
17901790
optional bool UseSlicesFilter = 31 [default = true];
17911791
optional uint32 LimitForPortionsMetadataAsk = 32 [default = 1000];
1792+
optional uint64 WritingBufferVolumeMb = 33 [default = 32];
17921793
}
17931794

17941795
message TSchemeShardConfig {

0 commit comments

Comments
 (0)