Skip to content

Commit a0b1351

Browse files
iddqdex and pnv1 authored
change partitioning in tpch (#17416)
Co-authored-by: Nikolay Perfilov <pnv1@yandex-team.ru>
1 parent 68c43ae commit a0b1351

File tree

16 files changed

+828
-14
lines changed

16 files changed

+828
-14
lines changed

ydb/apps/ydb/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
* Added `--scale` option to `ydb workload tpch init` and `ydb workload tpcds init` commands. Sets the percentage of the benchmark's data size and workload to use, relative to full scale.
12
* Added "--no-discovery" option. It allows skipping discovery and using a user-provided endpoint to connect to a YDB cluster.
23
* Added `--retries` to `ydb workload <clickbench|tpch|tpcds> run` command.
34
* Added `--partition-size` param to `ydb workload <clickbench/tpcds/tpch> init`.

ydb/library/workload/benchmark_base/workload.cpp

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,22 @@ const TString TWorkloadGeneratorBase::CsvFormatString = [] () {
3535
return settings.SerializeAsString();
3636
} ();
3737

38+
namespace {
39+
40+
// Collects the string entries of the JSON array stored under `key` in `table`
// and joins them with ", ". GenerateDDLForTable calls it with "primary_key" and
// "partition_by" to build the column lists it writes into the DDL.
TString KeysList(const NJson::TJsonValue& table, const TString& key) {
41+
TVector<TStringBuf> keysV;
42+
for (const auto& k: table[key].GetArray()) {
43+
keysV.emplace_back(k.GetString());
44+
}
45+
return JoinSeq(", ", keysV);
46+
}
47+
48+
}
49+
50+
// Returns the value passed as the fallback argument to
// table["partitioning"].GetUIntegerSafe(...) when GenerateDDLForTable emits
// AUTO_PARTITIONING_MIN_PARTITIONS_COUNT. This base implementation ignores the
// table name and always returns 64; it is declared virtual so that a generator
// can supply its own default.
ui32 TWorkloadGeneratorBase::GetDefaultPartitionsCount(const TString& /*tableName*/) const {
51+
return 64;
52+
}
53+
3854
void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJson::TJsonValue& table, bool single) const {
3955
auto specialTypes = GetSpecialDataTypes();
4056
specialTypes["string_type"] = Params.GetStringType();
@@ -65,11 +81,7 @@ void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJ
6581
}
6682
}
6783
result << JoinSeq(",\n", columns);
68-
TVector<TStringBuf> keysV;
69-
for (const auto& k: table["primary_key"].GetArray()) {
70-
keysV.emplace_back(k.GetString());
71-
}
72-
const TString keys = JoinSeq(", ", keysV);
84+
const auto keys = KeysList(table, "primary_key");
7385
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) {
7486
result << Endl;
7587
} else {
@@ -78,7 +90,7 @@ void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJ
7890
result << ")" << Endl;
7991

8092
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::Column) {
81-
result << "PARTITION BY HASH (" << keys << ")" << Endl;
93+
result << "PARTITION BY HASH (" << (table.Has("partition_by") ? KeysList(table, "partition_by") : keys) << ")" << Endl;
8294
}
8395

8496
result << "WITH (" << Endl;
@@ -89,12 +101,12 @@ void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJ
89101
break;
90102
case TWorkloadBaseParams::EStoreType::Column:
91103
result << " STORE = COLUMN," << Endl;
92-
result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(64) << Endl;
104+
result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(GetDefaultPartitionsCount(tableName)) << Endl;
93105
break;
94106
case TWorkloadBaseParams::EStoreType::Row:
95107
result << " STORE = ROW," << Endl;
96108
result << " AUTO_PARTITIONING_PARTITION_SIZE_MB = " << Params.GetPartitionSizeMb() << ", " << Endl;
97-
result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(64) << Endl;
109+
result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(GetDefaultPartitionsCount(tableName)) << Endl;
98110
}
99111
result << ");" << Endl;
100112
}

ydb/library/workload/benchmark_base/workload.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class TWorkloadGeneratorBase : public IWorkloadQueryGenerator {
5353
virtual TString GetTablesYaml() const = 0;
5454
virtual TSpecialDataTypes GetSpecialDataTypes() const = 0;
5555
NJson::TJsonValue GetTablesJson() const;
56+
virtual ui32 GetDefaultPartitionsCount(const TString& tableName) const;
5657

5758
THolder<TGeneratorStateProcessor> StateProcessor;
5859
private:

ydb/library/workload/tpc_base/tpc_base.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ void TTpcBaseWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const EComm
167167
.StoreResult(&ExternalQueriesDir);
168168
opts.AddLongOption( "syntax", "Query syntax [" + GetEnumAllNames<EQuerySyntax>() + "].")
169169
.StoreResult(&Syntax).DefaultValue(Syntax);
170-
opts.AddLongOption("scale", "scale in percents")
170+
opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.")
171171
.DefaultValue(Scale).StoreResult(&Scale);
172172
opts.AddLongOption("float-mode", "Float mode. Can be float, decimal or decimal_ydb. If set to 'float' - float will be used, 'decimal' means that decimal will be used with canonical size and 'decimal_ydb' means that all floats will be converted to decimal(22,9) because YDB supports only this type.")
173173
.StoreResult(&FloatMode).DefaultValue(FloatMode);
@@ -177,6 +177,8 @@ void TTpcBaseWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const EComm
177177
case TWorkloadParams::ECommandType::Init:
178178
opts.AddLongOption("float-mode", "Float mode. Can be float, decimal or decimal_ydb. If set to 'float' - float will be used, 'decimal' means that decimal will be used with canonical size and 'decimal_ydb' means that all floats will be converted to decimal(22,9) because YDB supports only this type.")
179179
.StoreResult(&FloatMode).DefaultValue(FloatMode);
180+
opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.")
181+
.DefaultValue(Scale).StoreResult(&Scale);
180182
break;
181183
default:
182184
break;

ydb/library/workload/tpcds/data_generator.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ TTpcdsWorkloadDataInitializerGenerator::TTpcdsWorkloadDataInitializerGenerator(c
1919

2020
void TTpcdsWorkloadDataInitializerGenerator::ConfigureOpts(NLastGetopt::TOpts& opts) {
2121
TWorkloadDataInitializerBase::ConfigureOpts(opts);
22-
opts.AddLongOption("scale", "scale in percents")
22+
opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.")
2323
.DefaultValue(Scale).StoreResult(&Scale);
2424
opts.AddLongOption("tables", "Commaseparated list of tables for generate. Empty means all tables.\n"
2525
"Enabled tables: " + JoinSeq(", ", TBulkDataGenerator::TFactory::GetRegisteredKeys()))

ydb/library/workload/tpch/data_generator.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ TTpchWorkloadDataInitializerGenerator::TTpchWorkloadDataInitializerGenerator(con
1111

1212
void TTpchWorkloadDataInitializerGenerator::ConfigureOpts(NLastGetopt::TOpts& opts) {
1313
TWorkloadDataInitializerBase::ConfigureOpts(opts);
14-
opts.AddLongOption("scale", "scale in percents")
14+
opts.AddLongOption("scale", "Sets the percentage of the benchmark's data size and workload to use, relative to full scale.")
1515
.DefaultValue(Scale).StoreResult(&Scale);
1616
opts.AddLongOption("tables", "Commaseparated list of tables for generate. Empty means all tables.\n"
1717
"Enabled tables: " + JoinSeq(", ", TBulkDataGenerator::TFactory::GetRegisteredKeys()))

ydb/library/workload/tpch/tpch.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ TWorkloadGeneratorBase::TSpecialDataTypes TTpchWorkloadGenerator::GetSpecialData
2929
}
3030
}
3131

32+
// TPC-H override of the default partitions count: returns 64 when
// Params.GetScale() is at most 10 and 256 otherwise. The table name is
// ignored, so the same default applies to every table.
ui32 TTpchWorkloadGenerator::GetDefaultPartitionsCount(const TString& /*tableName*/) const {
33+
return Params.GetScale() <= 10 ? 64 : 256;
34+
}
35+
3236

3337
THolder<IWorkloadQueryGenerator> TTpchWorkloadParams::CreateGenerator() const {
3438
return MakeHolder<TTpchWorkloadGenerator>(*this);

ydb/library/workload/tpch/tpch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class TTpchWorkloadGenerator final: public TTpcBaseWorkloadGenerator {
1919
protected:
2020
TString GetTablesYaml() const override;
2121
TWorkloadGeneratorBase::TSpecialDataTypes GetSpecialDataTypes() const override;
22+
ui32 GetDefaultPartitionsCount(const TString& tableName) const override;
2223

2324
private:
2425
const TTpchWorkloadParams& Params;

ydb/library/workload/tpch/tpch_schema.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ tables:
8181
primary_key:
8282
- l_orderkey
8383
- l_linenumber
84+
partition_by:
85+
- l_orderkey
8486

8587
- name: nation
8688
columns:

ydb/tests/functional/benchmarks_init/canondata/result.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
"test_init.TestClickbenchInit.test_s1_s3": {
3333
"uri": "file://test_init.TestClickbenchInit.test_s1_s3/s1_s3"
3434
},
35+
"test_init.TestTpcdsInit.test_s100_column": {
36+
"uri": "file://test_init.TestTpcdsInit.test_s100_column/s100_column"
37+
},
3538
"test_init.TestTpcdsInit.test_s1_column": {
3639
"uri": "file://test_init.TestTpcdsInit.test_s1_column/s1_column"
3740
},
@@ -47,6 +50,9 @@
4750
"test_init.TestTpcdsInit.test_s1_s3": {
4851
"uri": "file://test_init.TestTpcdsInit.test_s1_s3/s1_s3"
4952
},
53+
"test_init.TestTpchInit.test_s100_column": {
54+
"uri": "file://test_init.TestTpchInit.test_s100_column/s100_column"
55+
},
5056
"test_init.TestTpchInit.test_s1_column": {
5157
"uri": "file://test_init.TestTpchInit.test_s1_column/s1_column"
5258
},

0 commit comments

Comments
 (0)