Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d4a7551
Added date_from&date_to timestamp uniform distribution for 'ydb workl…
Emgariko Apr 9, 2025
d059fcd
entropy + description
Emgariko Apr 9, 2025
a7bf9f2
mutually_exclusive options
Emgariko Apr 10, 2025
a85d2e9
added workload log run parse param validation for PK mode
Emgariko Apr 24, 2025
71dfa29
fix `macro redefined`
Emgariko Apr 24, 2025
39fefbf
Commented library mutually exclusive settings
Emgariko Apr 25, 2025
5dddd6a
Moved Parse method to appropriate WorkloadRunCommand class & added in…
Emgariko Apr 25, 2025
377e777
Some changes
Emgariko Apr 28, 2025
21ef45c
debug logs
Emgariko May 5, 2025
3548f77
modifying ParseResult->Has method to check if name is presented in Cl…
Emgariko May 6, 2025
b8f456d
Changed Has to get provided args
Emgariko May 6, 2025
9c30cf9
Added validation & changed ParseFromCommandLineResult Get logic
Emgariko May 7, 2025
ae8895d
refactoring
Emgariko May 7, 2025
3449bd8
Refactoring
Emgariko May 7, 2025
b6c131d
avoided ydb_cli dependency in workload abstract lib
Emgariko May 7, 2025
65395f2
Rolled back client_command_options changes, it's redundant now
Emgariko May 7, 2025
f948b2f
removed comments & debug
Emgariko May 7, 2025
129a758
passed workload type to parameter validation
Emgariko May 7, 2025
837a52f
Removed debug log
Emgariko May 10, 2025
b4a5ead
Removed redundant logs
Emgariko May 10, 2025
a0d0739
Reverted 20 previous commits
Emgariko May 12, 2025
05aa423
uniform Ts
iddqdex May 11, 2025
43f1f58
refactor params
iddqdex May 11, 2025
df4a93a
fix
iddqdex May 11, 2025
f9194b6
Moved params validation before workload run
Emgariko May 13, 2025
e36fa04
Removed redundant try/catch
Emgariko May 13, 2025
db41c93
Debug logs
Emgariko May 14, 2025
65cb9c8
Moved validation before running workload in several threads
Emgariko May 15, 2025
adae9e5
Disabled validation for select
Emgariko May 16, 2025
60377df
Added workloadType and ECommandType to workload params validation
Emgariko May 17, 2025
36e1bca
Added default logic for timestamp_deviation param
Emgariko May 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ydb/library/workload/abstract/workload_query_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ class TWorkloadParams {
}
virtual TString GetWorkloadName() const = 0;

virtual void Validate(const ECommandType /*commandType*/, int /*workloadType*/) {};
public:
ui64 BulkSize = 10000;
std::string DbPath;
Expand Down
132 changes: 92 additions & 40 deletions ydb/library/workload/log/log.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include <library/cpp/resource/resource.h>
#include <util/datetime/base.h>
#include <util/generic/guid.h>
#include <util/random/entropy.h>
#include <util/random/mersenne.h>
#include <util/random/normal.h>
#include <util/random/random.h>
#include <util/string/split.h>
Expand Down Expand Up @@ -252,9 +254,18 @@ class TRandomLogGenerator {
return result.str();
}

// Returns a timestamp drawn uniformly from the half-open interval [from, to).
//
// `from` and `to` are seconds since epoch, as documented for the `date-from`
// and `date-to` options. They are converted to microseconds before sampling:
// building the result directly from the raw values would make TInstant treat
// seconds as microseconds and place every timestamp near the epoch start.
// Sampling in microseconds also keeps sub-second variety in the generated keys.
//
// An empty or inverted interval (to <= from) yields `from` instead of asking
// the generator for a value from an empty range.
TInstant UniformInstant(ui64 from, ui64 to) const {
    const ui64 fromMicroSeconds = TInstant::Seconds(from).MicroSeconds();
    const ui64 toMicroSeconds = TInstant::Seconds(to).MicroSeconds();
    if (toMicroSeconds <= fromMicroSeconds) {
        return TInstant::MicroSeconds(fromMicroSeconds);
    }
    // RandomNumber is already used for the other generated fields, so no
    // generator has to be built and re-seeded for every row.
    return TInstant::MicroSeconds(fromMicroSeconds + RandomNumber<ui64>(toMicroSeconds - fromMicroSeconds));
}

TInstant RandomInstant() const {
auto result = TInstant::Now() - TDuration::Seconds(Params.TimestampSubtract);
i64 millisecondsDiff = 60 * 1000 * NormalRandom<double>(0., Params.TimestampStandardDeviationMinutes);
ui64 timestampStandardDeviationMinutes = 0;
if (Params.TimestampStandardDeviationMinutes.Defined()) {
timestampStandardDeviationMinutes = *Params.TimestampStandardDeviationMinutes;
}
i64 millisecondsDiff = 60 * 1000 * NormalRandom<double>(0., timestampStandardDeviationMinutes);
if (millisecondsDiff >= 0) { // TDuration::MilliSeconds can't be negative for some reason...
result += TDuration::MilliSeconds(millisecondsDiff);
} else {
Expand All @@ -279,7 +290,7 @@ class TRandomLogGenerator {
for (size_t row = 0; row < count; ++row) {
result.emplace_back();
result.back().LogId = CreateGuidAsString().c_str();
result.back().Ts = RandomInstant();
result.back().Ts = !!Params.TimestampDateFrom && !!Params.TimestampDateTo ? UniformInstant(*Params.TimestampDateFrom, *Params.TimestampDateTo) : RandomInstant();
result.back().Level = RandomNumber<ui32>(10);
result.back().ServiceName = RandomWord(false);
result.back().Component = RandomWord(true);
Expand Down Expand Up @@ -360,6 +371,82 @@ TQueryInfoList TLogGenerator::GetWorkload(int type) {
}
}

// Registers the options that describe the table shape: string length and the
// number of int, string and key columns. Each option shows the current member
// value as its default and stores the parsed value back into that member.
void TLogWorkloadParams::ConfigureOptsColumns(NLastGetopt::TOpts& opts) {
    auto& stringLenOpt = opts.AddLongOption("len", "String len");
    stringLenOpt.DefaultValue(StringLen);
    stringLenOpt.StoreResult(&StringLen);

    auto& intColumnsOpt = opts.AddLongOption("int-cols", "Number of int columns");
    intColumnsOpt.DefaultValue(IntColumnsCnt);
    intColumnsOpt.StoreResult(&IntColumnsCnt);

    auto& strColumnsOpt = opts.AddLongOption("str-cols", "Number of string columns");
    strColumnsOpt.DefaultValue(StrColumnsCnt);
    strColumnsOpt.StoreResult(&StrColumnsCnt);

    auto& keyColumnsOpt = opts.AddLongOption("key-cols", "Number of key columns");
    keyColumnsOpt.DefaultValue(KeyColumnsCnt);
    keyColumnsOpt.StoreResult(&KeyColumnsCnt);
}

// Registers the options used when rows are generated: the column-shape options
// plus row count, timestamp distribution settings and null percentage.
//
// `timestamp_deviation`, `date-from` and `date-to` are registered without a
// default value; their targets only receive a value when the option is parsed.
void TLogWorkloadParams::ConfigureOptsFillData(NLastGetopt::TOpts& opts) {
    ConfigureOptsColumns(opts);

    auto& rowsOpt = opts.AddLongOption("rows", "Number of rows to upsert");
    rowsOpt.DefaultValue(RowsCnt);
    rowsOpt.StoreResult(&RowsCnt);

    auto& deviationOpt = opts.AddLongOption("timestamp_deviation", "Standard deviation. For each timestamp, a random variable with a specified standard deviation in minutes is added.");
    deviationOpt.StoreResult(&TimestampStandardDeviationMinutes);

    auto& dateFromOpt = opts.AddLongOption("date-from", "Left boundary of the interval to generate "
        "timestamp uniformly from specified interval. Presents as seconds since epoch. Once this option passed, 'date-to' "
        "should be passed as well. This option is mutually exclusive with 'timestamp_deviation'");
    dateFromOpt.StoreResult(&TimestampDateFrom);

    auto& dateToOpt = opts.AddLongOption("date-to", "Right boundary of the interval to generate "
        "timestamp uniformly from specified interval. Presents as seconds since epoch. Once this option passed, 'date-from' "
        "should be passed as well. This option is mutually exclusive with 'timestamp_deviation'");
    dateToOpt.StoreResult(&TimestampDateTo);

    auto& subtractOpt = opts.AddLongOption("timestamp_subtract", "Value in seconds to subtract from timestamp. For each timestamp, this value in seconds is subtracted");
    subtractOpt.DefaultValue(0);
    subtractOpt.StoreResult(&TimestampSubtract);

    auto& nullPercentOpt = opts.AddLongOption("null-percent", "Percent of nulls in generated data");
    nullPercentOpt.DefaultValue(NullPercent);
    nullPercentOpt.StoreResult(&NullPercent);
}

void TLogWorkloadParams::Validate(const ECommandType commandType, int workloadType) {
bool timestampDevPassed = !!TimestampStandardDeviationMinutes;
const bool dateFromPassed = !!TimestampDateFrom;
const bool dateToPassed = !!TimestampDateTo;

switch (commandType) {
case TWorkloadParams::ECommandType::Init:
break;
case TWorkloadParams::ECommandType::Run:
switch (static_cast<TLogGenerator::EType>(workloadType)) {
case TLogGenerator::EType::Insert:
case TLogGenerator::EType::Upsert:
case TLogGenerator::EType::BulkUpsert:
if (!timestampDevPassed && !dateFromPassed && !dateToPassed) {
timestampDevPassed = true;
TimestampStandardDeviationMinutes = 0;
}

if (timestampDevPassed && (dateFromPassed || dateToPassed)) {
throw yexception() << "The `timestamp_deviation` and `date-from`, `date-to` are mutually exclusive and shouldn't be provided at once";
}

if ((dateFromPassed && !dateToPassed) || (!dateFromPassed && dateToPassed)) {
throw yexception() << "The `date-from` and `date-to` parameters must be provided together to specify the interval for uniform PK generation";
}

if (dateFromPassed && dateToPassed && *TimestampDateFrom >= *TimestampDateTo) {
throw yexception() << "Invalid interval [`date-from`, `date-to`)";
}

break;
case TLogGenerator::EType::Select:
break;
}
break;
case TWorkloadParams::ECommandType::Clean:
break;
case TWorkloadParams::ECommandType::Root:
break;
case TWorkloadParams::ECommandType::Import:
break;
}
return;
}

void TLogWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const ECommandType commandType, int workloadType) {
opts.AddLongOption('p', "path", "Path where benchmark tables are located")
.Optional()
Expand All @@ -379,14 +466,7 @@ void TLogWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const ECommandT
.DefaultValue(PartitionSizeMb).StoreResult(&PartitionSizeMb);
opts.AddLongOption("auto-partition", "Enable auto partitioning by load.")
.DefaultValue(PartitionsByLoad).StoreResult(&PartitionsByLoad);
opts.AddLongOption("len", "String len")
.DefaultValue(StringLen).StoreResult(&StringLen);
opts.AddLongOption("int-cols", "Number of int columns")
.DefaultValue(IntColumnsCnt).StoreResult(&IntColumnsCnt);
opts.AddLongOption("str-cols", "Number of string columns")
.DefaultValue(StrColumnsCnt).StoreResult(&StrColumnsCnt);
opts.AddLongOption("key-cols", "Number of key columns")
.DefaultValue(KeyColumnsCnt).StoreResult(&KeyColumnsCnt);
ConfigureOptsColumns(opts);
opts.AddLongOption("ttl", "TTL for timestamp column in minutes")
.DefaultValue(TimestampTtlMinutes).StoreResult(&TimestampTtlMinutes);
opts.AddLongOption("store", "Storage type."
Expand All @@ -408,42 +488,14 @@ void TLogWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const ECommandT
case TLogGenerator::EType::Insert:
case TLogGenerator::EType::Upsert:
case TLogGenerator::EType::BulkUpsert:
opts.AddLongOption("len", "String len")
.DefaultValue(StringLen).StoreResult(&StringLen);
opts.AddLongOption("int-cols", "Number of int columns")
.DefaultValue(IntColumnsCnt).StoreResult(&IntColumnsCnt);
opts.AddLongOption("str-cols", "Number of string columns")
.DefaultValue(StrColumnsCnt).StoreResult(&StrColumnsCnt);
opts.AddLongOption("key-cols", "Number of key columns")
.DefaultValue(KeyColumnsCnt).StoreResult(&KeyColumnsCnt);
opts.AddLongOption("rows", "Number of rows to upsert")
.DefaultValue(RowsCnt).StoreResult(&RowsCnt);
opts.AddLongOption("timestamp_deviation", "Standard deviation. For each timestamp, a random variable with a specified standard deviation in minutes is added.")
.DefaultValue(TimestampStandardDeviationMinutes).StoreResult(&TimestampStandardDeviationMinutes);
opts.AddLongOption("timestamp_subtract", "Value in seconds to subtract from timestamp. For each timestamp, this value in seconds is subtracted")
.DefaultValue(0).StoreResult(&TimestampSubtract);
opts.AddLongOption("null-percent", "Percent of nulls in generated data")
.DefaultValue(NullPercent).StoreResult(&NullPercent);
ConfigureOptsFillData(opts);
break;
case TLogGenerator::EType::Select:
break;
}
break;
case TWorkloadParams::ECommandType::Import:
opts.AddLongOption("len", "String len")
.DefaultValue(StringLen).StoreResult(&StringLen);
opts.AddLongOption("int-cols", "Number of int columns")
.DefaultValue(IntColumnsCnt).StoreResult(&IntColumnsCnt);
opts.AddLongOption("str-cols", "Number of string columns")
.DefaultValue(StrColumnsCnt).StoreResult(&StrColumnsCnt);
opts.AddLongOption("key-cols", "Number of key columns")
.DefaultValue(KeyColumnsCnt).StoreResult(&KeyColumnsCnt);
opts.AddLongOption("rows", "Number of rows to upsert")
.DefaultValue(RowsCnt).StoreResult(&RowsCnt);
opts.AddLongOption("timestamp_deviation", "Standard deviation. For each timestamp, a random variable with a specified standard deviation in minutes is added.")
.DefaultValue(TimestampStandardDeviationMinutes).StoreResult(&TimestampStandardDeviationMinutes);
opts.AddLongOption("null-percent", "Percent of nulls in generated data")
.DefaultValue(NullPercent).StoreResult(&NullPercent);
ConfigureOptsFillData(opts);
break;
default:
break;
Expand Down
9 changes: 8 additions & 1 deletion ydb/library/workload/log/log.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ class TLogWorkloadParams : public TWorkloadParams {
ui64 StrColumnsCnt = 0;
ui64 IntColumnsCnt = 0;
ui64 KeyColumnsCnt = 0;
ui64 TimestampStandardDeviationMinutes = 0;
TMaybe<ui64> TimestampStandardDeviationMinutes;
TMaybe<ui64> TimestampDateFrom;
TMaybe<ui64> TimestampDateTo;
ui64 TimestampTtlMinutes = 0;
ui64 TimestampSubtract = 0;
ui64 RowsCnt = 1;
Expand All @@ -34,6 +36,11 @@ class TLogWorkloadParams : public TWorkloadParams {

YDB_READONLY(EStoreType, StoreType, EStoreType::Row);
TWorkloadDataInitializer::TList CreateDataInitializers() const override;

void Validate(const ECommandType commandType, int workloadType) override;
private:
void ConfigureOptsFillData(NLastGetopt::TOpts& opts);
void ConfigureOptsColumns(NLastGetopt::TOpts& opts);
};

class TLogGenerator final: public TWorkloadQueryGeneratorBase<TLogWorkloadParams> {
Expand Down
2 changes: 2 additions & 0 deletions ydb/public/lib/ydb_cli/commands/ydb_workload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ int TWorkloadCommand::RunWorkload(NYdbWorkload::IWorkloadQueryGenerator& workloa
StopTime = StartTime + TDuration::Seconds(TotalSec);

NPar::LocalExecutor().RunAdditionalThreads(Threads);

auto futures = NPar::LocalExecutor().ExecRangeWithFutures([this, &workloadGen, type](int id) {
try {
WorkerFn(id, workloadGen, type);
Expand Down Expand Up @@ -350,6 +351,7 @@ int TWorkloadCommandRun::Run(TConfig& config) {
PrepareForRun(config);
Params.DbPath = config.Database;
auto workloadGen = Params.CreateGenerator();
Params.Validate(NYdbWorkload::TWorkloadParams::ECommandType::Run, Type);
return RunWorkload(*workloadGen, Type);
}

Expand Down
Loading