44#include < library/cpp/resource/resource.h>
55#include < util/datetime/base.h>
66#include < util/generic/guid.h>
7+ #include < util/random/entropy.h>
8+ #include < util/random/mersenne.h>
79#include < util/random/normal.h>
810#include < util/random/random.h>
911#include < util/string/split.h>
@@ -252,9 +254,18 @@ class TRandomLogGenerator {
252254 return result.str ();
253255 }
254256
257+ TInstant UniformInstant (ui64 from, ui64 to) const {
258+ TMersenne<ui64> rnd (Seed ());
259+ return TInstant::FromValue (rnd.Uniform (from, to));
260+ }
261+
255262 TInstant RandomInstant () const {
256263 auto result = TInstant::Now () - TDuration::Seconds (Params.TimestampSubtract );
257- i64 millisecondsDiff = 60 * 1000 * NormalRandom<double >(0 ., Params.TimestampStandardDeviationMinutes );
264+ ui64 timestampStandardDeviationMinutes = 0 ;
265+ if (Params.TimestampStandardDeviationMinutes .Defined ()) {
266+ timestampStandardDeviationMinutes = *Params.TimestampStandardDeviationMinutes ;
267+ }
268+ i64 millisecondsDiff = 60 * 1000 * NormalRandom<double >(0 ., timestampStandardDeviationMinutes);
258269 if (millisecondsDiff >= 0 ) { // TDuration::MilliSeconds can't be negative for some reason...
259270 result += TDuration::MilliSeconds (millisecondsDiff);
260271 } else {
@@ -279,7 +290,7 @@ class TRandomLogGenerator {
279290 for (size_t row = 0 ; row < count; ++row) {
280291 result.emplace_back ();
281292 result.back ().LogId = CreateGuidAsString ().c_str ();
282- result.back ().Ts = RandomInstant ();
293+ result.back ().Ts = !!Params. TimestampDateFrom && !!Params. TimestampDateTo ? UniformInstant (*Params. TimestampDateFrom , *Params. TimestampDateTo ) : RandomInstant ();
283294 result.back ().Level = RandomNumber<ui32>(10 );
284295 result.back ().ServiceName = RandomWord (false );
285296 result.back ().Component = RandomWord (true );
@@ -360,6 +371,82 @@ TQueryInfoList TLogGenerator::GetWorkload(int type) {
360371 }
361372}
362373
374+ void TLogWorkloadParams::ConfigureOptsColumns (NLastGetopt::TOpts& opts) {
375+ opts.AddLongOption (" len" , " String len" )
376+ .DefaultValue (StringLen).StoreResult (&StringLen);
377+ opts.AddLongOption (" int-cols" , " Number of int columns" )
378+ .DefaultValue (IntColumnsCnt).StoreResult (&IntColumnsCnt);
379+ opts.AddLongOption (" str-cols" , " Number of string columns" )
380+ .DefaultValue (StrColumnsCnt).StoreResult (&StrColumnsCnt);
381+ opts.AddLongOption (" key-cols" , " Number of key columns" )
382+ .DefaultValue (KeyColumnsCnt).StoreResult (&KeyColumnsCnt);
383+ }
384+
385+ void TLogWorkloadParams::ConfigureOptsFillData (NLastGetopt::TOpts& opts) {
386+ ConfigureOptsColumns (opts);
387+ opts.AddLongOption (" rows" , " Number of rows to upsert" )
388+ .DefaultValue (RowsCnt).StoreResult (&RowsCnt);
389+ opts.AddLongOption (" timestamp_deviation" , " Standard deviation. For each timestamp, a random variable with a specified standard deviation in minutes is added." )
390+ .StoreResult (&TimestampStandardDeviationMinutes);
391+ opts.AddLongOption (" date-from" , " Left boundary of the interval to generate "
392+ " timestamp uniformly from specified interval. Presents as seconds since epoch. Once this option passed, 'date-to' "
393+ " should be passed as well. This option is mutually exclusive with 'timestamp_deviation'" )
394+ .StoreResult (&TimestampDateFrom);
395+ opts.AddLongOption (" date-to" , " Right boundary of the interval to generate "
396+ " timestamp uniformly from specified interval. Presents as seconds since epoch. Once this option passed, 'date-from' "
397+ " should be passed as well. This option is mutually exclusive with 'timestamp_deviation'" )
398+ .StoreResult (&TimestampDateTo);
399+ opts.AddLongOption (" timestamp_subtract" , " Value in seconds to subtract from timestamp. For each timestamp, this value in seconds is subtracted" )
400+ .DefaultValue (0 ).StoreResult (&TimestampSubtract);
401+ opts.AddLongOption (" null-percent" , " Percent of nulls in generated data" )
402+ .DefaultValue (NullPercent).StoreResult (&NullPercent);
403+ }
404+
405+ void TLogWorkloadParams::Validate (const ECommandType commandType, int workloadType) {
406+ bool timestampDevPassed = !!TimestampStandardDeviationMinutes;
407+ const bool dateFromPassed = !!TimestampDateFrom;
408+ const bool dateToPassed = !!TimestampDateTo;
409+
410+ switch (commandType) {
411+ case TWorkloadParams::ECommandType::Init:
412+ break ;
413+ case TWorkloadParams::ECommandType::Run:
414+ switch (static_cast <TLogGenerator::EType>(workloadType)) {
415+ case TLogGenerator::EType::Insert:
416+ case TLogGenerator::EType::Upsert:
417+ case TLogGenerator::EType::BulkUpsert:
418+ if (!timestampDevPassed && !dateFromPassed && !dateToPassed) {
419+ timestampDevPassed = true ;
420+ TimestampStandardDeviationMinutes = 0 ;
421+ }
422+
423+ if (timestampDevPassed && (dateFromPassed || dateToPassed)) {
424+ throw yexception () << " The `timestamp_deviation` and `date-from`, `date-to` are mutually exclusive and shouldn't be provided at once" ;
425+ }
426+
427+ if ((dateFromPassed && !dateToPassed) || (!dateFromPassed && dateToPassed)) {
428+ throw yexception () << " The `date-from` and `date-to` parameters must be provided together to specify the interval for uniform PK generation" ;
429+ }
430+
431+ if (dateFromPassed && dateToPassed && *TimestampDateFrom >= *TimestampDateTo) {
432+ throw yexception () << " Invalid interval [`date-from`, `date-to`)" ;
433+ }
434+
435+ break ;
436+ case TLogGenerator::EType::Select:
437+ break ;
438+ }
439+ break ;
440+ case TWorkloadParams::ECommandType::Clean:
441+ break ;
442+ case TWorkloadParams::ECommandType::Root:
443+ break ;
444+ case TWorkloadParams::ECommandType::Import:
445+ break ;
446+ }
447+ return ;
448+ }
449+
363450void TLogWorkloadParams::ConfigureOpts (NLastGetopt::TOpts& opts, const ECommandType commandType, int workloadType) {
364451 opts.AddLongOption (' p' , " path" , " Path where benchmark tables are located" )
365452 .Optional ()
@@ -379,14 +466,7 @@ void TLogWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const ECommandT
379466 .DefaultValue (PartitionSizeMb).StoreResult (&PartitionSizeMb);
380467 opts.AddLongOption (" auto-partition" , " Enable auto partitioning by load." )
381468 .DefaultValue (PartitionsByLoad).StoreResult (&PartitionsByLoad);
382- opts.AddLongOption (" len" , " String len" )
383- .DefaultValue (StringLen).StoreResult (&StringLen);
384- opts.AddLongOption (" int-cols" , " Number of int columns" )
385- .DefaultValue (IntColumnsCnt).StoreResult (&IntColumnsCnt);
386- opts.AddLongOption (" str-cols" , " Number of string columns" )
387- .DefaultValue (StrColumnsCnt).StoreResult (&StrColumnsCnt);
388- opts.AddLongOption (" key-cols" , " Number of key columns" )
389- .DefaultValue (KeyColumnsCnt).StoreResult (&KeyColumnsCnt);
469+ ConfigureOptsColumns (opts);
390470 opts.AddLongOption (" ttl" , " TTL for timestamp column in minutes" )
391471 .DefaultValue (TimestampTtlMinutes).StoreResult (&TimestampTtlMinutes);
392472 opts.AddLongOption (" store" , " Storage type."
@@ -408,42 +488,14 @@ void TLogWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const ECommandT
408488 case TLogGenerator::EType::Insert:
409489 case TLogGenerator::EType::Upsert:
410490 case TLogGenerator::EType::BulkUpsert:
411- opts.AddLongOption (" len" , " String len" )
412- .DefaultValue (StringLen).StoreResult (&StringLen);
413- opts.AddLongOption (" int-cols" , " Number of int columns" )
414- .DefaultValue (IntColumnsCnt).StoreResult (&IntColumnsCnt);
415- opts.AddLongOption (" str-cols" , " Number of string columns" )
416- .DefaultValue (StrColumnsCnt).StoreResult (&StrColumnsCnt);
417- opts.AddLongOption (" key-cols" , " Number of key columns" )
418- .DefaultValue (KeyColumnsCnt).StoreResult (&KeyColumnsCnt);
419- opts.AddLongOption (" rows" , " Number of rows to upsert" )
420- .DefaultValue (RowsCnt).StoreResult (&RowsCnt);
421- opts.AddLongOption (" timestamp_deviation" , " Standard deviation. For each timestamp, a random variable with a specified standard deviation in minutes is added." )
422- .DefaultValue (TimestampStandardDeviationMinutes).StoreResult (&TimestampStandardDeviationMinutes);
423- opts.AddLongOption (" timestamp_subtract" , " Value in seconds to subtract from timestamp. For each timestamp, this value in seconds is subtracted" )
424- .DefaultValue (0 ).StoreResult (&TimestampSubtract);
425- opts.AddLongOption (" null-percent" , " Percent of nulls in generated data" )
426- .DefaultValue (NullPercent).StoreResult (&NullPercent);
491+ ConfigureOptsFillData (opts);
427492 break ;
428493 case TLogGenerator::EType::Select:
429494 break ;
430495 }
431496 break ;
432497 case TWorkloadParams::ECommandType::Import:
433- opts.AddLongOption (" len" , " String len" )
434- .DefaultValue (StringLen).StoreResult (&StringLen);
435- opts.AddLongOption (" int-cols" , " Number of int columns" )
436- .DefaultValue (IntColumnsCnt).StoreResult (&IntColumnsCnt);
437- opts.AddLongOption (" str-cols" , " Number of string columns" )
438- .DefaultValue (StrColumnsCnt).StoreResult (&StrColumnsCnt);
439- opts.AddLongOption (" key-cols" , " Number of key columns" )
440- .DefaultValue (KeyColumnsCnt).StoreResult (&KeyColumnsCnt);
441- opts.AddLongOption (" rows" , " Number of rows to upsert" )
442- .DefaultValue (RowsCnt).StoreResult (&RowsCnt);
443- opts.AddLongOption (" timestamp_deviation" , " Standard deviation. For each timestamp, a random variable with a specified standard deviation in minutes is added." )
444- .DefaultValue (TimestampStandardDeviationMinutes).StoreResult (&TimestampStandardDeviationMinutes);
445- opts.AddLongOption (" null-percent" , " Percent of nulls in generated data" )
446- .DefaultValue (NullPercent).StoreResult (&NullPercent);
498+ ConfigureOptsFillData (opts);
447499 break ;
448500 default :
449501 break ;
0 commit comments