@@ -30,6 +30,15 @@ class TSimpleSerializationStat {
3030 Y_ABORT_UNLESS (RawBytes);
3131 }
3232
33+ double GetSerializedBytesPerRecord () const {
34+ AFL_VERIFY (RecordsCount);
35+ return 1.0 * SerializedBytes / RecordsCount;
36+ }
37+ double GetRawBytesPerRecord () const {
38+ AFL_VERIFY (RecordsCount);
39+ return 1.0 * RawBytes / RecordsCount;
40+ }
41+
3342 ui64 GetSerializedBytes () const {
3443 return SerializedBytes;
3544 }
@@ -54,28 +63,47 @@ class TSimpleSerializationStat {
5463 Y_ABORT_UNLESS (RawBytes >= stat.RawBytes );
5564 RawBytes -= stat.RawBytes ;
5665 }
66+ };
5767
58- double GetPackedRecordSize () const {
59- return (double )SerializedBytes / RecordsCount;
68+ class TBatchSerializationStat {
69+ protected:
70+ double SerializedBytesPerRecord = 0 ;
71+ double RawBytesPerRecord = 0 ;
72+ public:
73+ TBatchSerializationStat () = default ;
74+ TBatchSerializationStat (const ui64 bytes, const ui64 recordsCount, const ui64 rawBytes) {
75+ Y_ABORT_UNLESS (recordsCount);
76+ SerializedBytesPerRecord = 1.0 * bytes / recordsCount;
77+ RawBytesPerRecord = 1.0 * rawBytes / recordsCount;
78+ }
79+
80+ TBatchSerializationStat (const TSimpleSerializationStat& simple) {
81+ SerializedBytesPerRecord = simple.GetSerializedBytesPerRecord ();
82+ RawBytesPerRecord = simple.GetRawBytesPerRecord ();
83+ }
84+
85+ void Merge (const TSimpleSerializationStat& item) {
86+ SerializedBytesPerRecord += item.GetSerializedBytesPerRecord ();
87+ RawBytesPerRecord += item.GetRawBytesPerRecord ();
6088 }
6189
6290 std::optional<ui64> PredictOptimalPackRecordsCount (const ui64 recordsCount, const ui64 blobSize) const {
63- if (!RecordsCount ) {
91+ if (!SerializedBytesPerRecord ) {
6492 return {};
6593 }
66- const ui64 fullSize = 1.0 * recordsCount / RecordsCount * SerializedBytes ;
94+ const ui64 fullSize = 1.0 * recordsCount * SerializedBytesPerRecord ;
6795 if (fullSize < blobSize) {
6896 return recordsCount;
6997 } else {
70- return std::floor (1.0 * blobSize / SerializedBytes * RecordsCount );
98+ return std::floor (1.0 * blobSize / SerializedBytesPerRecord );
7199 }
72100 }
73101
74102 std::optional<ui64> PredictOptimalSplitFactor (const ui64 recordsCount, const ui64 blobSize) const {
75- if (!RecordsCount ) {
103+ if (!SerializedBytesPerRecord ) {
76104 return {};
77105 }
78- const ui64 fullSize = 1.0 * recordsCount / RecordsCount * SerializedBytes ;
106+ const ui64 fullSize = 1.0 * recordsCount * SerializedBytesPerRecord ;
79107 if (fullSize < blobSize) {
80108 return 1 ;
81109 } else {
@@ -84,25 +112,6 @@ class TSimpleSerializationStat {
84112 }
85113};
86114
87- class TBatchSerializationStat : public TSimpleSerializationStat {
88- private:
89- using TBase = TSimpleSerializationStat;
90- public:
91- using TBase::TBase;
92- TBatchSerializationStat (const TSimpleSerializationStat& item)
93- : TBase(item)
94- {
95-
96- }
97-
98- void Merge (const TSimpleSerializationStat& item) {
99- SerializedBytes += item.GetSerializedBytes ();
100- RawBytes += item.GetRawBytes ();
101- AFL_VERIFY (RecordsCount == item.GetRecordsCount ())(" self_count" , RecordsCount)(" new_count" , item.GetRecordsCount ());
102- }
103-
104- };
105-
106115class TColumnSerializationStat : public TSimpleSerializationStat {
107116private:
108117 YDB_READONLY (ui32, ColumnId, 0 );
@@ -114,6 +123,10 @@ class TColumnSerializationStat: public TSimpleSerializationStat {
114123
115124 }
116125
126+ double GetPackedRecordSize () const {
127+ return (double )SerializedBytes / RecordsCount;
128+ }
129+
117130 TColumnSerializationStat RecalcForRecordsCount (const ui64 recordsCount) const {
118131 TColumnSerializationStat result (ColumnId, ColumnName);
119132 result.Merge (TSimpleSerializationStat (SerializedBytes / RecordsCount * recordsCount, recordsCount, RawBytes / RecordsCount * recordsCount));
0 commit comments