Skip to content

Commit b2f7f35

Browse files
authored
Support skipping blob header in TDiskBlob (#3145)
1 parent 9922ce5 commit b2f7f35

32 files changed

+336
-256
lines changed

ydb/core/blobstorage/ut_vdisk/lib/test_repl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ class TVDiskReplProxyReaderActor : public TActorBootstrapped<TVDiskReplProxyRead
203203

204204
ReplCtx = std::make_shared<TReplCtx>(
205205
VCtx,
206+
nullptr,
206207
nullptr, // PDiskCtx
207208
nullptr, // HugeBlobCtx
208209
nullptr,

ydb/core/blobstorage/vdisk/common/blobstorage_cost_tracker.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ class TBsCostModelBase {
232232
const NKikimrBlobStorage::EPutHandleClass handleClass = record.GetHandleClass();
233233
const ui64 size = record.HasBuffer() ? record.GetBuffer().size() : ev.GetPayload(0).GetSize();
234234

235-
NPriPut::EHandleType handleType = NPriPut::HandleType(HugeBlobSize, handleClass, size);
235+
NPriPut::EHandleType handleType = NPriPut::HandleType(HugeBlobSize, handleClass, size, true);
236236
if (handleType == NPriPut::Log) {
237237
return WriteCost(size);
238238
} else {
@@ -247,7 +247,7 @@ class TBsCostModelBase {
247247

248248
for (ui64 idx = 0; idx < record.ItemsSize(); ++idx) {
249249
const ui64 size = ev.GetBufferBytes(idx);
250-
NPriPut::EHandleType handleType = NPriPut::HandleType(HugeBlobSize, handleClass, size);
250+
NPriPut::EHandleType handleType = NPriPut::HandleType(HugeBlobSize, handleClass, size, true);
251251
if (handleType == NPriPut::Log) {
252252
cost += WriteCost(size);
253253
} else {

ydb/core/blobstorage/vdisk/common/vdisk_config.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ namespace NKikimr {
4545
HullCompMaxInFlightReads = 20;
4646
HullCompReadBatchEfficiencyThreshold = 0.5; // don't issue reads if there are more gaps than the useful data
4747
AnubisOsirisMaxInFly = 1000;
48+
AddHeader = true;
4849

4950
RecoveryLogCutterFirstDuration = TDuration::Seconds(10);
5051
RecoveryLogCutterRegularDuration = TDuration::Seconds(30);

ydb/core/blobstorage/vdisk/common/vdisk_config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ namespace NKikimr {
128128
ui32 HullCompMaxInFlightReads;
129129
double HullCompReadBatchEfficiencyThreshold;
130130
ui64 AnubisOsirisMaxInFly;
131+
bool AddHeader;
131132

132133
//////////////// LOG CUTTER SETTINGS ////////////////
133134
TDuration RecoveryLogCutterFirstDuration;

ydb/core/blobstorage/vdisk/common/vdisk_costmodel.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ namespace NKikimr {
180180
const NKikimrBlobStorage::EPutHandleClass handleClass = record.GetHandleClass();
181181
const ui64 bufSize = record.HasBuffer() ? record.GetBuffer().size() : ev.GetPayload(0).GetSize();
182182

183-
NPriPut::EHandleType handleType = NPriPut::HandleType(MinREALHugeBlobInBytes, handleClass, bufSize);
183+
NPriPut::EHandleType handleType = NPriPut::HandleType(MinREALHugeBlobInBytes, handleClass, bufSize, true);
184184
if (handleType == NPriPut::Log) {
185185
*logPutInternalQueue = true;
186186
return SmallWriteCost(bufSize);
@@ -197,7 +197,7 @@ namespace NKikimr {
197197
ui64 cost = 0;
198198
for (ui64 idx = 0; idx < record.ItemsSize(); ++idx) {
199199
const ui64 size = ev.GetBufferBytes(idx);
200-
NPriPut::EHandleType handleType = NPriPut::HandleType(MinREALHugeBlobInBytes, handleClass, size);
200+
NPriPut::EHandleType handleType = NPriPut::HandleType(MinREALHugeBlobInBytes, handleClass, size, true);
201201
if (handleType == NPriPut::Log) {
202202
cost += SmallWriteCost(size);
203203
} else {
@@ -264,7 +264,7 @@ namespace NKikimr {
264264
cost += MovedPatchCostBySize(essence.MovedPatchBlobSize);
265265
}
266266
for (ui64 size : essence.PutBufferSizes) {
267-
NPriPut::EHandleType handleType = NPriPut::HandleType(MinREALHugeBlobInBytes, essence.HandleClass, size);
267+
NPriPut::EHandleType handleType = NPriPut::HandleType(MinREALHugeBlobInBytes, essence.HandleClass, size, true);
268268
if (handleType == NPriPut::Log) {
269269
cost += SmallWriteCost(size);
270270
} else {

ydb/core/blobstorage/vdisk/common/vdisk_handle_class.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ namespace NKikimr {
99
namespace NPriPut {
1010

1111
EHandleType HandleType(const ui32 minREALHugeBlobSize, NKikimrBlobStorage::EPutHandleClass handleClass,
12-
ui32 originalBufSizeWithoutOverhead) {
12+
ui32 originalBufSizeWithoutOverhead, bool addHeader) {
1313
// what size of huge blob it would be, if it huge
14-
const ui64 hugeBlobSize = TDiskBlob::HugeBlobOverhead + originalBufSizeWithoutOverhead;
14+
const ui64 hugeBlobSize = (addHeader ? TDiskBlob::HeaderSize : 0) + originalBufSizeWithoutOverhead;
1515

1616
switch (handleClass) {
1717
case NKikimrBlobStorage::TabletLog:
@@ -25,13 +25,5 @@ namespace NKikimr {
2525
}
2626
}
2727

28-
bool IsHandleTypeLog(const ui32 minREALHugeBlobSize, NKikimrBlobStorage::EPutHandleClass handleClass,
29-
ui32 originalBufSizeWithoutOverhead) {
30-
const NPriPut::EHandleType handleType = NPriPut::HandleType(minREALHugeBlobSize, handleClass,
31-
originalBufSizeWithoutOverhead);
32-
const bool isHandleTypeLog = handleType == NPriPut::Log;
33-
return isHandleTypeLog;
34-
}
35-
3628
} // NPriPut
3729
} // NKikimr

ydb/core/blobstorage/vdisk/common/vdisk_handle_class.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,8 @@ namespace NKikimr {
1818
};
1919

2020
EHandleType HandleType(const ui32 minREALHugeBlobSize, NKikimrBlobStorage::EPutHandleClass handleClass,
21-
ui32 originalBufSizeWithoutOverhead);
21+
ui32 originalBufSizeWithoutOverhead, bool addHeader);
2222

23-
bool IsHandleTypeLog(const ui32 minREALHugeBlobSize, NKikimrBlobStorage::EPutHandleClass handleClass,
24-
ui32 originalBufSizeWithoutOverhead);
2523
} // NPriPut
2624

2725
} // NKikimr

ydb/core/blobstorage/vdisk/common/vdisk_hugeblobctx.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ namespace NKikimr {
4343

4444
// check whether this blob is huge one; userPartSize doesn't include any metadata stored along with blob
4545
bool THugeBlobCtx::IsHugeBlob(TBlobStorageGroupType gtype, const TLogoBlobID& fullId) const {
46-
return gtype.MaxPartSize(fullId) + TDiskBlob::HugeBlobOverhead >= MinREALHugeBlobInBytes;
46+
return gtype.MaxPartSize(fullId) + (AddHeader ? TDiskBlob::HeaderSize : 0) >= MinREALHugeBlobInBytes;
4747
}
4848

4949
} // NKikimr

ydb/core/blobstorage/vdisk/common/vdisk_hugeblobctx.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,15 @@ namespace NKikimr {
6666
// this value is multiply of AppendBlockSize and is calculated from Config->MinHugeBlobSize
6767
const ui32 MinREALHugeBlobInBytes;
6868
const std::shared_ptr<const THugeSlotsMap> HugeSlotsMap;
69+
const bool AddHeader;
6970

70-
// check whether this blob is huge one; userPartSize doesn't include any metadata stored along with blob
71+
// check whether this NEW blob is huge one; userPartSize doesn't include any metadata stored along with blob
7172
bool IsHugeBlob(TBlobStorageGroupType gtype, const TLogoBlobID& fullId) const;
7273

73-
THugeBlobCtx(ui32 minREALHugeBlobInBytes, const std::shared_ptr<const THugeSlotsMap> &hugeSlotsMap)
74+
THugeBlobCtx(ui32 minREALHugeBlobInBytes, const std::shared_ptr<const THugeSlotsMap> &hugeSlotsMap, bool addHeader)
7475
: MinREALHugeBlobInBytes(minREALHugeBlobInBytes)
7576
, HugeSlotsMap(hugeSlotsMap)
77+
, AddHeader(addHeader)
7678
{}
7779
};
7880

ydb/core/blobstorage/vdisk/defrag/defrag_rewriter.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -131,18 +131,25 @@ namespace NKikimr {
131131
Y_ABORT_UNLESS(partId);
132132

133133
TRcBuf data = msg->Data.ToString();
134-
Y_ABORT_UNLESS(data.size() == TDiskBlob::HeaderSize + gtype.PartSize(rec.LogoBlobId));
135-
const char *header = data.data();
136-
137-
ui32 fullDataSize;
138-
memcpy(&fullDataSize, header, sizeof(fullDataSize));
139-
header += sizeof(fullDataSize);
140-
Y_ABORT_UNLESS(fullDataSize == rec.LogoBlobId.BlobSize());
141-
142-
Y_ABORT_UNLESS(NMatrix::TVectorType::MakeOneHot(partId - 1, gtype.TotalPartCount()).Raw() == static_cast<ui8>(*header));
134+
Y_ABORT_UNLESS(data.size() == TDiskBlob::HeaderSize + gtype.PartSize(rec.LogoBlobId) ||
135+
data.size() == gtype.PartSize(rec.LogoBlobId));
136+
137+
ui32 trim = 0;
138+
if (data.size() == TDiskBlob::HeaderSize + gtype.PartSize(rec.LogoBlobId)) {
139+
const char *header = data.data();
140+
ui32 fullDataSize;
141+
memcpy(&fullDataSize, header, sizeof(fullDataSize));
142+
header += sizeof(fullDataSize);
143+
Y_ABORT_UNLESS(fullDataSize == rec.LogoBlobId.BlobSize());
144+
Y_ABORT_UNLESS(NMatrix::TVectorType::MakeOneHot(partId - 1, gtype.TotalPartCount()).Raw() == static_cast<ui8>(*header));
145+
trim += TDiskBlob::HeaderSize;
146+
}
143147

144148
TRope rope(std::move(data));
145-
rope.EraseFront(TDiskBlob::HeaderSize);
149+
if (trim) {
150+
rope.EraseFront(trim);
151+
}
152+
Y_ABORT_UNLESS(rope.size() == gtype.PartSize(rec.LogoBlobId));
146153

147154
auto writeEvent = std::make_unique<TEvBlobStorage::TEvVPut>(rec.LogoBlobId, std::move(rope),
148155
SelfVDiskId, true, nullptr, TInstant::Max(), NKikimrBlobStorage::EPutHandleClass::AsyncBlob);

ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap_ctx_ut.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ namespace NKikimr {
3636

3737
return std::make_shared<THugeBlobCtx>(
3838
repairedHuge->GetMinREALHugeBlobInBytes(),
39-
repairedHuge->Heap->BuildHugeSlotsMap());
39+
repairedHuge->Heap->BuildHugeSlotsMap(),
40+
true);
4041
}
4142

4243

ydb/core/blobstorage/vdisk/hulldb/base/blobstorage_blob.h

Lines changed: 54 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ namespace NKikimr {
3535

3636
public:
3737
static const size_t HeaderSize = sizeof(ui32) + sizeof(ui8);
38-
static const size_t HugeBlobOverhead = HeaderSize;
3938

4039
TDiskBlob() = default;
4140

@@ -44,29 +43,41 @@ namespace NKikimr {
4443
, Parts(parts)
4544
{
4645
// ensure the blob format is correct
47-
Y_ABORT_UNLESS(Rope->GetSize() >= HeaderSize);
4846
Y_ABORT_UNLESS(parts.GetSize() <= MaxTotalPartCount);
4947
//Y_ABORT_UNLESS(parts.GetSize() == gtype.TotalPartCount()); // TODO(alexvru): fit UTs
5048

49+
ui32 blobSize = 0;
50+
for (ui8 i = parts.FirstPosition(); i != parts.GetSize(); i = parts.NextPosition(i)) {
51+
blobSize += gtype.PartSize(TLogoBlobID(fullId, i + 1));
52+
}
53+
54+
Y_ABORT_UNLESS(rope->GetSize() == blobSize || rope->GetSize() == blobSize + HeaderSize);
55+
5156
auto iter = Rope->Begin();
57+
ui32 offset = 0;
5258

53-
// obtain full data size from the header
54-
iter.ExtractPlainDataAndAdvance(&FullDataSize, sizeof(FullDataSize));
59+
if (rope->GetSize() == blobSize + HeaderSize) {
60+
// obtain full data size from the header
61+
iter.ExtractPlainDataAndAdvance(&FullDataSize, sizeof(FullDataSize));
5562

56-
// then check the parts; we have `parts' argument to validate actual blob content
57-
ui8 partsMask;
58-
iter.ExtractPlainDataAndAdvance(&partsMask, sizeof(partsMask));
59-
Y_ABORT_UNLESS(parts.Raw() == partsMask);
63+
// then check the parts; we have `parts' argument to validate actual blob content
64+
ui8 partsMask;
65+
iter.ExtractPlainDataAndAdvance(&partsMask, sizeof(partsMask));
66+
Y_ABORT_UNLESS(parts.Raw() == partsMask);
67+
68+
// advance offset
69+
offset += HeaderSize;
70+
} else {
71+
FullDataSize = fullId.BlobSize();
72+
}
6073

6174
// calculate part layout in the binary
62-
ui32 offset = HeaderSize;
6375
for (ui8 i = 0; i <= parts.GetSize(); ++i) {
6476
PartOffs[i] = offset;
6577
if (i != parts.GetSize()) {
6678
offset += parts.Get(i) ? gtype.PartSize(TLogoBlobID(fullId, i + 1)) : 0;
6779
}
6880
}
69-
Y_ABORT_UNLESS(GetSize() == Rope->GetSize(), "%" PRIu32 " != %zu", GetSize(), Rope->GetSize());
7081
}
7182

7283
bool Empty() const {
@@ -112,8 +123,8 @@ namespace NKikimr {
112123
return Parts;
113124
}
114125

115-
ui32 GetSize() const {
116-
return PartOffs[Parts.GetSize()];
126+
ui32 GetBlobSize(bool addHeader) const {
127+
return PartOffs[Parts.GetSize()] - PartOffs[0] + (addHeader ? HeaderSize : 0);
117128
}
118129

119130
////////////////// Iterator via all parts ///////////////////////////////////////
@@ -203,19 +214,22 @@ namespace NKikimr {
203214

204215
public:
205216
template<typename TPartIt>
206-
static TRope CreateFromDistinctParts(TPartIt first, TPartIt last, NMatrix::TVectorType parts, ui64 fullDataSize, TRopeArena& arena) {
217+
static TRope CreateFromDistinctParts(TPartIt first, TPartIt last, NMatrix::TVectorType parts, ui64 fullDataSize,
218+
TRopeArena& arena, bool addHeader) {
207219
// ensure that we have correct number of set parts
208220
Y_ABORT_UNLESS(parts.CountBits() == std::distance(first, last));
209221
Y_ABORT_UNLESS(first != last);
210222

211223
TRope rope;
212224

213-
// fill in header
214-
char header[HeaderSize];
215-
Y_ABORT_UNLESS(fullDataSize <= Max<ui32>());
216-
*reinterpret_cast<ui32*>(header) = fullDataSize;
217-
*reinterpret_cast<ui8*>(header + sizeof(ui32)) = parts.Raw();
218-
rope.Insert(rope.End(), arena.CreateRope(header, HeaderSize));
225+
if (addHeader) {
226+
// fill in header
227+
char header[HeaderSize];
228+
Y_ABORT_UNLESS(fullDataSize <= Max<ui32>());
229+
*reinterpret_cast<ui32*>(header) = fullDataSize;
230+
*reinterpret_cast<ui8*>(header + sizeof(ui32)) = parts.Raw();
231+
rope.Insert(rope.End(), arena.CreateRope(header, HeaderSize));
232+
}
219233

220234
// then copy parts' contents to the rope
221235
while (first != last) {
@@ -225,19 +239,22 @@ namespace NKikimr {
225239
return rope;
226240
}
227241

228-
static inline TRope Create(ui64 fullDataSize, ui8 partId, ui8 total, TRope&& data, TRopeArena& arena) {
242+
static inline TRope Create(ui64 fullDataSize, ui8 partId, ui8 total, TRope&& data, TRopeArena& arena,
243+
bool addHeader) {
229244
Y_ABORT_UNLESS(partId > 0 && partId <= 8);
230245
return CreateFromDistinctParts(&data, &data + 1, NMatrix::TVectorType::MakeOneHot(partId - 1, total),
231-
fullDataSize, arena);
246+
fullDataSize, arena, addHeader);
232247
}
233248

234-
static inline TRope Create(ui64 fullDataSize, NMatrix::TVectorType parts, TRope&& data, TRopeArena& arena) {
235-
return CreateFromDistinctParts(&data, &data + 1, parts, fullDataSize, arena);
249+
static inline TRope Create(ui64 fullDataSize, NMatrix::TVectorType parts, TRope&& data, TRopeArena& arena,
250+
bool addHeader) {
251+
return CreateFromDistinctParts(&data, &data + 1, parts, fullDataSize, arena, addHeader);
236252
}
237253

238254
// static function for calculating size of a blob being created ('Create' function creates blob of this size)
239-
static inline ui32 CalculateBlobSize(TBlobStorageGroupType gtype, const TLogoBlobID& fullId, NMatrix::TVectorType parts) {
240-
ui32 res = HeaderSize;
255+
static inline ui32 CalculateBlobSize(TBlobStorageGroupType gtype, const TLogoBlobID& fullId, NMatrix::TVectorType parts,
256+
bool addHeader) {
257+
ui32 res = addHeader ? HeaderSize : 0;
241258
for (ui8 i = parts.FirstPosition(); i != parts.GetSize(); i = parts.NextPosition(i)) {
242259
res += gtype.PartSize(TLogoBlobID(fullId, i + 1));
243260
}
@@ -256,7 +273,7 @@ namespace NKikimr {
256273

257274
if (Parts.Empty()) {
258275
Parts = NMatrix::TVectorType(0, source.Parts.GetSize());
259-
PartOffs.fill(HeaderSize);
276+
PartOffs.fill(0); // we don't care about absolute offsets here
260277
} else {
261278
Y_ABORT_UNLESS(Parts.GetSize() == source.Parts.GetSize());
262279
}
@@ -273,14 +290,18 @@ namespace NKikimr {
273290
}
274291
}
275292

276-
TRope CreateDiskBlob(TRopeArena& arena) const {
293+
TRope CreateDiskBlob(TRopeArena& arena, bool addHeader) const {
277294
Y_ABORT_UNLESS(!Empty());
278295

279-
char header[HeaderSize];
280-
*reinterpret_cast<ui32*>(header) = FullDataSize;
281-
*reinterpret_cast<ui8*>(header + sizeof(ui32)) = Parts.Raw();
296+
TRope rope;
297+
298+
if (addHeader) {
299+
char header[HeaderSize];
300+
*reinterpret_cast<ui32*>(header) = FullDataSize;
301+
*reinterpret_cast<ui8*>(header + sizeof(ui32)) = Parts.Raw();
302+
rope.Insert(rope.End(), arena.CreateRope(header, sizeof(header)));
303+
}
282304

283-
TRope rope(arena.CreateRope(header, sizeof(header)));
284305
for (auto it = begin(); it != end(); ++it) {
285306
rope.Insert(rope.End(), it.GetPart());
286307
}
@@ -315,8 +336,8 @@ namespace NKikimr {
315336
return Blob.Empty();
316337
}
317338

318-
TRope CreateDiskBlob(TRopeArena& arena) const {
319-
return Blob.CreateDiskBlob(arena);
339+
TRope CreateDiskBlob(TRopeArena& arena, bool addHeader) const {
340+
return Blob.CreateDiskBlob(arena, addHeader);
320341
}
321342

322343
const TDiskBlob& GetDiskBlob() const {

0 commit comments

Comments
 (0)