Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance: Add deltaRowCount in l0 compaction #33843

Merged
merged 3 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 23 additions & 21 deletions internal/datacoord/compaction_l0_view.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@ func (v *LevelZeroSegmentsView) String() string {
l0strings := lo.Map(v.segments, func(v *SegmentView, _ int) string {
return v.LevelZeroString()
})
return fmt.Sprintf("label=<%s>, posT=<%v>, l0 segments=%v",

count := lo.SumBy(v.segments, func(v *SegmentView) int {
return v.DeltaRowCount
})
return fmt.Sprintf("L0SegCount=%d, DeltaRowCount=%d, label=<%s>, posT=<%v>, L0 segments=%v",
len(v.segments),
count,
v.label.String(),
v.earliestGrowingSegmentPos.GetTimestamp(),
l0strings)
Expand Down Expand Up @@ -116,19 +122,20 @@ func (v *LevelZeroSegmentsView) minCountSizeTrigger(segments []*SegmentView) (pi
maxDeltaCount = paramtable.Get().DataCoordCfg.LevelZeroCompactionTriggerDeltalogMaxNum.GetAsInt()
)

curSize := float64(0)
pickedSize := float64(0)
pickedCount := 0

// count >= minDeltaCount
if lo.SumBy(segments, func(view *SegmentView) int { return view.DeltalogCount }) >= minDeltaCount {
picked, curSize = pickByMaxCountSize(segments, maxDeltaSize, maxDeltaCount)
reason = fmt.Sprintf("level zero segments count reaches minForceTriggerCountLimit=%d, curDeltaSize=%.2f, curDeltaCount=%d", minDeltaCount, curSize, len(segments))
picked, pickedSize, pickedCount = pickByMaxCountSize(segments, maxDeltaSize, maxDeltaCount)
reason = fmt.Sprintf("level zero segments count reaches minForceTriggerCountLimit=%d, pickedSize=%.2fB, pickedCount=%d", minDeltaCount, pickedSize, pickedCount)
return
}

// size >= minDeltaSize
if lo.SumBy(segments, func(view *SegmentView) float64 { return view.DeltaSize }) >= minDeltaSize {
picked, curSize = pickByMaxCountSize(segments, maxDeltaSize, maxDeltaCount)
reason = fmt.Sprintf("level zero segments size reaches minForceTriggerSizeLimit=%.2f, curDeltaSize=%.2f, curDeltaCount=%d", minDeltaSize, curSize, len(segments))
picked, pickedSize, pickedCount = pickByMaxCountSize(segments, maxDeltaSize, maxDeltaCount)
reason = fmt.Sprintf("level zero segments size reaches minForceTriggerSizeLimit=%.2fB, pickedSize=%.2fB, pickedCount=%d", minDeltaSize, pickedSize, pickedCount)
return
}

Expand All @@ -143,30 +150,25 @@ func (v *LevelZeroSegmentsView) forceTrigger(segments []*SegmentView) (picked []
maxDeltaCount = paramtable.Get().DataCoordCfg.LevelZeroCompactionTriggerDeltalogMaxNum.GetAsInt()
)

curSize := float64(0)
picked, curSize = pickByMaxCountSize(segments, maxDeltaSize, maxDeltaCount)
reason = fmt.Sprintf("level zero views force to trigger, curDeltaSize=%.2f, curDeltaCount=%d", curSize, len(segments))
return
picked, pickedSize, pickedCount := pickByMaxCountSize(segments, maxDeltaSize, maxDeltaCount)
reason = fmt.Sprintf("level zero views force to trigger, pickedSize=%.2fB, pickedCount=%d", pickedSize, pickedCount)
return picked, reason
}

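// pickByMaxCountSize picks the leading segments whose cumulative deltalog count stays <= maxCount and cumulative delta size stays <= maxSize; the limits are only checked once the picked count and size are both non-zero.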
func pickByMaxCountSize(segments []*SegmentView, maxSize float64, maxCount int) ([]*SegmentView, float64) {
var (
curDeltaCount = 0
curDeltaSize = float64(0)
)
func pickByMaxCountSize(segments []*SegmentView, maxSize float64, maxCount int) (picked []*SegmentView, pickedSize float64, pickedCount int) {
idx := 0
for _, view := range segments {
targetCount := view.DeltalogCount + curDeltaCount
targetSize := view.DeltaSize + curDeltaSize
targetCount := view.DeltalogCount + pickedCount
targetSize := view.DeltaSize + pickedSize

if (curDeltaCount != 0 && curDeltaSize != float64(0)) && (targetSize > maxSize || targetCount > maxCount) {
if (pickedCount != 0 && pickedSize != float64(0)) && (targetSize > maxSize || targetCount > maxCount) {
break
}

curDeltaCount = targetCount
curDeltaSize = targetSize
pickedCount = targetCount
pickedSize = targetSize
idx += 1
}
return segments[:idx], curDeltaSize
return segments[:idx], pickedSize, pickedCount
}
1 change: 1 addition & 0 deletions internal/datacoord/compaction_l0_view_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ func (s *LevelZeroSegmentsViewSuite) TestTrigger() {
if view.dmlPos.Timestamp < test.prepEarliestT {
view.DeltalogCount = test.prepCountEach
view.DeltaSize = test.prepSizeEach
view.DeltaRowCount = 1
}
}
log.Info("LevelZeroSegmentsView", zap.String("view", s.v.String()))
Expand Down
27 changes: 21 additions & 6 deletions internal/datacoord/compaction_view.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ type SegmentView struct {
BinlogCount int
StatslogCount int
DeltalogCount int

// row count
DeltaRowCount int
}

func (s *SegmentView) Clone() *SegmentView {
Expand All @@ -104,6 +107,7 @@ func (s *SegmentView) Clone() *SegmentView {
BinlogCount: s.BinlogCount,
StatslogCount: s.StatslogCount,
DeltalogCount: s.DeltalogCount,
DeltaRowCount: s.DeltaRowCount,
}
}

Expand All @@ -126,6 +130,7 @@ func GetViewsByInfo(segments ...*SegmentInfo) []*SegmentView {

DeltaSize: GetBinlogSizeAsBytes(segment.GetDeltalogs()),
DeltalogCount: GetBinlogCount(segment.GetDeltalogs()),
DeltaRowCount: GetBinlogEntriesNum(segment.GetDeltalogs()),

Size: GetBinlogSizeAsBytes(segment.GetBinlogs()),
BinlogCount: GetBinlogCount(segment.GetBinlogs()),
Expand All @@ -147,13 +152,13 @@ func (v *SegmentView) Equal(other *SegmentView) bool {
}

func (v *SegmentView) String() string {
return fmt.Sprintf("ID=%d, label=<%s>, state=%s, level=%s, binlogSize=%.2f, binlogCount=%d, deltaSize=%.2f, deltaCount=%d, expireSize=%.2f",
v.ID, v.label, v.State.String(), v.Level.String(), v.Size, v.BinlogCount, v.DeltaSize, v.DeltalogCount, v.ExpireSize)
return fmt.Sprintf("ID=%d, label=<%s>, state=%s, level=%s, binlogSize=%.2f, binlogCount=%d, deltaSize=%.2f, deltalogCount=%d, deltaRowCount=%d, expireSize=%.2f",
v.ID, v.label, v.State.String(), v.Level.String(), v.Size, v.BinlogCount, v.DeltaSize, v.DeltalogCount, v.DeltaRowCount, v.ExpireSize)
}

func (v *SegmentView) LevelZeroString() string {
return fmt.Sprintf("<ID=%d, level=%s, deltaSize=%.2f, deltaCount=%d>",
v.ID, v.Level.String(), v.DeltaSize, v.DeltalogCount)
return fmt.Sprintf("<ID=%d, level=%s, deltaSize=%.2f, deltaLogCount=%d, deltaRowCount=%d>",
v.ID, v.Level.String(), v.DeltaSize, v.DeltalogCount, v.DeltaRowCount)
}

func GetBinlogCount(fieldBinlogs []*datapb.FieldBinlog) int {
Expand All @@ -164,9 +169,19 @@ func GetBinlogCount(fieldBinlogs []*datapb.FieldBinlog) int {
return num
}

func GetBinlogSizeAsBytes(deltaBinlogs []*datapb.FieldBinlog) float64 {
// GetBinlogEntriesNum walks every binlog contained in the given field binlogs
// and returns the sum of their GetEntriesNum() values, converted to int.
// An empty or nil input yields 0.
func GetBinlogEntriesNum(fieldBinlogs []*datapb.FieldBinlog) int {
	total := 0
	for i := range fieldBinlogs {
		logs := fieldBinlogs[i].GetBinlogs()
		for j := range logs {
			// Convert each entry count individually before accumulating.
			total += int(logs[j].GetEntriesNum())
		}
	}
	return total
}

func GetBinlogSizeAsBytes(fieldBinlogs []*datapb.FieldBinlog) float64 {
var deltaSize float64
for _, deltaLogs := range deltaBinlogs {
for _, deltaLogs := range fieldBinlogs {
for _, l := range deltaLogs.GetBinlogs() {
deltaSize += float64(l.GetMemorySize())
}
Expand Down
1 change: 1 addition & 0 deletions internal/datacoord/compaction_view_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ func genTestDeltalogs(logCount int, logSize int64) []*datapb.FieldBinlog {

for i := 0; i < logCount; i++ {
binlog := &datapb.Binlog{
EntriesNum: int64(i),
LogSize: logSize,
MemorySize: logSize,
}
Expand Down
96 changes: 43 additions & 53 deletions internal/datanode/l0_compactor.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ func (t *levelZeroCompactionTask) serializeUpload(ctx context.Context, segmentWr

func (t *levelZeroCompactionTask) splitDelta(
ctx context.Context,
allDelta []*storage.DeleteData,
allDelta *storage.DeleteData,
targetSegIDs []int64,
) map[int64]*SegmentDeltaWriter {
traceCtx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "L0Compact splitDelta")
Expand All @@ -259,9 +259,6 @@ func (t *levelZeroCompactionTask) splitDelta(
startIdx := value.StartIdx
pk2SegmentIDs := value.Segment2Hits

pks := allDelta[value.DeleteDataIdx].Pks
tss := allDelta[value.DeleteDataIdx].Tss

for segmentID, hits := range pk2SegmentIDs {
for i, hit := range hits {
if hit {
Expand All @@ -271,23 +268,21 @@ func (t *levelZeroCompactionTask) splitDelta(
writer = NewSegmentDeltaWriter(segmentID, segment.GetPartitionID(), t.getCollection())
targetSegBuffer[segmentID] = writer
}
writer.Write(pks[startIdx+i], tss[startIdx+i])
writer.Write(allDelta.Pks[startIdx+i], allDelta.Tss[startIdx+i])
}
}
}
return true
})

return targetSegBuffer
}

type BatchApplyRet = struct {
DeleteDataIdx int
StartIdx int
Segment2Hits map[int64][]bool
StartIdx int
Segment2Hits map[int64][]bool
}

func (t *levelZeroCompactionTask) applyBFInParallel(ctx context.Context, deleteDatas []*storage.DeleteData, pool *conc.Pool[any], segmentBfs []*metacache.SegmentInfo) *typeutil.ConcurrentMap[int, *BatchApplyRet] {
func (t *levelZeroCompactionTask) applyBFInParallel(ctx context.Context, deltaData *storage.DeleteData, pool *conc.Pool[any], segmentBfs []*metacache.SegmentInfo) *typeutil.ConcurrentMap[int, *BatchApplyRet] {
_, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "L0Compact applyBFInParallel")
defer span.End()
batchSize := paramtable.Get().CommonCfg.BloomFilterApplyBatchSize.GetAsInt()
Expand All @@ -306,42 +301,37 @@ func (t *levelZeroCompactionTask) applyBFInParallel(ctx context.Context, deleteD
retIdx := 0
retMap := typeutil.NewConcurrentMap[int, *BatchApplyRet]()
var futures []*conc.Future[any]
for didx, data := range deleteDatas {
pks := data.Pks
for idx := 0; idx < len(pks); idx += batchSize {
startIdx := idx
endIdx := startIdx + batchSize
if endIdx > len(pks) {
endIdx = len(pks)
}
pks := deltaData.Pks
for idx := 0; idx < len(pks); idx += batchSize {
startIdx := idx
endIdx := startIdx + batchSize
if endIdx > len(pks) {
endIdx = len(pks)
}

retIdx += 1
tmpRetIndex := retIdx
deleteDataId := didx
future := pool.Submit(func() (any, error) {
ret := batchPredict(pks[startIdx:endIdx])
retMap.Insert(tmpRetIndex, &BatchApplyRet{
DeleteDataIdx: deleteDataId,
StartIdx: startIdx,
Segment2Hits: ret,
})
return nil, nil
retIdx += 1
tmpRetIndex := retIdx
future := pool.Submit(func() (any, error) {
ret := batchPredict(pks[startIdx:endIdx])
retMap.Insert(tmpRetIndex, &BatchApplyRet{
StartIdx: startIdx,
Segment2Hits: ret,
})
futures = append(futures, future)
}
return nil, nil
})
futures = append(futures, future)
}
conc.AwaitAll(futures...)

return retMap
}

func (t *levelZeroCompactionTask) process(ctx context.Context, batchSize int, targetSegments []int64, deltaLogs ...[]string) ([]*datapb.CompactionSegment, error) {
_, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "L0Compact process")
ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "L0Compact process")
defer span.End()

results := make([]*datapb.CompactionSegment, 0)
batch := int(math.Ceil(float64(len(targetSegments)) / float64(batchSize)))
log := log.Ctx(t.ctx).With(
log := log.Ctx(ctx).With(
zap.Int64("planID", t.plan.GetPlanID()),
zap.Int("max conc segment counts", batchSize),
zap.Int("total segment counts", len(targetSegments)),
Expand Down Expand Up @@ -369,33 +359,33 @@ func (t *levelZeroCompactionTask) process(ctx context.Context, batchSize int, ta
return nil, err
}

log.Info("L0 compaction finished one batch", zap.Int("batch no.", i), zap.Int("batch segment count", len(batchResults)))
log.Info("L0 compaction finished one batch",
zap.Int("batch no.", i),
zap.Int("total deltaRowCount", int(allDelta.RowCount)),
zap.Int("batch segment count", len(batchResults)))
results = append(results, batchResults...)
}

log.Info("L0 compaction process done")
return results, nil
}

func (t *levelZeroCompactionTask) loadDelta(ctx context.Context, deltaLogs ...[]string) ([]*storage.DeleteData, error) {
_, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "L0Compact loadDelta")
func (t *levelZeroCompactionTask) loadDelta(ctx context.Context, deltaLogs []string) (*storage.DeleteData, error) {
ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "L0Compact loadDelta")
defer span.End()
allData := make([]*storage.DeleteData, 0, len(deltaLogs))
for _, paths := range deltaLogs {
blobBytes, err := t.Download(ctx, paths)
if err != nil {
return nil, err
}
blobs := make([]*storage.Blob, 0, len(blobBytes))
for _, blob := range blobBytes {
blobs = append(blobs, &storage.Blob{Value: blob})
}
_, _, dData, err := storage.NewDeleteCodec().Deserialize(blobs)
if err != nil {
return nil, err
}

allData = append(allData, dData)
blobBytes, err := t.Download(ctx, deltaLogs)
if err != nil {
return nil, err
}
return allData, nil
blobs := make([]*storage.Blob, 0, len(blobBytes))
for _, blob := range blobBytes {
blobs = append(blobs, &storage.Blob{Value: blob})
}
_, _, dData, err := storage.NewDeleteCodec().Deserialize(blobs)
if err != nil {
return nil, err
}

return dData, nil
}
12 changes: 6 additions & 6 deletions internal/datanode/l0_compactor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ func (s *LevelZeroCompactionTaskSuite) TestSplitDelta() {
s.mockMeta.EXPECT().Collection().Return(1)

targetSegIDs := predicted
deltaWriters := s.task.splitDelta(context.TODO(), []*storage.DeleteData{s.dData}, targetSegIDs)
deltaWriters := s.task.splitDelta(context.TODO(), s.dData, targetSegIDs)

s.NotEmpty(deltaWriters)
s.ElementsMatch(predicted, lo.Keys(deltaWriters))
Expand Down Expand Up @@ -449,16 +449,16 @@ func (s *LevelZeroCompactionTaskSuite) TestLoadDelta() {
}

for _, test := range tests {
dDatas, err := s.task.loadDelta(ctx, test.paths)
dData, err := s.task.loadDelta(ctx, test.paths)

if test.expectError {
s.Error(err)
} else {
s.NoError(err)
s.NotEmpty(dDatas)
s.EqualValues(1, len(dDatas))
s.ElementsMatch(s.dData.Pks, dDatas[0].Pks)
s.Equal(s.dData.RowCount, dDatas[0].RowCount)
s.NotEmpty(dData)
s.NotNil(dData)
s.ElementsMatch(s.dData.Pks, dData.Pks)
s.Equal(s.dData.RowCount, dData.RowCount)
}
}
}
Loading