-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
store/tikv: Continue GC when error happens during UnsafeDestroyRange #10743
Changes from 1 commit
533b63d
e16dd35
592b591
e424942
2f89696
eac96d0
718f3a5
45c97ef
26e9241
e7a2860
0e0a197
344d8ed
96d8608
1f8837f
8db81f6
a05a984
929b89d
8d49c0f
fa87e0c
4903c5c
c4c85d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -546,14 +546,22 @@ func (w *GCWorker) deleteRanges(ctx context.Context, safePoint uint64) error { | |
|
||
err = w.sendUnsafeDestroyRangeRequest(ctx, startKey, endKey) | ||
if err != nil { | ||
return errors.Trace(err) | ||
logutil.Logger(ctx).Info("[gc worker] delete range failed on range", | ||
zap.String("uuid", w.uuid), | ||
zap.Binary("startKey", startKey), | ||
zap.Binary("endKey", endKey), | ||
zap.Error(err)) | ||
} | ||
|
||
se := createSession(w.store) | ||
err = util.CompleteDeleteRange(se, r) | ||
se.Close() | ||
if err != nil { | ||
return errors.Trace(err) | ||
logutil.Logger(ctx).Info("[gc worker] failed to mark delete range task done", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to change the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea |
||
zap.String("uuid", w.uuid), | ||
zap.Binary("startKey", startKey), | ||
zap.Binary("endKey", endKey), | ||
zap.Error(err)) | ||
} | ||
} | ||
logutil.Logger(ctx).Info("[gc worker] finish delete ranges", | ||
|
@@ -587,14 +595,22 @@ func (w *GCWorker) redoDeleteRanges(ctx context.Context, safePoint uint64) error | |
|
||
err = w.sendUnsafeDestroyRangeRequest(ctx, startKey, endKey) | ||
if err != nil { | ||
return errors.Trace(err) | ||
logutil.Logger(ctx).Info("[gc worker] redo-delete range failed on range", | ||
zap.String("uuid", w.uuid), | ||
zap.Binary("startKey", startKey), | ||
zap.Binary("endKey", endKey), | ||
zap.Error(err)) | ||
} | ||
|
||
se := createSession(w.store) | ||
err := util.DeleteDoneRecord(se, r) | ||
se.Close() | ||
if err != nil { | ||
return errors.Trace(err) | ||
logutil.Logger(ctx).Info("[gc worker] failed to remove delete_range_done record", | ||
zap.String("uuid", w.uuid), | ||
zap.Binary("startKey", startKey), | ||
zap.Binary("endKey", endKey), | ||
zap.Error(err)) | ||
} | ||
} | ||
logutil.Logger(ctx).Info("[gc worker] finish redo-delete ranges", | ||
|
@@ -624,6 +640,7 @@ func (w *GCWorker) sendUnsafeDestroyRangeRequest(ctx context.Context, startKey [ | |
} | ||
|
||
var wg sync.WaitGroup | ||
errChan := make(chan error, len(stores)) | ||
|
||
for _, store := range stores { | ||
address := store.Address | ||
|
@@ -633,18 +650,29 @@ func (w *GCWorker) sendUnsafeDestroyRangeRequest(ctx context.Context, startKey [ | |
defer wg.Done() | ||
_, err1 := w.store.GetTiKVClient().SendRequest(ctx, address, req, tikv.UnsafeDestroyRangeTimeout) | ||
if err1 != nil { | ||
metrics.GCUnsafeDestroyRangeFailuresCounter.Inc() | ||
logutil.Logger(ctx).Error("[gc worker] destroy range on store failed with error", | ||
zap.String("uuid", w.uuid), | ||
zap.Uint64("storeID", storeID), | ||
zap.Error(err)) | ||
err = err1 | ||
} | ||
errChan <- err1 | ||
}() | ||
} | ||
|
||
for range stores { | ||
err1 := <-errChan | ||
if err1 != nil { | ||
err = err1 | ||
} | ||
} | ||
|
||
wg.Wait() | ||
|
||
return errors.Trace(err) | ||
if err != nil { | ||
return errors.New("[gc worker] destroy range failed on some stores") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Prefer to user There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These errors were already logged so I thought it's ok to drop them... And there may be multiple regions returned from above, so I think collect them into an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The error log will be scattered throughout the file in the current implementation. I think we should collect all error messages in a GC job and log them after the job finishes. |
||
} | ||
return nil | ||
} | ||
|
||
func (w *GCWorker) getUpStores(ctx context.Context) ([]*metapb.Store, error) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need to register it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done