Skip to content

Commit

Permalink
pd, openapi(ticdc): retry on reset by peer when killing the pd instance…
Browse files Browse the repository at this point in the history
…, fix healthy panic (#7069)

ref #4757, close #7067, close #7094
  • Loading branch information
3AceShowHand authored Sep 16, 2022
1 parent fbca9fc commit f6a7403
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 2 deletions.
6 changes: 5 additions & 1 deletion cdc/api/v1/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -872,8 +872,12 @@ func (h *OpenAPI) ServerStatus(c *gin.Context) {
// @Failure 500 {object} model.HTTPError
// @Router /api/v1/health [get]
func (h *OpenAPI) Health(c *gin.Context) {
ctx := c.Request.Context()
if !h.capture.IsOwner() {
middleware.ForwardToOwnerMiddleware(h.capture)(c)
return
}

ctx := c.Request.Context()
health, err := h.statusProvider().IsHealthy(ctx)
if err != nil {
c.IndentedJSON(http.StatusInternalServerError, model.NewHTTPError(err))
Expand Down
3 changes: 2 additions & 1 deletion cdc/api/v1/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -896,9 +896,10 @@ func TestHealth(t *testing.T) {
// capture is owner
ctrl := gomock.NewController(t)
cp := mock_capture.NewMockCapture(ctrl)

api := testCase{url: "/api/v1/health", method: "GET"}
sp := mock_owner.NewMockStatusProvider(ctrl)
ownerRouter := newRouter(cp, sp)
api := testCase{url: "/api/v1/health", method: "GET"}

cp.EXPECT().IsReady().Return(true).AnyTimes()
cp.EXPECT().Info().DoAndReturn(func() (model.CaptureInfo, error) {
Expand Down
5 changes: 5 additions & 0 deletions pkg/errorutil/ignore.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,10 @@ func IsRetryableEtcdError(err error) bool {
if strings.Contains(etcdErr.Error(), "received prior goaway: code: NO_ERROR") {
return true
}

// This may happen if the PD instance is shut down by `kill -9`, regardless of whether the instance is the leader or not.
if strings.Contains(etcdErr.Error(), "connection reset by peer") {
return true
}
return false
}
2 changes: 2 additions & 0 deletions pkg/errorutil/ignore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ func TestIsRetryableEtcdError(t *testing.T) {
{errors.New("rpc error: code = Unavailable desc = closing transport due to: " +
"connection error: desc = \\\"error reading from server: EOF\\\", " +
"received prior goaway: code: NO_ERROR\""), true},
{errors.New("rpc error: code = Unavailable desc = error reading from server: " +
"xxx: read: connection reset by peer"), true},
}

for _, item := range cases {
Expand Down

0 comments on commit f6a7403

Please sign in to comment.