pingcap · ti-chi-bot · Aug 3, 2022 · Aug 2, 2022 · Aug 3, 2022 · Aug 3, 2022
diff --git a/cdc/scheduler/internal/v3/coordinator.go b/cdc/scheduler/internal/v3/coordinator.go
@@ -239,6 +239,8 @@ func (c *coordinator) poll(
 	ctx context.Context, checkpointTs model.Ts, currentTables []model.TableID,
 	aliveCaptures map[model.CaptureID]*model.CaptureInfo,
 ) (newCheckpointTs, newResolvedTs model.Ts, err error) {
+	c.maybeCollectMetrics()
+
 	recvMsgs, err := c.recvMsgs(ctx)
 	if err != nil {
 		return checkpointCannotProceed, checkpointCannotProceed, errors.Trace(err)
@@ -250,10 +252,19 @@ func (c *coordinator) poll(
 	msgBuf = append(msgBuf, msgs...)
 	msgs = c.captureM.HandleAliveCaptureUpdate(aliveCaptures)
 	msgBuf = append(msgBuf, msgs...)
+
+	// Handle received messages to advance replication set.
+	msgs, err = c.replicationM.HandleMessage(recvMsgs)
+	if err != nil {
+		return checkpointCannotProceed, checkpointCannotProceed, errors.Trace(err)
+	}
+	msgBuf = append(msgBuf, msgs...)
+
 	if !c.captureM.CheckAllCaptureInitialized() {
-		// Skip handling messages and tasks for replication manager,
+		// Skip generating schedule tasks for replication manager,
 		// as not all capture are initialized.
-		return checkpointCannotProceed, checkpointCannotProceed, c.sendMsgs(ctx, msgBuf)
+		newCheckpointTs, newResolvedTs = c.replicationM.AdvanceCheckpoint(currentTables)
+		return newCheckpointTs, newResolvedTs, c.sendMsgs(ctx, msgBuf)
 	}
 
 	// Handle capture membership changes.
@@ -266,13 +277,6 @@ func (c *coordinator) poll(
 		msgBuf = append(msgBuf, msgs...)
 	}
 
-	// Handle received messages to advance replication set.
-	msgs, err = c.replicationM.HandleMessage(recvMsgs)
-	if err != nil {
-		return checkpointCannotProceed, checkpointCannotProceed, errors.Trace(err)
-	}
-	msgBuf = append(msgBuf, msgs...)
-
 	// Generate schedule tasks based on the current status.
 	replications := c.replicationM.ReplicationSets()
 	runningTasks := c.replicationM.RunningTasks()
@@ -292,8 +296,6 @@ func (c *coordinator) poll(
 		return checkpointCannotProceed, checkpointCannotProceed, errors.Trace(err)
 	}
 
-	c.maybeCollectMetrics()
-
 	// Checkpoint calculation
 	newCheckpointTs, newResolvedTs = c.replicationM.AdvanceCheckpoint(currentTables)
 	return newCheckpointTs, newResolvedTs, nil

diff --git a/cdc/scheduler/internal/v3/coordinator_test.go b/cdc/scheduler/internal/v3/coordinator_test.go
@@ -316,3 +316,96 @@ func TestCoordinatorDrainCapture(t *testing.T) {
 	require.NoError(t, err)
 	require.Equal(t, 1, count)
 }
+
+func TestCoordinatorAdvanceCheckpoint(t *testing.T) {
+	t.Parallel()
+
+	coord := newCoordinator("a", model.ChangeFeedID{}, 1, &config.SchedulerConfig{
+		HeartbeatTick:      math.MaxInt,
+		MaxTaskConcurrency: 1,
+	})
+	trans := transport.NewMockTrans()
+	coord.trans = trans
+
+	// Prepare captureM and replicationM with an initial poll.
+	// Two captures: "a" and "b".
+	// Two tables: 1 and 2.
+	ctx := context.Background()
+	currentTables := []model.TableID{1, 2}
+	aliveCaptures := map[model.CaptureID]*model.CaptureInfo{"a": {}, "b": {}}
+	_, _, err := coord.poll(ctx, 0, currentTables, aliveCaptures)
+	require.Nil(t, err)
+
+	// Initialize captures with a heartbeat response from each one.
+	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
+		Header: &schedulepb.Message_Header{
+			OwnerRevision: schedulepb.OwnerRevision{Revision: 1},
+		},
+		To:                "a",
+		From:              "b",
+		MsgType:           schedulepb.MsgHeartbeatResponse,
+		HeartbeatResponse: &schedulepb.HeartbeatResponse{},
+	})
+	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
+		Header: &schedulepb.Message_Header{
+			OwnerRevision: schedulepb.OwnerRevision{Revision: 1},
+		},
+		To:      "a",
+		From:    "a",
+		MsgType: schedulepb.MsgHeartbeatResponse,
+		HeartbeatResponse: &schedulepb.HeartbeatResponse{
+			Tables: []schedulepb.TableStatus{
+				{
+					TableID: 1, State: schedulepb.TableStateReplicating,
+					Checkpoint: schedulepb.Checkpoint{
+						CheckpointTs: 2, ResolvedTs: 4,
+					},
+				},
+				{
+					TableID: 2, State: schedulepb.TableStateReplicating,
+					Checkpoint: schedulepb.Checkpoint{
+						CheckpointTs: 2, ResolvedTs: 4,
+					},
+				},
+			},
+		},
+	})
+	cts, rts, err := coord.poll(ctx, 0, currentTables, aliveCaptures)
+	require.Nil(t, err)
+	require.True(t, coord.captureM.CheckAllCaptureInitialized())
+	require.EqualValues(t, 2, cts)
+	require.EqualValues(t, 4, rts)
+
+	// Checkpoint should be advanced even if there is an uninitialized capture.
+	aliveCaptures["c"] = &model.CaptureInfo{} // "c" sends no heartbeat response here
+	trans.RecvBuffer = nil
+	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
+		Header: &schedulepb.Message_Header{
+			OwnerRevision: schedulepb.OwnerRevision{Revision: 1},
+		},
+		To:      "a",
+		From:    "a",
+		MsgType: schedulepb.MsgHeartbeatResponse,
+		HeartbeatResponse: &schedulepb.HeartbeatResponse{
+			Tables: []schedulepb.TableStatus{
+				{
+					TableID: 1, State: schedulepb.TableStateReplicating,
+					Checkpoint: schedulepb.Checkpoint{
+						CheckpointTs: 3, ResolvedTs: 5,
+					},
+				},
+				{
+					TableID: 2, State: schedulepb.TableStateReplicating,
+					Checkpoint: schedulepb.Checkpoint{
+						CheckpointTs: 4, ResolvedTs: 5,
+					},
+				},
+			},
+		},
+	})
+	cts, rts, err = coord.poll(ctx, 0, currentTables, aliveCaptures)
+	require.Nil(t, err)
+	require.False(t, coord.captureM.CheckAllCaptureInitialized())
+	require.EqualValues(t, 3, cts) // table 1's CheckpointTs, the smaller of 3 and 4
+	require.EqualValues(t, 5, rts)
+}