Skip to content

Commit 367d7bd

Browse files
committed
Add more metadata to checkpoint creation logs
1 parent: d998a20 · commit: 367d7bd

File tree

3 files changed

+14
-1
lines changed

3 files changed

+14
-1
lines changed

apps/coordinator/src/index.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -906,6 +906,7 @@ class TaskCoordinator {
906906

907907
const ack = await this.#platformSocket?.sendWithAck("CHECKPOINT_CREATED", {
908908
version: "v1",
909+
runId: socket.data.runId,
909910
attemptFriendlyId: message.attemptFriendlyId,
910911
docker: checkpoint.docker,
911912
location: checkpoint.location,
@@ -986,6 +987,7 @@ class TaskCoordinator {
986987

987988
const ack = await this.#platformSocket?.sendWithAck("CHECKPOINT_CREATED", {
988989
version: "v1",
990+
runId: socket.data.runId,
989991
attemptFriendlyId: message.attemptFriendlyId,
990992
docker: checkpoint.docker,
991993
location: checkpoint.location,
@@ -1066,6 +1068,7 @@ class TaskCoordinator {
10661068

10671069
const ack = await this.#platformSocket?.sendWithAck("CHECKPOINT_CREATED", {
10681070
version: "v1",
1071+
runId: socket.data.runId,
10691072
attemptFriendlyId: message.attemptFriendlyId,
10701073
docker: checkpoint.docker,
10711074
location: checkpoint.location,

apps/webapp/app/v3/services/createCheckpoint.server.ts

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ export class CreateCheckpointService extends BaseService {
5050
});
5151

5252
if (!attempt) {
53-
logger.error("Attempt not found", { attemptFriendlyId: params.attemptFriendlyId });
53+
logger.error("Attempt not found", params);
5454

5555
return {
5656
success: false,
@@ -70,6 +70,7 @@ export class CreateCheckpointService extends BaseService {
7070
id: attempt.taskRunId,
7171
status: attempt.taskRun.status,
7272
},
73+
params,
7374
});
7475

7576
return {
@@ -84,6 +85,7 @@ export class CreateCheckpointService extends BaseService {
8485
logger.error("Missing deployment or image ref", {
8586
attemptId: attempt.id,
8687
workerId: attempt.backgroundWorker.id,
88+
params,
8789
});
8890

8991
return {
@@ -170,6 +172,7 @@ export class CreateCheckpointService extends BaseService {
170172
taskRunId: attempt.taskRunId,
171173
type: "WAIT_FOR_TASK",
172174
reason,
175+
params,
173176
});
174177
await marqs?.cancelHeartbeat(attempt.taskRunId);
175178

@@ -182,6 +185,7 @@ export class CreateCheckpointService extends BaseService {
182185
if (!childRun) {
183186
logger.error("CreateCheckpointService: WAIT_FOR_TASK child run not found", {
184187
friendlyId: reason.friendlyId,
188+
params,
185189
});
186190

187191
return {
@@ -201,13 +205,15 @@ export class CreateCheckpointService extends BaseService {
201205
childRun,
202206
attempt,
203207
checkpointEvent,
208+
params,
204209
});
205210
} else {
206211
logger.error("CreateCheckpointService: Failed to resume dependent parents", {
207212
result,
208213
childRun,
209214
attempt,
210215
checkpointEvent,
216+
params,
211217
});
212218
}
213219

@@ -233,6 +239,7 @@ export class CreateCheckpointService extends BaseService {
233239
attemptId: attempt.id,
234240
taskRunId: attempt.taskRunId,
235241
type: "WAIT_FOR_BATCH",
242+
params,
236243
});
237244
await marqs?.cancelHeartbeat(attempt.taskRunId);
238245

@@ -248,6 +255,7 @@ export class CreateCheckpointService extends BaseService {
248255
if (!batchRun) {
249256
logger.error("CreateCheckpointService: Batch not found", {
250257
friendlyId: reason.batchFriendlyId,
258+
params,
251259
});
252260

253261
return {
@@ -297,6 +305,7 @@ export class CreateCheckpointService extends BaseService {
297305
logger.error("No checkpoint event", {
298306
attemptId: attempt.id,
299307
checkpointId: checkpoint.id,
308+
params,
300309
});
301310
await marqs?.acknowledgeMessage(attempt.taskRunId);
302311

packages/core/src/v3/schemas/messages.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -453,6 +453,7 @@ export const CoordinatorToPlatformMessages = {
453453
CHECKPOINT_CREATED: {
454454
message: z.object({
455455
version: z.literal("v1").default("v1"),
456+
runId: z.string().optional(),
456457
attemptFriendlyId: z.string(),
457458
docker: z.boolean(),
458459
location: z.string(),

0 commit comments

Comments
 (0)