@@ -66,18 +66,7 @@ export class WaitpointSystem {
66
66
isError : boolean ;
67
67
} ;
68
68
} ) : Promise < Waitpoint > {
69
- // 1. Find the TaskRuns blocked by this waitpoint
70
- const affectedTaskRuns = await this . $ . prisma . taskRunWaitpoint . findMany ( {
71
- where : { waitpointId : id } ,
72
- select : { taskRunId : true , spanIdToComplete : true , createdAt : true } ,
73
- } ) ;
74
-
75
- if ( affectedTaskRuns . length === 0 ) {
76
- this . $ . logger . debug ( `completeWaitpoint: No TaskRunWaitpoints found for waitpoint` , {
77
- waitpointId : id ,
78
- } ) ;
79
- }
80
-
69
+ // 1. Complete the Waitpoint (if not completed)
81
70
let [ waitpointError , waitpoint ] = await tryCatch (
82
71
this . $ . prisma . waitpoint . update ( {
83
72
where : { id, status : "PENDING" } ,
@@ -109,15 +98,44 @@ export class WaitpointSystem {
109
98
throw new Error ( `Waitpoint ${ id } not found` ) ;
110
99
}
111
100
112
- //schedule trying to continue the runs
101
+ if ( waitpoint . status !== "COMPLETED" ) {
102
+ this . $ . logger . error ( `completeWaitpoint: waitpoint is not completed` , {
103
+ waitpointId : id ,
104
+ } ) ;
105
+ throw new Error ( `Waitpoint ${ id } is not completed` ) ;
106
+ }
107
+
108
+ // 2. Find the TaskRuns blocked by this waitpoint
109
+ const affectedTaskRuns = await this . $ . prisma . taskRunWaitpoint . findMany ( {
110
+ where : { waitpointId : id } ,
111
+ select : { taskRunId : true , spanIdToComplete : true , createdAt : true } ,
112
+ } ) ;
113
+
114
+ if ( affectedTaskRuns . length === 0 ) {
115
+ this . $ . logger . debug ( `completeWaitpoint: no TaskRunWaitpoints found for waitpoint` , {
116
+ waitpointId : id ,
117
+ } ) ;
118
+ }
119
+
120
+ // 3. Schedule trying to continue the runs
113
121
for ( const run of affectedTaskRuns ) {
122
+ const jobId = `continueRunIfUnblocked:${ run . taskRunId } ` ;
123
+ //50ms in the future
124
+ const availableAt = new Date ( Date . now ( ) + 50 ) ;
125
+
126
+ this . $ . logger . debug ( `completeWaitpoint: enqueueing continueRunIfUnblocked` , {
127
+ waitpointId : id ,
128
+ runId : run . taskRunId ,
129
+ jobId,
130
+ availableAt,
131
+ } ) ;
132
+
114
133
await this . $ . worker . enqueue ( {
115
134
//this will debounce the call
116
- id : `continueRunIfUnblocked: ${ run . taskRunId } ` ,
135
+ id : jobId ,
117
136
job : "continueRunIfUnblocked" ,
118
137
payload : { runId : run . taskRunId } ,
119
- //50ms in the future
120
- availableAt : new Date ( Date . now ( ) + 50 ) ,
138
+ availableAt,
121
139
} ) ;
122
140
123
141
// emit an event to complete associated cached runs
@@ -469,6 +487,10 @@ export class WaitpointSystem {
469
487
}
470
488
471
489
public async continueRunIfUnblocked ( { runId } : { runId : string } ) {
490
+ this . $ . logger . debug ( `continueRunIfUnblocked: start` , {
491
+ runId,
492
+ } ) ;
493
+
472
494
// 1. Get the any blocking waitpoints
473
495
const blockingWaitpoints = await this . $ . prisma . taskRunWaitpoint . findMany ( {
474
496
where : { taskRunId : runId } ,
@@ -483,6 +505,10 @@ export class WaitpointSystem {
483
505
484
506
// 2. There are blockers still, so do nothing
485
507
if ( blockingWaitpoints . some ( ( w ) => w . waitpoint . status !== "COMPLETED" ) ) {
508
+ this . $ . logger . debug ( `continueRunIfUnblocked: blocking waitpoints still exist` , {
509
+ runId,
510
+ blockingWaitpoints,
511
+ } ) ;
486
512
return ;
487
513
}
488
514
@@ -505,15 +531,18 @@ export class WaitpointSystem {
505
531
} ) ;
506
532
507
533
if ( ! run ) {
508
- throw new Error ( `#continueRunIfUnblocked: run not found: ${ runId } ` ) ;
534
+ this . $ . logger . error ( `continueRunIfUnblocked: run not found` , {
535
+ runId,
536
+ } ) ;
537
+ throw new Error ( `continueRunIfUnblocked: run not found: ${ runId } ` ) ;
509
538
}
510
539
511
540
//4. Continue the run whether it's executing or not
512
541
await this . $ . runLock . lock ( "continueRunIfUnblocked" , [ runId ] , 5000 , async ( ) => {
513
542
const snapshot = await getLatestExecutionSnapshot ( this . $ . prisma , runId ) ;
514
543
515
544
if ( isFinishedOrPendingFinished ( snapshot . executionStatus ) ) {
516
- this . $ . logger . debug ( `# continueRunIfUnblocked: run is finished, skipping` , {
545
+ this . $ . logger . debug ( `continueRunIfUnblocked: run is finished, skipping` , {
517
546
runId,
518
547
snapshot,
519
548
} ) ;
@@ -555,6 +584,15 @@ export class WaitpointSystem {
555
584
556
585
await this . releaseConcurrencySystem . refillTokensForSnapshot ( snapshot ) ;
557
586
587
+ this . $ . logger . debug (
588
+ `continueRunIfUnblocked: run was still executing, sending notification` ,
589
+ {
590
+ runId,
591
+ snapshot,
592
+ newSnapshot,
593
+ }
594
+ ) ;
595
+
558
596
await sendNotificationToWorker ( {
559
597
runId,
560
598
snapshot : newSnapshot ,
@@ -563,7 +601,7 @@ export class WaitpointSystem {
563
601
} else {
564
602
// Because we cannot reacquire the concurrency, we need to enqueue the run again
565
603
// and because the run is still executing, we need to set the status to QUEUED_EXECUTING
566
- await this . enqueueSystem . enqueueRun ( {
604
+ const newSnapshot = await this . enqueueSystem . enqueueRun ( {
567
605
run,
568
606
env : run . runtimeEnvironment ,
569
607
snapshot : {
@@ -577,21 +615,27 @@ export class WaitpointSystem {
577
615
index : b . batchIndex ?? undefined ,
578
616
} ) ) ,
579
617
} ) ;
618
+
619
+ this . $ . logger . debug ( `continueRunIfUnblocked: run goes to QUEUED_EXECUTING` , {
620
+ runId,
621
+ snapshot,
622
+ newSnapshot,
623
+ } ) ;
580
624
}
581
625
} else {
582
626
if ( snapshot . executionStatus !== "RUN_CREATED" && ! snapshot . checkpointId ) {
583
627
// TODO: We're screwed, should probably fail the run immediately
584
- this . $ . logger . error ( `# continueRunIfUnblocked: run has no checkpoint` , {
628
+ this . $ . logger . error ( `continueRunIfUnblocked: run has no checkpoint` , {
585
629
runId : run . id ,
586
630
snapshot,
587
631
blockingWaitpoints,
588
632
} ) ;
589
- throw new Error ( `# continueRunIfUnblocked: run has no checkpoint: ${ run . id } ` ) ;
633
+ throw new Error ( `continueRunIfUnblocked: run has no checkpoint: ${ run . id } ` ) ;
590
634
}
591
635
592
636
//put it back in the queue, with the original timestamp (w/ priority)
593
637
//this prioritizes dequeuing waiting runs over new runs
594
- await this . enqueueSystem . enqueueRun ( {
638
+ const newSnapshot = await this . enqueueSystem . enqueueRun ( {
595
639
run,
596
640
env : run . runtimeEnvironment ,
597
641
snapshot : {
@@ -604,6 +648,12 @@ export class WaitpointSystem {
604
648
} ) ) ,
605
649
checkpointId : snapshot . checkpointId ?? undefined ,
606
650
} ) ;
651
+
652
+ this . $ . logger . debug ( `continueRunIfUnblocked: run goes to QUEUED` , {
653
+ runId,
654
+ snapshot,
655
+ newSnapshot,
656
+ } ) ;
607
657
}
608
658
} ) ;
609
659
@@ -613,6 +663,10 @@ export class WaitpointSystem {
613
663
taskRunId : runId ,
614
664
} ,
615
665
} ) ;
666
+
667
+ this . $ . logger . debug ( `continueRunIfUnblocked: removed blocking waitpoints` , {
668
+ runId,
669
+ } ) ;
616
670
}
617
671
618
672
public async createRunAssociatedWaitpoint (
0 commit comments