Skip to content

Commit bf7a662

Browse files
committed
fix: prevent time-travel bug in parallel tool calling
## Problem When multiple tools are called in parallel (e.g., update_todo_list + new_task), tool_results could appear in API history BEFORE their corresponding tool_use blocks, causing Anthropic API 400 errors: "unexpected `tool_use_id` found in `tool_result` blocks" ## Root Cause Tools executed during streaming via presentAssistantMessage() BEFORE the assistant message was saved to history. When new_task triggered delegation and called flushPendingToolResultsToHistory(), it saved a user message (with tool_results) before the assistant message (with tool_uses) existed. ## Solution Added assistantMessageSavedToHistory coordination flag to ensure correct message ordering: 1. Flag is reset at the start of each API request 2. Flag is set to true after assistant message is saved to history 3. flushPendingToolResultsToHistory() waits for the flag before flushing 4. This ensures: [assistant with tool_uses] → [user with tool_results] Also removed the "CRITICAL: This tool MUST be called alone" restriction from new_task tool description to enable parallel tool calling. ## Changes - src/core/task/Task.ts: Added assistantMessageSavedToHistory flag and waiting logic in flushPendingToolResultsToHistory() - src/core/prompts/tools/native-tools/new_task.ts: Removed restriction preventing parallel tool calling - src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts: Added tests for waiting behavior ## Testing All 8 tests pass, including new tests verifying: - Skips waiting when flag is already true - Waits via pWaitFor when flag is false - Aborts without flushing when task is aborted during wait
1 parent e7965d9 commit bf7a662

File tree

2 files changed

+146
-1
lines changed

2 files changed

+146
-1
lines changed

src/core/task/Task.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,20 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
355355
userMessageContent: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolResultBlockParam)[] = []
356356
userMessageContentReady = false
357357

358+
/**
359+
* Flag indicating whether the assistant message for the current streaming session
360+
* has been saved to API conversation history.
361+
*
362+
* This is critical for parallel tool calling: tools should NOT execute until
363+
* the assistant message is saved. Otherwise, if a tool like `new_task` triggers
364+
* `flushPendingToolResultsToHistory()`, the user message with tool_results would
365+
* appear BEFORE the assistant message with tool_uses, causing API errors.
366+
*
367+
* Reset to `false` at the start of each API request.
368+
* Set to `true` after the assistant message is saved in `recursivelyMakeClineRequests`.
369+
*/
370+
assistantMessageSavedToHistory = false
371+
358372
/**
359373
* Push a tool_result block to userMessageContent, preventing duplicates.
360374
* Duplicate tool_use_ids cause API errors.
@@ -1063,6 +1077,36 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
10631077
return
10641078
}
10651079

1080+
// CRITICAL: Wait for the assistant message to be saved to API history first.
1081+
// Without this, tool_result blocks would appear BEFORE tool_use blocks in the
1082+
// conversation history, causing API errors like:
1083+
// "unexpected `tool_use_id` found in `tool_result` blocks"
1084+
//
1085+
// This can happen when parallel tools are called (e.g., update_todo_list + new_task).
1086+
// Tools execute during streaming via presentAssistantMessage, BEFORE the assistant
1087+
// message is saved. When new_task triggers delegation, it calls this method to
1088+
// flush pending results - but the assistant message hasn't been saved yet.
1089+
//
1090+
// The assistantMessageSavedToHistory flag is:
1091+
// - Reset to false at the start of each API request
1092+
// - Set to true after the assistant message is saved in recursivelyMakeClineRequests
1093+
if (!this.assistantMessageSavedToHistory) {
1094+
await pWaitFor(() => this.assistantMessageSavedToHistory || this.abort, {
1095+
interval: 50,
1096+
timeout: 30_000, // 30 second timeout as safety net
1097+
}).catch(() => {
1098+
// If timeout or abort, log and proceed anyway to avoid hanging
1099+
console.warn(
1100+
`[Task#${this.taskId}] flushPendingToolResultsToHistory: timed out waiting for assistant message to be saved`,
1101+
)
1102+
})
1103+
}
1104+
1105+
// If task was aborted while waiting, don't flush
1106+
if (this.abort) {
1107+
return
1108+
}
1109+
10661110
// Save the user message with tool_result blocks
10671111
const userMessage: Anthropic.MessageParam = {
10681112
role: "user",
@@ -2707,6 +2751,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
27072751
this.userMessageContentReady = false
27082752
this.didRejectTool = false
27092753
this.didAlreadyUseTool = false
2754+
this.assistantMessageSavedToHistory = false
27102755
// Reset tool failure flag for each new assistant turn - this ensures that tool failures
27112756
// only prevent attempt_completion within the same assistant message, not across turns
27122757
// (e.g., if a tool fails, then user sends a message saying "just complete anyway")
@@ -3488,6 +3533,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
34883533
{ role: "assistant", content: assistantContent },
34893534
reasoningMessage || undefined,
34903535
)
3536+
this.assistantMessageSavedToHistory = true
34913537

34923538
TelemetryService.instance.captureConversationMessage(this.taskId, "assistant")
34933539
}

src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,12 @@ vi.mock("fs/promises", async (importOriginal) => {
3838
}
3939
})
4040

41+
const { mockPWaitFor } = vi.hoisted(() => {
42+
return { mockPWaitFor: vi.fn().mockImplementation(async () => Promise.resolve()) }
43+
})
44+
4145
vi.mock("p-wait-for", () => ({
42-
default: vi.fn().mockImplementation(async () => Promise.resolve()),
46+
default: mockPWaitFor,
4347
}))
4448

4549
vi.mock("vscode", () => {
@@ -344,4 +348,99 @@ describe("flushPendingToolResultsToHistory", () => {
344348
expect((task.apiConversationHistory[0] as any).ts).toBeGreaterThanOrEqual(beforeTs)
345349
expect((task.apiConversationHistory[0] as any).ts).toBeLessThanOrEqual(afterTs)
346350
})
351+
352+
it("should skip waiting for assistantMessageSavedToHistory when flag is already true", async () => {
353+
const task = new Task({
354+
provider: mockProvider,
355+
apiConfiguration: mockApiConfig,
356+
task: "test task",
357+
startTask: false,
358+
})
359+
360+
// Set flag to true (assistant message already saved)
361+
task.assistantMessageSavedToHistory = true
362+
363+
// Set up pending tool result
364+
task.userMessageContent = [
365+
{
366+
type: "tool_result",
367+
tool_use_id: "tool-skip-wait",
368+
content: "Result when flag is true",
369+
},
370+
]
371+
372+
// Clear mock call history
373+
mockPWaitFor.mockClear()
374+
375+
await task.flushPendingToolResultsToHistory()
376+
377+
// Should not have called pWaitFor since flag was already true
378+
expect(mockPWaitFor).not.toHaveBeenCalled()
379+
380+
// Should still save the message
381+
expect(task.apiConversationHistory.length).toBe(1)
382+
expect((task.apiConversationHistory[0].content as any[])[0].tool_use_id).toBe("tool-skip-wait")
383+
})
384+
385+
it("should wait for assistantMessageSavedToHistory when flag is false", async () => {
386+
const task = new Task({
387+
provider: mockProvider,
388+
apiConfiguration: mockApiConfig,
389+
task: "test task",
390+
startTask: false,
391+
})
392+
393+
// Flag is false by default - assistant message not yet saved
394+
expect(task.assistantMessageSavedToHistory).toBe(false)
395+
396+
// Set up pending tool result
397+
task.userMessageContent = [
398+
{
399+
type: "tool_result",
400+
tool_use_id: "tool-wait",
401+
content: "Result when flag is false",
402+
},
403+
]
404+
405+
// Clear mock call history
406+
mockPWaitFor.mockClear()
407+
408+
await task.flushPendingToolResultsToHistory()
409+
410+
// Should have called pWaitFor since flag was false
411+
expect(mockPWaitFor).toHaveBeenCalled()
412+
413+
// Should still save the message (mock resolves immediately)
414+
expect(task.apiConversationHistory.length).toBe(1)
415+
})
416+
417+
it("should not flush when task is aborted during wait", async () => {
418+
const task = new Task({
419+
provider: mockProvider,
420+
apiConfiguration: mockApiConfig,
421+
task: "test task",
422+
startTask: false,
423+
})
424+
425+
// Flag is false - will need to wait
426+
task.assistantMessageSavedToHistory = false
427+
428+
// Set up pending tool result
429+
task.userMessageContent = [
430+
{
431+
type: "tool_result",
432+
tool_use_id: "tool-aborted",
433+
content: "Should not be saved",
434+
},
435+
]
436+
437+
// Set abort flag - this will cause the condition in pWaitFor to return true
438+
// AND will cause early return after the wait
439+
task.abort = true
440+
441+
await task.flushPendingToolResultsToHistory()
442+
443+
// Should not have saved anything since task was aborted
444+
expect(task.apiConversationHistory.length).toBe(0)
445+
})
347446
})

0 commit comments

Comments
 (0)