Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
009753f
fix(copilot): prevent background agent stalls and context hallucination
majdyz Feb 19, 2026
fed645c
Merge branch 'dev' of github.com:Significant-Gravitas/AutoGPT into fi…
majdyz Feb 19, 2026
23225aa
fix(copilot): address review comments — slot counting, heartbeat, err…
majdyz Feb 19, 2026
a0a040f
fix(copilot): sandbox kill, tool event logging, and background task UX
majdyz Feb 19, 2026
c3e94f7
fix(copilot): address review comments — counter order, error markers,…
majdyz Feb 19, 2026
240e403
fix(copilot): fix transcript validation and resume test resilience
majdyz Feb 19, 2026
7ee870e
fix(copilot): catch OSError in sandbox killpg to prevent zombie proce…
majdyz Feb 19, 2026
b1c5000
fix(copilot): improve tool flush logging, transcript capture, and sta…
majdyz Feb 20, 2026
e476185
fix(copilot): mitigate SDK hook race condition and improve diagnostic…
majdyz Feb 20, 2026
78b52b9
fix(copilot): address PR review comments — runtime check, SDK version…
majdyz Feb 20, 2026
d81e7dd
Merge branch 'dev' into fix/messed-up-copilot
majdyz Feb 20, 2026
eb7bd6b
fix(copilot): unify context-building logic for resume and non-resume …
majdyz Feb 20, 2026
4be03fc
fix(copilot): remove redundant resolveInProgressTools frontend safety…
majdyz Feb 20, 2026
7acbbd0
poetry lock
majdyz Feb 20, 2026
1799559
poetry lock
majdyz Feb 20, 2026
372f9bf
fix(copilot): address review comments — SDK compat test, output_len, …
majdyz Feb 20, 2026
9161090
Merge branch 'dev' of github.com:Significant-Gravitas/AutoGPT into fi…
majdyz Feb 20, 2026
d3299cf
fix(copilot): remove redundant resolveInProgressTools streaming→ready…
majdyz Feb 20, 2026
3491365
poetry lock
majdyz Feb 20, 2026
3a38b5e
fix(copilot): address review comments — wait_for_stash fast path, err…
majdyz Feb 20, 2026
a408b45
fix(copilot): don't flush parallel tool calls prematurely
majdyz Feb 20, 2026
8c2363e
fix(copilot): add safety-net flush and diagnostic logging for paralle…
majdyz Feb 20, 2026
d937c68
fix(copilot): handle stream ending without text + PostToolUse logging
majdyz Feb 20, 2026
e18b3c5
Merge branch 'dev' into fix/messed-up-copilot
majdyz Feb 20, 2026
e1e3b60
poetry lock
majdyz Feb 20, 2026
37355f7
fix(copilot): non-cancelling heartbeat, incremental saves, frontend r…
majdyz Feb 20, 2026
9106f0b
poetry lock
majdyz Feb 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 41 additions & 16 deletions autogpt_platform/backend/backend/copilot/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,13 +432,23 @@ async def _get_session_from_db(session_id: str) -> ChatSession | None:
return session


async def upsert_chat_session(session: ChatSession) -> ChatSession:
async def upsert_chat_session(
session: ChatSession,
*,
existing_message_count: int | None = None,
) -> ChatSession:
"""Update a chat session in both cache and database.

Uses session-level locking to prevent race conditions when concurrent
operations (e.g., background title update and main stream handler)
attempt to upsert the same session simultaneously.

Args:
existing_message_count: If provided, skip the DB query to count
existing messages. The caller is responsible for tracking this
accurately. Useful for incremental saves in a streaming loop
where the caller already knows how many messages are persisted.

Raises:
DatabaseError: If the database write fails. The cache is still updated
as a best-effort optimization, but the error is propagated to ensure
Expand All @@ -450,15 +460,20 @@ async def upsert_chat_session(session: ChatSession) -> ChatSession:

async with lock:
# Get existing message count from DB for incremental saves
existing_message_count = await chat_db().get_chat_session_message_count(
session.session_id
)
if existing_message_count is None:
existing_message_count = await chat_db().get_chat_session_message_count(
session.session_id
)

db_error: Exception | None = None

# Save to database (primary storage)
try:
await _save_session_to_db(session, existing_message_count)
await _save_session_to_db(
session,
existing_message_count,
skip_existence_check=existing_message_count > 0,
)
except Exception as e:
logger.error(
f"Failed to save session {session.session_id} to database: {e}"
Expand Down Expand Up @@ -489,21 +504,31 @@ async def upsert_chat_session(session: ChatSession) -> ChatSession:


async def _save_session_to_db(
session: ChatSession, existing_message_count: int
session: ChatSession,
existing_message_count: int,
*,
skip_existence_check: bool = False,
) -> None:
"""Save or update a chat session in the database."""
"""Save or update a chat session in the database.

Args:
skip_existence_check: When True, skip the ``get_chat_session`` query
and assume the session row already exists. Saves one DB round trip
for incremental saves during streaming.
"""
db = chat_db()

# Check if session exists in DB
existing = await db.get_chat_session(session.session_id)
if not skip_existence_check:
# Check if session exists in DB
existing = await db.get_chat_session(session.session_id)

if not existing:
# Create new session
await db.create_chat_session(
session_id=session.session_id,
user_id=session.user_id,
)
existing_message_count = 0
if not existing:
# Create new session
await db.create_chat_session(
session_id=session.session_id,
user_id=session.user_id,
)
existing_message_count = 0

# Calculate total tokens from usage
total_prompt = sum(u.prompt_tokens for u in session.usage)
Expand Down
82 changes: 69 additions & 13 deletions autogpt_platform/backend/backend/copilot/sdk/response_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ class SDKResponseAdapter:
text blocks, tool calls, and message lifecycle.
"""

def __init__(self, message_id: str | None = None):
def __init__(self, message_id: str | None = None, session_id: str | None = None):
self.message_id = message_id or str(uuid.uuid4())
self.session_id = session_id
self.text_block_id = str(uuid.uuid4())
self.has_started_text = False
self.has_ended_text = False
Expand All @@ -61,6 +62,11 @@ def set_task_id(self, task_id: str) -> None:
"""Set the task ID for reconnection support."""
self.task_id = task_id

@property
def has_unresolved_tool_calls(self) -> bool:
    """Report whether any tracked tool call is still waiting for its output.

    A tool call counts as unresolved when its id is a key of
    ``current_tool_calls`` but is absent from ``resolved_tool_calls``.
    """
    resolved_ids = self.resolved_tool_calls
    return any(call_id not in resolved_ids for call_id in self.current_tool_calls)

def convert_message(self, sdk_message: Message) -> list[StreamBaseResponse]:
"""Convert a single SDK message to Vercel AI SDK format."""
responses: list[StreamBaseResponse] = []
Expand All @@ -77,7 +83,12 @@ def convert_message(self, sdk_message: Message) -> list[StreamBaseResponse]:
elif isinstance(sdk_message, AssistantMessage):
# Flush any SDK built-in tool calls that didn't get a UserMessage
# result (e.g. WebSearch, Read handled internally by the CLI).
self._flush_unresolved_tool_calls(responses)
# BUT skip flush when this AssistantMessage is a parallel tool
# continuation (contains only ToolUseBlocks) — the prior tools
# are still executing concurrently and haven't finished yet.
is_tool_only = all(isinstance(b, ToolUseBlock) for b in sdk_message.content)
if not is_tool_only:
self._flush_unresolved_tool_calls(responses)

# After tool results, the SDK sends a new AssistantMessage for the
# next LLM turn. Open a new step if the previous one was closed.
Expand Down Expand Up @@ -118,8 +129,24 @@ def convert_message(self, sdk_message: Message) -> list[StreamBaseResponse]:
blocks = content if isinstance(content, list) else []
resolved_in_blocks: set[str] = set()

sid = (self.session_id or "?")[:12]
parent_id_preview = getattr(sdk_message, "parent_tool_use_id", None)
logger.info(
"[SDK] [%s] UserMessage: %d blocks, content_type=%s, "
"parent_tool_use_id=%s",
sid,
len(blocks),
type(content).__name__,
parent_id_preview[:12] if parent_id_preview else "None",
)

for block in blocks:
if isinstance(block, ToolResultBlock) and block.tool_use_id:
# Skip if already resolved (e.g. by flush). An output has already
# been emitted for this call, and emitting a second one would
# confuse the frontend's state machine.
if block.tool_use_id in self.resolved_tool_calls:
continue
tool_info = self.current_tool_calls.get(block.tool_use_id, {})
tool_name = tool_info.get("name", "unknown")

Expand All @@ -144,7 +171,11 @@ def convert_message(self, sdk_message: Message) -> list[StreamBaseResponse]:
# Handle SDK built-in tool results carried via parent_tool_use_id
# instead of (or in addition to) ToolResultBlock content.
parent_id = sdk_message.parent_tool_use_id
if parent_id and parent_id not in resolved_in_blocks:
if (
parent_id
and parent_id not in resolved_in_blocks
and parent_id not in self.resolved_tool_calls
):
tool_info = self.current_tool_calls.get(parent_id, {})
tool_name = tool_info.get("name", "unknown")

Expand Down Expand Up @@ -228,11 +259,28 @@ def _flush_unresolved_tool_calls(self, responses: list[StreamBaseResponse]) -> N
output, which we pop and emit here before the next ``AssistantMessage``
starts.
"""
unresolved = [
(tid, info.get("name", "unknown"))
for tid, info in self.current_tool_calls.items()
if tid not in self.resolved_tool_calls
]
sid = (self.session_id or "?")[:12]
if not unresolved:
logger.info(
"[SDK] [%s] Flush called but all %d tool(s) already resolved",
sid,
len(self.current_tool_calls),
)
return
logger.info(
"[SDK] [%s] Flushing %d unresolved tool call(s): %s",
sid,
len(unresolved),
", ".join(f"{name}({tid[:12]})" for tid, name in unresolved),
)

flushed = False
for tool_id, tool_info in self.current_tool_calls.items():
if tool_id in self.resolved_tool_calls:
continue
tool_name = tool_info.get("name", "unknown")
for tool_id, tool_name in unresolved:
output = pop_pending_tool_output(tool_name)
if output is not None:
responses.append(
Expand All @@ -245,9 +293,12 @@ def _flush_unresolved_tool_calls(self, responses: list[StreamBaseResponse]) -> N
)
self.resolved_tool_calls.add(tool_id)
flushed = True
logger.debug(
f"Flushed pending output for built-in tool {tool_name} "
f"(call {tool_id})"
logger.info(
"[SDK] [%s] Flushed stashed output for %s " "(call %s, %d chars)",
sid,
tool_name,
tool_id[:12],
len(output),
)
else:
# No output available — emit an empty output so the frontend
Expand All @@ -263,9 +314,14 @@ def _flush_unresolved_tool_calls(self, responses: list[StreamBaseResponse]) -> N
)
self.resolved_tool_calls.add(tool_id)
flushed = True
logger.debug(
f"Flushed empty output for unresolved tool {tool_name} "
f"(call {tool_id})"
logger.warning(
"[SDK] [%s] Flushed EMPTY output for unresolved tool %s "
"(call %s) — stash was empty (likely SDK hook race "
"condition: PostToolUse hook hadn't completed before "
"flush was triggered)",
sid,
tool_name,
tool_id[:12],
)

if flushed and self.step_open:
Expand Down
Loading