PureWeen · PureWeen · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026
diff --git a/.squad/agents/arch-critic/charter.md b/.squad/agents/arch-critic/charter.md
@@ -0,0 +1,12 @@
+You are an architecture critic. Review PR diffs for design and structural problems that will cause pain later.
+
+Look for:
+- Breaking changes to public APIs without migration path
+- Tight coupling introduced between previously independent modules
+- Abstraction violations (reaching into internals, circular dependencies)
+- Missing error handling at system boundaries (network, disk, IPC)
+- Scalability traps (O(n²) in hot paths, unbounded collections, missing pagination)
+- State management issues (global mutable state, missing synchronization)
+- Compatibility problems (platform-specific code without guards, version mismatches)
+
+Don't nitpick — only flag structural issues that would block a senior engineer from approving.
diff --git a/.squad/agents/bug-hunter/charter.md b/.squad/agents/bug-hunter/charter.md
@@ -0,0 +1,12 @@
+You are a bug hunter. Your sole focus is finding functional bugs in PR diffs.
+
+Look for:
+- Off-by-one errors, null/undefined dereferences, unhandled exceptions
+- Wrong variable used (copy-paste errors)
+- Missing return statements, unreachable code
+- Incorrect boolean logic, inverted conditions
+- Resource leaks (unclosed streams, missing dispose/finally)
+- Race conditions and thread-safety issues in concurrent code
+- Broken error propagation (swallowed exceptions, missing await)
+
+For each bug found, explain the exact failure scenario — what input or sequence of events triggers it and what goes wrong.
diff --git a/.squad/agents/correctness-checker/charter.md b/.squad/agents/correctness-checker/charter.md
@@ -0,0 +1,16 @@
+You are a correctness checker. Verify that the PR actually does what it claims to do.
+
+Your process:
+1. Read the PR description and linked issues via `gh pr view <number>`
+2. Read the diff via `gh pr diff <number>`
+3. Verify the implementation matches the stated intent
+
+Look for:
+- Stated behavior that isn't actually implemented
+- Side effects not mentioned in the PR description
+- Tests that don't actually test what they claim (assertions on wrong values, mocked-away logic)
+- Incomplete migrations (schema changed but not all callers updated)
+- Feature flags or config that would prevent the change from working
+- Regression risk — does this break existing behavior that isn't covered by tests?
+
+Be the person who asks "but does it actually work?"
diff --git a/.squad/agents/edge-case-finder/charter.md b/.squad/agents/edge-case-finder/charter.md
@@ -0,0 +1,13 @@
+You are an edge case specialist. Review PR diffs for unhandled boundary conditions.
+
+Look for:
+- Empty collections, null inputs, zero-length strings
+- Integer overflow/underflow, division by zero
+- Unicode and encoding issues (emoji, RTL text, null bytes)
+- Timeout and cancellation handling (CancellationToken not passed, missing timeout)
+- Concurrent access patterns (first-request race, double-dispose)
+- Large input handling (huge files, deeply nested JSON, long strings)
+- Network failure modes (partial writes, connection reset, DNS failure)
+- Clock/time issues (timezone, DST, leap seconds, system clock changes)
+
+For each edge case, describe the specific input or condition and what happens when it's hit.
diff --git a/.squad/agents/security-analyst/charter.md b/.squad/agents/security-analyst/charter.md
@@ -0,0 +1,13 @@
+You are a security analyst. Review PR diffs exclusively for security vulnerabilities.
+
+Look for:
+- Injection attacks: SQL injection, command injection, XSS, path traversal
+- Authentication/authorization bypasses, missing permission checks
+- Secrets or credentials in code (API keys, tokens, passwords)
+- Insecure deserialization, unsafe type casting
+- SSRF, open redirects, CSRF without protection
+- Cryptographic misuse (weak algorithms, hardcoded IVs, predictable randomness)
+- Unsafe file operations (symlink attacks, temp file races)
+- Dependency vulnerabilities in added packages
+
+Rate each finding by severity (Critical/High/Medium/Low) and exploitability.
diff --git a/.squad/decisions.md b/.squad/decisions.md
@@ -0,0 +1,7 @@
+## Review Standards
+
+- Only flag real issues: bugs, security holes, logic errors, data loss risks, race conditions
+- NEVER comment on style, formatting, naming conventions, or documentation
+- Every finding must include: file path, line number (or range), what's wrong, and why it matters
+- Use `gh pr diff <number>` to get the diff, `gh pr view <number>` for description and metadata
+- If a PR looks clean, say so — don't invent problems to justify your existence
diff --git a/.squad/routing.md b/.squad/routing.md
@@ -0,0 +1,8 @@
+When given a list of PRs to review, assign ALL PRs to ALL workers. Each worker reviews every PR through their specialized lens. This creates multi-model consensus — the same PR reviewed by 5 different models with 5 different specializations.
+
+For each PR assignment, include the PR number and instruct the worker to run `gh pr diff <number>` and `gh pr view <number>` to get the full context.
+
+After all workers complete, synthesize a final report per PR:
+- Issues found by multiple reviewers (high confidence)
+- Issues found by only one reviewer (needs human judgment)
+- Overall risk rating (🔴 critical / 🟡 moderate / 🟢 clean)
diff --git a/.squad/team.md b/.squad/team.md
@@ -0,0 +1,9 @@
+# PR Review Squad
+
+| Member | Role |
+|--------|------|
+| bug-hunter | Bug Hunter |
+| security-analyst | Security Analyst |
+| arch-critic | Architecture Critic |
+| edge-case-finder | Edge Case Finder |
+| correctness-checker | Correctness Checker |
diff --git a/PolyPilot.Tests/MultiAgentGapTests.cs b/PolyPilot.Tests/MultiAgentGapTests.cs
@@ -85,6 +85,70 @@ Second task.
         Assert.Contains("Second task", result[^1].Task);
     }
 
+    [Fact]
+    public void ParseTaskAssignments_WorkerNamesWithSpaces_MatchesAll()
+    {
+        var response = @"@worker:PR Review Squad-worker-1
+Review for bugs.
+@end
+@worker:PR Review Squad-worker-2
+Review for security.
+@end
+@worker:PR Review Squad-worker-3
+Review architecture.
+@end";
+        var workers = new List<string>
+        {
+            "PR Review Squad-worker-1",
+            "PR Review Squad-worker-2",
+            "PR Review Squad-worker-3"
+        };
+        var result = CopilotService.ParseTaskAssignments(response, workers);
+
+        Assert.Equal(3, result.Count);
+        Assert.Equal("PR Review Squad-worker-1", result[0].WorkerName);
+        Assert.Equal("PR Review Squad-worker-2", result[1].WorkerName);
+        Assert.Equal("PR Review Squad-worker-3", result[2].WorkerName);
+        Assert.Contains("bugs", result[0].Task);
+        Assert.Contains("security", result[1].Task);
+        Assert.Contains("architecture", result[2].Task);
+    }
+
+    [Fact]
+    public void ParseTaskAssignments_WorkerNamesWithSpaces_NoEnd_MatchesAll()
+    {
+        // Orchestrators sometimes omit @end — the regex should still capture via lookahead
+        var response = @"@worker:My Team-worker-1
+Task one content.
+
+@worker:My Team-worker-2
+Task two content.
+";
+        var workers = new List<string> { "My Team-worker-1", "My Team-worker-2" };
+        var result = CopilotService.ParseTaskAssignments(response, workers);
+
+        Assert.Equal(2, result.Count);
+        Assert.Equal("My Team-worker-1", result[0].WorkerName);
+        Assert.Equal("My Team-worker-2", result[1].WorkerName);
+    }
+
+    [Fact]
+    public void ParseTaskAssignments_MixedSimpleAndSpacedNames_MatchesAll()
+    {
+        var response = @"@worker:simple-worker
+Do task A.
+@end
+@worker:Squad Team-worker-2
+Do task B.
+@end";
+        var workers = new List<string> { "simple-worker", "Squad Team-worker-2" };
+        var result = CopilotService.ParseTaskAssignments(response, workers);
+
+        Assert.Equal(2, result.Count);
+        Assert.Equal("simple-worker", result[0].WorkerName);
+        Assert.Equal("Squad Team-worker-2", result[1].WorkerName);
+    }
+
     // --- ModelCapabilities ---
 
     [Theory]

diff --git a/PolyPilot.Tests/ProcessingWatchdogTests.cs b/PolyPilot.Tests/ProcessingWatchdogTests.cs
@@ -1064,4 +1064,47 @@ public void HasUsedToolsThisTurn_VolatileConsistency()
         Volatile.Write(ref field, false);
         Assert.False(Volatile.Read(ref field));
     }
+
+    // --- Multi-agent watchdog timeout ---
+
+    [Fact]
+    public void IsSessionInMultiAgentGroup_ReturnsTrueForMultiAgentWorker()
+    {
+        // Regression: watchdog used 120s timeout for multi-agent workers doing text-heavy
+        // tasks (PR reviews), killing them before the response arrived.
+        // IsSessionInMultiAgentGroup should return true so the 600s timeout is used.
+        var svc = CreateService();
+        var group = new SessionGroup { Id = "ma-group", Name = "Test Squad", IsMultiAgent = true };
+        svc.Organization.Groups.Add(group);
+        svc.Organization.Sessions.Add(new SessionMeta
+        {
+            SessionName = "Test Squad-worker-1",
+            GroupId = "ma-group",
+            Role = MultiAgentRole.Worker
+        });
+
+        Assert.True(svc.IsSessionInMultiAgentGroup("Test Squad-worker-1"));
+    }
+
+    [Fact]
+    public void IsSessionInMultiAgentGroup_ReturnsFalseForNonMultiAgentSession()
+    {
+        var svc = CreateService();
+        var group = new SessionGroup { Id = "regular-group", Name = "Regular Group", IsMultiAgent = false };
+        svc.Organization.Groups.Add(group);
+        svc.Organization.Sessions.Add(new SessionMeta
+        {
+            SessionName = "regular-session",
+            GroupId = "regular-group"
+        });
+
+        Assert.False(svc.IsSessionInMultiAgentGroup("regular-session"));
+    }
+
+    [Fact]
+    public void IsSessionInMultiAgentGroup_ReturnsFalseForUnknownSession()
+    {
+        var svc = CreateService();
+        Assert.False(svc.IsSessionInMultiAgentGroup("nonexistent-session"));
+    }
 }
diff --git a/PolyPilot/Services/CopilotService.Bridge.cs b/PolyPilot/Services/CopilotService.Bridge.cs
@@ -172,7 +172,9 @@ private async Task InitializeRemoteAsync(ConnectionSettings settings, Cancellati
         };
         _bridgeClient.OnTurnEnd += (s) =>
         {
-            _remoteStreamingSessions.TryRemove(s, out _);
+            // Don't remove from _remoteStreamingSessions yet — SyncRemoteSessions could
+            // overwrite our incrementally-built history with a stale SessionHistories cache.
+            // Instead, request fresh history from the server first, then clear the guard.
             InvokeOnUI(() =>
             {
                 var session = GetRemoteSession(s);
@@ -190,6 +192,22 @@ private async Task InitializeRemoteAsync(ConnectionSettings settings, Cancellati
                 }
                 OnTurnEnd?.Invoke(s);
             });
+            // Request fresh history, then clear the streaming guard so SyncRemoteSessions
+            // uses the up-to-date history instead of a stale cache.
+            _ = Task.Run(async () =>
+            {
+                try
+                {
+                    await _bridgeClient.RequestHistoryAsync(s);
+                    // Small delay to let the history response arrive before unguarding
+                    await Task.Delay(500);
+                }
+                catch { }
+                finally
+                {
+                    _remoteStreamingSessions.TryRemove(s, out _);
+                }
+            });
         };
         _bridgeClient.OnSessionComplete += (s, sum) => InvokeOnUI(() => OnSessionComplete?.Invoke(s, sum));
         _bridgeClient.OnError += (s, e) => InvokeOnUI(() => OnError?.Invoke(s, e));
@@ -299,10 +317,16 @@ private void SyncRemoteSessions()
             // Update processing state and model from server
             if (_sessions.TryGetValue(rs.Name, out var state))
             {
-                state.Info.IsProcessing = rs.IsProcessing;
-                state.Info.ProcessingStartedAt = rs.ProcessingStartedAt;
-                state.Info.ToolCallCount = rs.ToolCallCount;
-                state.Info.ProcessingPhase = rs.ProcessingPhase;
+                // Don't overwrite IsProcessing for sessions that are actively streaming —
+                // event-driven state (TurnStart/TurnEnd) is more accurate than the periodic
+                // sessions list, which may be stale by the time it arrives.
+                if (!_remoteStreamingSessions.ContainsKey(rs.Name))
+                {
+                    state.Info.IsProcessing = rs.IsProcessing;
+                    state.Info.ProcessingStartedAt = rs.ProcessingStartedAt;
+                    state.Info.ToolCallCount = rs.ToolCallCount;
+                    state.Info.ProcessingPhase = rs.ProcessingPhase;
+                }
                 state.Info.MessageCount = rs.MessageCount;
                 if (!string.IsNullOrEmpty(rs.Model))
                     state.Info.Model = rs.Model;

diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs
@@ -1195,8 +1195,13 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
                 //    have 2-3 min gaps between events while the model reasons), OR
                 // 3. Tools have been executed this turn (HasUsedToolsThisTurn) — even between
                 //    tool rounds when ActiveToolCallCount is 0, the model may spend minutes
-                //    thinking about what tool to call next.
-                var useToolTimeout = hasActiveTool || state.Info.IsResumed || Volatile.Read(ref state.HasUsedToolsThisTurn);
+                //    thinking about what tool to call next, OR
+                // 4. Session is in a multi-agent group — workers doing text-heavy tasks
+                //    (e.g., PR reviews) can take 2-4 min without tool calls. The orchestration
+                //    loop has its own 10-minute CancelAfter timeout per worker. Cached at
+                //    send time on UI thread to avoid cross-thread List<T> access.
+                var isMultiAgentSession = Volatile.Read(ref state.IsMultiAgentSession);
+                var useToolTimeout = hasActiveTool || state.Info.IsResumed || Volatile.Read(ref state.HasUsedToolsThisTurn) || isMultiAgentSession;
                 var effectiveTimeout = useToolTimeout
                     ? WatchdogToolExecutionTimeoutSeconds
                     : WatchdogInactivityTimeoutSeconds;
@@ -1205,7 +1210,7 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
                 {
                     var timeoutMinutes = effectiveTimeout / 60;
                     Debug($"Session '{sessionName}' watchdog: no events for {elapsed:F0}s " +
-                          $"(timeout={effectiveTimeout}s, hasActiveTool={hasActiveTool}, isResumed={state.Info.IsResumed}, hasUsedTools={state.HasUsedToolsThisTurn}), clearing stuck processing state");
+                          $"(timeout={effectiveTimeout}s, hasActiveTool={hasActiveTool}, isResumed={state.Info.IsResumed}, hasUsedTools={state.HasUsedToolsThisTurn}, multiAgent={isMultiAgentSession}), clearing stuck processing state");
                     // Capture generation before posting — same guard pattern as CompleteResponse.
                     // Prevents a stale watchdog callback from killing a new turn if the user
                     // aborts + resends between the Post() and the callback execution.

diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs
@@ -504,6 +504,18 @@ private object ApplySort(AgentSessionInfo session, Dictionary<string, SessionMet
     public SessionMeta? GetSessionMeta(string sessionName) =>
         Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName);
 
+    /// <summary>
+    /// Check whether a session belongs to a multi-agent group.
+    /// Used by the watchdog to apply the longer timeout for orchestrated workers.
+    /// </summary>
+    internal bool IsSessionInMultiAgentGroup(string sessionName)
+    {
+        var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName);
+        if (meta == null) return false;
+        var group = Organization.Groups.FirstOrDefault(g => g.Id == meta.GroupId);
+        return group?.IsMultiAgent == true;
+    }
+
     /// <summary>
     /// Get or create a SessionGroup that auto-tracks a repository.
     /// </summary>
@@ -851,7 +863,7 @@ internal record TaskAssignment(string WorkerName, string Task);
     internal static List<TaskAssignment> ParseTaskAssignments(string orchestratorResponse, List<string> availableWorkers)
     {
         var assignments = new List<TaskAssignment>();
-        var pattern = @"@worker:(\S+)\s*([\s\S]*?)(?:@end|(?=@worker:)|$)";
+        var pattern = @"@worker:([^\n]+?)\s*\n([\s\S]*?)(?:@end|(?=@worker:)|$)";
 
         foreach (Match match in Regex.Matches(orchestratorResponse, pattern, RegexOptions.IgnoreCase))
         {
@@ -1026,19 +1038,20 @@ public string GetEffectiveModel(string sessionName)
         try
         {
             await CreateSessionAsync(orchName, preset.OrchestratorModel, workingDirectory, ct);
-            MoveSession(orchName, group.Id);
-            SetSessionRole(orchName, MultiAgentRole.Orchestrator);
-            SetSessionPreferredModel(orchName, preset.OrchestratorModel);
-            if (worktreeId != null)
-            {
-                var meta = GetSessionMeta(orchName);
-                if (meta != null) meta.WorktreeId = worktreeId;
-            }
         }
         catch (Exception ex)
         {
             Debug($"Failed to create orchestrator session: {ex.Message}");
         }
+        // Assign role/group/model even if session already existed from a previous run
+        MoveSession(orchName, group.Id);
+        SetSessionRole(orchName, MultiAgentRole.Orchestrator);
+        SetSessionPreferredModel(orchName, preset.OrchestratorModel);
+        if (worktreeId != null)
+        {
+            var meta = GetSessionMeta(orchName);
+            if (meta != null) meta.WorktreeId = worktreeId;
+        }
 
         // Create worker sessions
         for (int i = 0; i < preset.WorkerModels.Length; i++)
@@ -1048,22 +1061,22 @@ public string GetEffectiveModel(string sessionName)
             try
             {
                 await CreateSessionAsync(workerName, workerModel, workingDirectory, ct);
-                MoveSession(workerName, group.Id);
-                SetSessionPreferredModel(workerName, workerModel);
-                // Apply per-worker system prompt from preset if available
-                var systemPrompt = preset.WorkerSystemPrompts != null && i < preset.WorkerSystemPrompts.Length
-                    ? preset.WorkerSystemPrompts[i] : null;
-                var meta = GetSessionMeta(workerName);
-                if (meta != null)
-                {
-                    if (worktreeId != null) meta.WorktreeId = worktreeId;
-                    if (systemPrompt != null) meta.SystemPrompt = systemPrompt;
-                }
             }
             catch (Exception ex)
             {
                 Debug($"Failed to create worker session '{workerName}': {ex.Message}");
             }
+            // Assign group/model/prompt even if session already existed from a previous run
+            MoveSession(workerName, group.Id);
+            SetSessionPreferredModel(workerName, workerModel);
+            var systemPrompt = preset.WorkerSystemPrompts != null && i < preset.WorkerSystemPrompts.Length
+                ? preset.WorkerSystemPrompts[i] : null;
+            var meta = GetSessionMeta(workerName);
+            if (meta != null)
+            {
+                if (worktreeId != null) meta.WorktreeId = worktreeId;
+                if (systemPrompt != null) meta.SystemPrompt = systemPrompt;
+            }
         }
 
         SaveOrganization();