Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
a7414f0
Initial plan
Copilot Feb 14, 2026
4bd6b79
Add multi-agent orchestration mode support with model, service, bridg…
Copilot Feb 14, 2026
87a2003
Address code review feedback: guard against empty Groups list, fix or…
Copilot Feb 14, 2026
68bbbe8
Enhance multi-agent orchestration: single-orchestrator invariant, con…
PureWeen Feb 17, 2026
4527351
Merge origin/main into copilot/add-multi-agent-support
PureWeen Feb 17, 2026
dce3134
Add full orchestrator loop and sidebar multi-agent support
PureWeen Feb 17, 2026
2cfd5b3
Add role toggle to session context menu in sidebar
PureWeen Feb 17, 2026
4b4bf11
Add multi-agent Send All input bar and mode selector to sidebar
PureWeen Feb 17, 2026
9db4aa0
Add mini orchestration toolbar to expanded session view for multi-age…
PureWeen Feb 17, 2026
1e5ef23
Add multi-agent controls to sidebar and expanded view
PureWeen Feb 17, 2026
d5e4530
Merge remote-tracking branch 'origin/main' into multi-agent-orchestra…
PureWeen Feb 17, 2026
81cded9
Merge remote-tracking branch 'origin/main' into pr-104
PureWeen Feb 18, 2026
d38dc70
Fix mobile model selector dropdown positioning
PureWeen Feb 18, 2026
3ee6fc6
Add per-agent model assignment and OrchestratorReflect mode
PureWeen Feb 18, 2026
676f9c4
Add model capability warnings, group presets, and race-safe dispatch
PureWeen Feb 18, 2026
df9e493
Add dedicated evaluator support, evaluation scoring, name-pattern inf…
PureWeen Feb 18, 2026
7d75b4b
Add end-to-end scenario tests and enhanced model name inference
PureWeen Feb 18, 2026
9abeffe
Rename 'Fast Iteration Squad' preset to 'Quick Reflection Cycle'
PureWeen Feb 18, 2026
d67a2fa
Begin consolidating GroupReflectionState into ReflectionCycle
PureWeen Feb 18, 2026
3fab5cc
Merge origin/main into pr-104 (resolve sidebar event conflicts)
PureWeen Feb 18, 2026
4fb3187
Fix merge issues: align sidebar with GroupPreset API and expose BaseDir
PureWeen Feb 18, 2026
76c0329
Fix runtime crash: remove OnCreateGroup parameter not present on Crea…
PureWeen Feb 18, 2026
ff9a49a
Align multi-agent stall handling with single-agent 2-consecutive tole…
PureWeen Feb 18, 2026
12c4a56
Fix toolbar overflow and preset picker positioning
PureWeen Feb 18, 2026
9626051
Fix preset picker: compact dropdown with flex-wrap layout
PureWeen Feb 18, 2026
107777a
Require worktree selection when creating multi-agent teams
PureWeen Feb 18, 2026
358aca1
Fix team name input: full-width on own row instead of margin-left:auto
PureWeen Feb 18, 2026
8851b1b
Merge remote-tracking branch 'origin/main' into pr-104
PureWeen Feb 18, 2026
490c124
Unify multi-agent creation: worktree → presets + custom name
PureWeen Feb 18, 2026
c61673c
Add 'Delete Team' option to multi-agent group context menu
PureWeen Feb 18, 2026
02ceef1
Fix mode selector to reflect actual group mode
PureWeen Feb 18, 2026
ea1fb81
Fix mode dropdowns: add Reflect option, fix selection, improve readab…
PureWeen Feb 18, 2026
4a7f89c
Mode-aware input placeholders across sidebar and dashboard
PureWeen Feb 18, 2026
562a415
Add max iterations control and auto-start reflection cycle
PureWeen Feb 18, 2026
132ab3b
Add iterations control to dashboard grid and expanded views
PureWeen Feb 18, 2026
752fc9c
Fix reflection loop stopping after 1 iteration
PureWeen Feb 18, 2026
2d392bf
Fix reconciliation scattering multi-agent sessions + add 14 stability…
PureWeen Feb 18, 2026
aa4ea4f
Add diagnostic logging for reconciliation orphaning and pruning
PureWeen Feb 18, 2026
7a64783
Add 30 multi-agent regression tests for all session bugs
PureWeen Feb 19, 2026
69d8996
Add multi-agent architecture spec and executable test scenarios
PureWeen Feb 19, 2026
6926efc
Merge remote-tracking branch 'origin/main' into pr-104
PureWeen Feb 19, 2026
fc23388
Fix session resume killing active turns after 10 seconds
PureWeen Feb 19, 2026
38a2101
Fix watchdog using 120s timeout instead of 600s during tool-call loops
PureWeen Feb 19, 2026
b73631e
Add per-worker system prompts (agent personas)
PureWeen Feb 19, 2026
18b3df3
Fix 12 issues from multi-agent code review
PureWeen Feb 19, 2026
81e45a0
Fix 5 issues from multi-agent re-review council
PureWeen Feb 19, 2026
ab5c334
Fix 12 multi-agent orchestration issues + 3 found in verification
PureWeen Feb 20, 2026
8554fbc
Add Squad integration design to docs, scenarios, and tests
PureWeen Feb 20, 2026
d09cf87
Implement Squad integration: discovery, presets, UI, and tests
PureWeen Feb 20, 2026
f0a6c1f
Fix DeleteGroup: close multi-agent sessions instead of orphaning them
PureWeen Feb 20, 2026
0a1477d
Update docs, scenarios, and tests for full feature comprehension
PureWeen Feb 20, 2026
2938563
Merge origin/main into pr-104
PureWeen Feb 20, 2026
f679121
Implement Squad write-back: save presets as .squad/ directories
PureWeen Feb 20, 2026
2695fcd
Merge origin/main into pr-104
PureWeen Feb 20, 2026
d25088c
Merge origin/main into pr-104
PureWeen Feb 21, 2026
947b23a
Merge origin/main into pr-104
PureWeen Feb 21, 2026
579e351
Feature review: fix bugs, add 19 tests, 5 scenarios, update docs
PureWeen Feb 21, 2026
669a435
fix: address multi-model review findings for multi-agent orchestration
PureWeen Feb 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,17 @@ For Android, always run `adb reverse tcp:9223 tcp:9223` after deploy.

## Architecture

**See `docs/multi-agent-orchestration.md` for the multi-agent architecture spec** (orchestration modes, reflection loop, sentinel protocol, invariants, Squad integration). Test scenarios in `PolyPilot.Tests/Scenarios/multi-agent-scenarios.json`. Read these before modifying orchestration, reconciliation, or TCS completion logic.

### Squad Integration
PolyPilot discovers [bradygaster/squad](https://github.com/bradygaster/squad) team definitions from `.squad/` (or legacy `.ai-team/`) directories in the worktree root. Each agent's `charter.md` becomes a worker system prompt, `team.md` defines the roster, `decisions.md` provides shared context injected into all worker prompts, and `routing.md` is injected into the orchestrator's planning prompt. Repo-level teams appear in a **"📂 From Repo"** section in the preset picker, above built-in presets.

**Squad write-back:** When saving a multi-agent group as a preset, PolyPilot writes the team definition back to `.squad/` format in the worktree root via `SquadWriter`. This creates `team.md`, `agents/{name}/charter.md`, and optional `decisions.md`/`routing.md`. The preset is also saved to `presets.json` as a personal backup. This enables round-tripping: discover → modify → save back → share via repo.

**Preset priority (three-tier merge):** Presets are merged from lowest to highest priority: Built-in presets < User presets (`~/.polypilot/presets.json`) < Repo teams (`.squad/`). Repo teams shadow presets with the same name. The preset picker shows three sections: "📂 From Repo", "⚙️ Built-in", and "👤 My Presets".

**Group deletion:** Deleting a multi-agent team closes and removes all its sessions (they're meaningless without the team). Deleting a regular group moves sessions to the default group.

This is a .NET MAUI Blazor Hybrid app targeting Mac Catalyst, Android, and iOS. It manages multiple GitHub Copilot CLI sessions through a native GUI.

### Three-Layer Stack
Expand Down
70 changes: 70 additions & 0 deletions PolyPilot.Tests/BridgeMessageTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -457,4 +457,74 @@ public void AttentionNeededPayload_AllReasons_RoundTrip(AttentionReason reason)

Assert.Equal(reason, restored!.Reason);
}

/// <summary>
/// A broadcast payload keeps its group id and message after being wrapped in a
/// bridge message, serialized, and read back.
/// </summary>
[Fact]
public void MultiAgentBroadcastPayload_RoundTrips()
{
    // Arrange
    var original = new MultiAgentBroadcastPayload
    {
        GroupId = "group-123",
        Message = "Build the feature"
    };

    // Act: wrap, serialize, then read the payload back out of the envelope
    var wire = BridgeMessage
        .Create(BridgeMessageTypes.MultiAgentBroadcast, original)
        .Serialize();
    var envelope = BridgeMessage.Deserialize(wire);
    var roundTripped = envelope!.GetPayload<MultiAgentBroadcastPayload>();

    // Assert
    Assert.NotNull(roundTripped);
    Assert.Equal("group-123", roundTripped!.GroupId);
    Assert.Equal("Build the feature", roundTripped.Message);
}

/// <summary>
/// A create-group payload keeps its name, mode, orchestrator prompt, and the complete
/// ordered list of session names after a bridge-message round trip.
/// </summary>
[Fact]
public void MultiAgentCreateGroupPayload_RoundTrips()
{
    // Arrange
    var expectedSessionNames = new List<string> { "session-1", "session-2" };
    var payload = new MultiAgentCreateGroupPayload
    {
        Name = "Dev Team",
        Mode = "Orchestrator",
        OrchestratorPrompt = "Coordinate the workers",
        SessionNames = new List<string> { "session-1", "session-2" }
    };

    // Act
    var msg = BridgeMessage.Create(BridgeMessageTypes.MultiAgentCreateGroup, payload);
    var json = msg.Serialize();
    var restored = BridgeMessage.Deserialize(json)!.GetPayload<MultiAgentCreateGroupPayload>();

    // Assert
    Assert.NotNull(restored);
    Assert.Equal("Dev Team", restored!.Name);
    Assert.Equal("Orchestrator", restored.Mode);
    Assert.Equal("Coordinate the workers", restored.OrchestratorPrompt);

    // Compare the whole list rather than only its count and first entry, so a dropped,
    // altered, or reordered second session name is caught (also fails if the list is null).
    Assert.Equal(expectedSessionNames, restored.SessionNames);
}

/// <summary>
/// A progress payload keeps its group id, session counters, and the name of the
/// completed session after a bridge-message round trip.
/// </summary>
[Fact]
public void MultiAgentProgressPayload_RoundTrips()
{
    // Arrange
    var payload = new MultiAgentProgressPayload
    {
        GroupId = "group-1",
        TotalSessions = 3,
        CompletedSessions = 1,
        ProcessingSessions = 2,
        CompletedSessionNames = new List<string> { "worker-1" }
    };

    // Act
    var msg = BridgeMessage.Create(BridgeMessageTypes.MultiAgentProgress, payload);
    var json = msg.Serialize();
    var restored = BridgeMessage.Deserialize(json)!.GetPayload<MultiAgentProgressPayload>();

    // Assert
    Assert.NotNull(restored);
    Assert.Equal("group-1", restored!.GroupId);
    Assert.Equal(3, restored.TotalSessions);
    Assert.Equal(1, restored.CompletedSessions);
    Assert.Equal(2, restored.ProcessingSessions);

    // Assert.Single returns the lone element; check its value too, not just the count,
    // so a corrupted session name does not slip through.
    Assert.Equal("worker-1", Assert.Single(restored.CompletedSessionNames));
}

/// <summary>
/// Pins the string values of the multi-agent bridge message type identifiers.
/// </summary>
[Fact]
public void MultiAgentMessageTypes_AreCorrectStrings()
{
    // Expected string paired with the identifier that must carry it
    var cases = new (string Expected, string Actual)[]
    {
        ("multi_agent_broadcast", BridgeMessageTypes.MultiAgentBroadcast),
        ("multi_agent_create_group", BridgeMessageTypes.MultiAgentCreateGroup),
        ("multi_agent_progress", BridgeMessageTypes.MultiAgentProgress),
    };

    foreach (var (expected, actual) in cases)
    {
        Assert.Equal(expected, actual);
    }
}
}
183 changes: 183 additions & 0 deletions PolyPilot.Tests/MultiAgentGapTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
using PolyPilot.Models;
using PolyPilot.Services;

namespace PolyPilot.Tests;

/// <summary>
/// Fills coverage gaps for multi-agent support: parsing of orchestrator task assignments,
/// model capability lookups and role warnings, and reflection-cycle completion summaries.
/// </summary>
public class MultiAgentGapTests
{
    // ---------- CopilotService.ParseTaskAssignments ----------

    /// <summary>An empty response yields no assignments.</summary>
    [Fact]
    public void ParseTaskAssignments_EmptyInput_ReturnsEmpty()
    {
        var workers = new List<string> { "a", "b" };

        var assignments = CopilotService.ParseTaskAssignments("", workers);

        Assert.Empty(assignments);
    }

    /// <summary>A single worker block yields one assignment carrying that worker's task text.</summary>
    [Fact]
    public void ParseTaskAssignments_SingleWorker_ExtractsTask()
    {
        const string orchestratorReply = "@worker:alpha\nDo the thing.\n@end";
        var workers = new List<string> { "alpha" };

        var assignments = CopilotService.ParseTaskAssignments(orchestratorReply, workers);

        var only = Assert.Single(assignments);
        Assert.Equal("alpha", only.WorkerName);
        Assert.Contains("Do the thing", only.Task);
    }

    /// <summary>Three worker blocks yield three assignments in the order they appear.</summary>
    [Fact]
    public void ParseTaskAssignments_MultipleWorkers_ExtractsAll()
    {
        var workers = new List<string> { "w1", "w2", "w3" };
        var orchestratorReply = @"@worker:w1
Task one.
@end
@worker:w2
Task two.
@end
@worker:w3
Task three.
@end";

        var assignments = CopilotService.ParseTaskAssignments(orchestratorReply, workers);

        Assert.Equal(3, assignments.Count);
        for (var i = 0; i < workers.Count; i++)
        {
            // The worker list mirrors the block order in the reply: w1, w2, w3
            Assert.Equal(workers[i], assignments[i].WorkerName);
        }
    }

    /// <summary>
    /// A worker tag that is only a substring of a known session name ("coder" within
    /// "coder-session") is resolved to that session.
    /// </summary>
    [Fact]
    public void ParseTaskAssignments_FuzzyMatch_FindsClosestWorker()
    {
        const string orchestratorReply = "@worker:coder\nWrite the code.\n@end";
        var workers = new List<string> { "coder-session", "reviewer-session" };

        var assignments = CopilotService.ParseTaskAssignments(orchestratorReply, workers);

        var only = Assert.Single(assignments);
        Assert.Equal("coder-session", only.WorkerName);
    }

    /// <summary>A block addressed to a name that matches no known worker is dropped.</summary>
    [Fact]
    public void ParseTaskAssignments_UnknownWorker_IsIgnored()
    {
        const string orchestratorReply = "@worker:ghost\nDo something.\n@end";
        var workers = new List<string> { "alpha", "beta" };

        var assignments = CopilotService.ParseTaskAssignments(orchestratorReply, workers);

        Assert.Empty(assignments);
    }

    /// <summary>
    /// Two blocks for the same worker are both returned; the final entry carries the
    /// second task.
    /// </summary>
    [Fact]
    public void ParseTaskAssignments_DuplicateWorker_TakesLast()
    {
        var workers = new List<string> { "alpha" };
        var orchestratorReply = @"@worker:alpha
First task.
@end
@worker:alpha
Second task.
@end";

        var assignments = CopilotService.ParseTaskAssignments(orchestratorReply, workers);

        // Both blocks are kept; the last entry is the one carrying the second task
        Assert.Equal(2, assignments.Count);
        var finalAssignment = assignments[assignments.Count - 1];
        Assert.Contains("Second task", finalAssignment.Task);
    }

    // ---------- ModelCapabilities ----------

    /// <summary>A null or empty model slug maps to <c>ModelCapability.None</c>.</summary>
    [Theory]
    [InlineData(null)]
    [InlineData("")]
    public void GetCapabilities_NullOrEmpty_ReturnsNone(string? slug)
        => Assert.Equal(ModelCapability.None, ModelCapabilities.GetCapabilities(slug!));

    /// <summary>"gpt-5" reports the reasoning, code, and tool-use capability flags.</summary>
    [Fact]
    public void GetCapabilities_KnownModel_ReturnsFlags()
    {
        var expectedFlags = new[]
        {
            ModelCapability.ReasoningExpert,
            ModelCapability.CodeExpert,
            ModelCapability.ToolUse,
        };

        var capabilities = ModelCapabilities.GetCapabilities("gpt-5");

        foreach (var flag in expectedFlags)
        {
            Assert.True(capabilities.HasFlag(flag));
        }
    }

    /// <summary>An unrecognized model produces a warning that mentions "Unknown model".</summary>
    [Fact]
    public void GetRoleWarnings_UnknownModel_ReturnsWarning()
    {
        var roleWarnings = ModelCapabilities.GetRoleWarnings("totally-unknown-model", MultiAgentRole.Worker);

        Assert.NotEmpty(roleWarnings);
        Assert.Contains(
            roleWarnings,
            text => text.Contains("Unknown model", StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Assigning claude-haiku-4.5 as orchestrator produces a warning that mentions reasoning.
    /// </summary>
    [Fact]
    public void GetRoleWarnings_WeakOrchestrator_ReturnsWarning()
    {
        var roleWarnings = ModelCapabilities.GetRoleWarnings("claude-haiku-4.5", MultiAgentRole.Orchestrator);

        Assert.NotEmpty(roleWarnings);
        Assert.Contains(
            roleWarnings,
            text => text.Contains("reasoning", StringComparison.OrdinalIgnoreCase));
    }

    // ---------- ReflectionCycle.BuildCompletionSummary ----------

    /// <summary>A response containing the completion sentinel is summarized as goal met.</summary>
    [Fact]
    public void BuildCompletionSummary_GoalMet_ShowsCheckmark()
    {
        var reflection = ReflectionCycle.Create("Ship the feature", maxIterations: 5);
        reflection.Advance("Done!\n[[REFLECTION_COMPLETE]]");

        var text = reflection.BuildCompletionSummary();

        Assert.Contains("✅", text);
        Assert.Contains("Goal met", text);
    }

    /// <summary>
    /// Three identical responses in a row are summarized as stalled, not as cancelled.
    /// </summary>
    [Fact]
    public void BuildCompletionSummary_Stalled_ShowsWarning()
    {
        const string repeatedResponse = "Working on the task with specific details about implementation";
        var reflection = ReflectionCycle.Create("Improve quality", maxIterations: 10);

        // Advance with the same text three times so the cycle sees no progress
        for (var attempt = 0; attempt < 3; attempt++)
        {
            reflection.Advance(repeatedResponse);
        }

        var text = reflection.BuildCompletionSummary();

        // The summary must show the stall marker and must not show the stop marker
        Assert.Contains("⚠️", text);
        Assert.Contains("Stalled", text);
        Assert.DoesNotContain("⏹️", text);
    }

    /// <summary>A cycle flagged as cancelled and inactive is summarized as cancelled.</summary>
    [Fact]
    public void BuildCompletionSummary_Cancelled_ShowsStop()
    {
        var reflection = ReflectionCycle.Create("Long task", maxIterations: 10);
        reflection.Advance("First attempt with unique content here...");

        // Flag the cycle as cancelled and no longer running before building the summary
        reflection.IsCancelled = true;
        reflection.IsActive = false;

        var text = reflection.BuildCompletionSummary();

        Assert.Contains("⏹️", text);
        Assert.Contains("Cancelled", text);
    }

    /// <summary>
    /// Using up the iteration budget with distinct responses is summarized as max iterations
    /// reached, including the "2/2" progress figure.
    /// </summary>
    [Fact]
    public void BuildCompletionSummary_MaxIterations_ShowsClock()
    {
        var reflection = ReflectionCycle.Create("Goal", maxIterations: 2);
        reflection.Advance("Trying with approach alpha...");
        reflection.Advance("Still trying with approach beta and new ideas...");

        var text = reflection.BuildCompletionSummary();

        Assert.Contains("⏱️", text);
        Assert.Contains("Max iterations", text);
        Assert.Contains("2/2", text);
    }
}
Loading