Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ autopilot start --prompt "..." # Start a new task
autopilot start --prompt "..." --plan # Let agent plan + implement
autopilot start --issue 12345 # Start from a GitHub issue
autopilot resume --pr 42345 # Resume from an existing PR
autopilot fix-ci --pr 42345 # Fix CI failures (interactive check selection)
autopilot fix-ci --pr 42345 --checks "build-ubuntu,test-integration" # Non-interactive
autopilot status # Show all task statuses
autopilot logs # Show latest task log
autopilot logs --session abc123 # Show specific task log
Expand All @@ -70,7 +72,10 @@ Create `autopilot.json` in your repo root or `~/.autopilot-loop/config.json`:
"keepalive_enabled": false,
"keepalive_interval_seconds": 300,
"branch_pattern": "autopilot/{task_id}",
"custom_instructions": "Run tests with: bin/rails test <path>\nRun linting with: bin/rubocop <path>"
"custom_instructions": "Run tests with: npm test\nRun linting with: npm run lint",
"ci_check_names": [],
"ci_poll_interval_seconds": 120,
"ci_poll_timeout_seconds": 5400
}
```

Expand All @@ -82,7 +87,7 @@ All values have sensible defaults — config file is optional.
- **tmux** (pre-installed in most Codespaces; `apt install tmux` elsewhere)
- **Python 3.8+**

Codespace idle timeout is set automatically at startup (120 min, org-capped).
Codespace idle timeout is set automatically at startup (120 min default, subject to your organization's limits).

## Local Development

Expand Down
5 changes: 4 additions & 1 deletion autopilot.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,8 @@
"keepalive_enabled": false,
"keepalive_interval_seconds": 300,
"branch_pattern": "autopilot/{task_id}",
"custom_instructions": ""
"custom_instructions": "",
"ci_check_names": [],
"ci_poll_interval_seconds": 120,
"ci_poll_timeout_seconds": 5400
}
139 changes: 136 additions & 3 deletions src/autopilot_loop/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""

import argparse
import json
import logging
import os
import shlex
import subprocess
Expand Down Expand Up @@ -112,8 +113,6 @@ def cmd_start(args):

def cmd_run(args):
"""Internal: run the orchestrator for a task (called from tmux)."""
from autopilot_loop.orchestrator import Orchestrator

task = get_task(args.task_id)
if not task:
print("Error: task %s not found" % args.task_id, file=sys.stderr)
Expand All @@ -128,7 +127,15 @@ def cmd_run(args):
file_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
logging.getLogger().addHandler(file_handler)

orch = Orchestrator(task_id=args.task_id, config=config)
# Dispatch to the right orchestrator based on task mode
task_mode = task.get("task_mode", "review")
if task_mode == "ci":
from autopilot_loop.orchestrator import CIOrchestrator
orch = CIOrchestrator(task_id=args.task_id, config=config)
else:
from autopilot_loop.orchestrator import Orchestrator
orch = Orchestrator(task_id=args.task_id, config=config)

result = orch.run()

if result.get("state") == "COMPLETE":
Expand Down Expand Up @@ -253,6 +260,123 @@ def cmd_logs(args):
print(" %s" % name)


def cmd_fix_ci(args):
    """Fix CI failures on an existing PR.

    Resolves the PR's head branch and checks it out, determines which failed
    CI checks to address (``--checks`` substring patterns, the
    ``ci_check_names`` config list, or an interactive prompt), records a new
    task in "ci" mode, and launches the orchestrator in a detached tmux
    session (falling back to the foreground when tmux is unavailable).

    Exits non-zero on PR/branch lookup failure or when no checks match.
    """
    from autopilot_loop.github_api import get_failed_checks

    config = load_config({
        "model": args.model,
        "max_iterations": args.max_iters,
    })

    # Resolve the PR's head branch; an unknown PR number fails here.
    try:
        result = subprocess.run(
            ["gh", "pr", "view", str(args.pr), "--json", "headRefName", "--jq", ".headRefName"],
            capture_output=True, text=True, check=True,
        )
        branch = result.stdout.strip()
    except subprocess.CalledProcessError as e:
        print("Error: could not fetch PR #%d: %s" % (args.pr, e), file=sys.stderr)
        sys.exit(1)

    # Check out the branch in its own try so a checkout failure is not
    # misreported as a PR-fetch failure.
    try:
        subprocess.run(["git", "checkout", branch], check=True)
    except subprocess.CalledProcessError as e:
        print("Error: could not check out branch %s: %s" % (branch, e), file=sys.stderr)
        sys.exit(1)

    # Get failed checks
    failed_checks = get_failed_checks(args.pr)
    if not failed_checks:
        print("No failed CI checks found on PR #%d" % args.pr)
        return

    # Determine which checks to fix
    if args.checks:
        # Non-interactive: substring match
        patterns = [p.strip() for p in args.checks.split(",")]
        selected = [c for c in failed_checks if any(p in c["name"] for p in patterns)]
        if not selected:
            print("No failed checks matched: %s" % args.checks, file=sys.stderr)
            print("Available failed checks:")
            for c in failed_checks:
                print(" %s" % c["name"])
            sys.exit(1)
    elif config.get("ci_check_names"):
        # Pre-configured in config
        patterns = config["ci_check_names"]
        selected = [c for c in failed_checks if any(p in c["name"] for p in patterns)]
        if not selected:
            print("No failed checks matched ci_check_names config: %s" % patterns, file=sys.stderr)
            sys.exit(1)
    else:
        # Interactive: list and prompt
        print("Failed CI checks on PR #%d:" % args.pr)
        print()
        for i, c in enumerate(failed_checks, 1):
            print(" %d. %s" % (i, c["name"]))
        print()

        try:
            selection = input("Which checks to fix? (comma-separated numbers, or 'all'): ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nAborted.")
            sys.exit(1)

        if selection.lower() == "all":
            selected = list(failed_checks)
        else:
            try:
                indices = [int(s.strip()) for s in selection.split(",")]
                # Out-of-range numbers are silently dropped; only malformed
                # (non-integer) input is treated as an error.
                selected = [failed_checks[i - 1] for i in indices if 1 <= i <= len(failed_checks)]
            except (ValueError, IndexError):
                print("Error: invalid selection", file=sys.stderr)
                sys.exit(1)

    if not selected:
        print("No checks selected.")
        return

    check_names = [c["name"] for c in selected]
    print("\u2713 Selected %d checks:" % len(selected))
    for name in check_names:
        print(" \u2022 %s" % name)

    from autopilot_loop.persistence import update_task

    # Record the task, then mark it as a CI-mode task so cmd_run dispatches
    # to the CI orchestrator.
    task_id = _generate_task_id()
    create_task(
        task_id=task_id,
        prompt="(fix-ci for PR #%d)" % args.pr,
        max_iterations=config["max_iterations"],
        model=config["model"],
    )
    update_task(
        task_id,
        pr_number=args.pr,
        branch=branch,
        state="FETCH_ANNOTATIONS",
        task_mode="ci",
        ci_check_names=json.dumps(check_names),
    )

    # Launch in tmux. run_cmd is interpreted by a shell inside tmux, so the
    # log path (which may contain spaces) must be quoted.
    sessions_dir = get_sessions_dir(task_id)
    log_file = os.path.join(sessions_dir, "orchestrator.log")
    tmux_session = "autopilot-%s" % task_id
    run_cmd = "autopilot _run --task-id %s 2>&1 | tee -a %s" % (
        shlex.quote(task_id), shlex.quote(log_file))

    try:
        subprocess.run(
            ["tmux", "new-session", "-d", "-s", tmux_session, run_cmd],
            check=True,
        )
    except FileNotFoundError:
        logger.warning("tmux not found, running in foreground")
        cmd_run(argparse.Namespace(task_id=task_id))
        return

    print("\u2713 Fixing CI on PR #%d as task %s" % (args.pr, task_id))
    print("\u2713 Branch: %s" % branch)
    print("\u2713 Running in tmux session: %s" % tmux_session)


def cmd_stop(args):
"""Stop a running task."""
task_id = args.task_id
Expand Down Expand Up @@ -309,6 +433,13 @@ def main():
p_stop = subparsers.add_parser("stop", help="Stop a running task")
p_stop.add_argument("task_id", type=str, help="Task ID to stop")

# fix-ci
p_fixci = subparsers.add_parser("fix-ci", help="Fix CI failures on an existing PR")
p_fixci.add_argument("--pr", type=int, required=True, help="PR number")
p_fixci.add_argument("--checks", type=str, help="Comma-separated check names (substring match)")
p_fixci.add_argument("--max-iters", type=int, help="Max fix iterations")
p_fixci.add_argument("--model", type=str, help="Model override")

# _run (internal, called from tmux)
p_run = subparsers.add_parser("_run", help=argparse.SUPPRESS)
p_run.add_argument("--task-id", required=True, help=argparse.SUPPRESS)
Expand All @@ -326,6 +457,8 @@ def main():
cmd_logs(args)
elif args.command == "stop":
cmd_stop(args)
elif args.command == "fix-ci":
cmd_fix_ci(args)
elif args.command == "_run":
cmd_run(args)
else:
Expand Down
3 changes: 3 additions & 0 deletions src/autopilot_loop/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
"keepalive_interval_seconds": 300,
"branch_pattern": "autopilot/{task_id}",
"custom_instructions": "",
"ci_check_names": [],
"ci_poll_interval_seconds": 120,
"ci_poll_timeout_seconds": 5400,
}

CONFIG_FILENAMES = [
Expand Down
137 changes: 137 additions & 0 deletions src/autopilot_loop/github_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,18 @@

import json
import logging
import re
import subprocess

logger = logging.getLogger(__name__)

__all__ = [
"GitHubAPIError",
"find_pr_for_branch",
"get_check_annotations",
"get_check_states",
"get_copilot_review",
"get_failed_checks",
"get_head_sha",
"get_issue",
"get_repo_nwo",
Expand Down Expand Up @@ -281,3 +285,136 @@ def get_unresolved_review_comments(pr_number):
})

return comments


# --- CI check functions ---

_ANSI_RE = re.compile(r"\x1b\[[0-9;]*m")


def _strip_ansi(text):
"""Remove ANSI escape codes from text."""
return _ANSI_RE.sub("", text)


def get_failed_checks(pr_number):
    """Get failed CI checks for a PR.

    Excludes aggregation gate checks (names ending in '-results').
    Parses run_id and job_id from the check link URL.

    Returns:
        List of dicts with {name, link, run_id, job_id}.
    """
    raw_output = _run_gh([
        "pr", "checks", str(pr_number),
        "--json", "name,state,link",
        "--jq", '[.[] | select(.state == "FAILURE")]',
    ], check=False)

    if not raw_output:
        return []

    failed = []
    for entry in json.loads(raw_output):
        check_name = entry.get("name", "")
        # Aggregation gates (e.g., "build-results", "test-results") are
        # summaries of other checks, so they are not actionable on their own.
        if check_name.endswith("-results"):
            continue

        url = entry.get("link", "")
        # The link looks like .../actions/runs/{run_id}/job/{job_id};
        # both IDs are None when the URL does not match that shape.
        match = re.search(r"/actions/runs/(\d+)/job/(\d+)", url)
        failed.append({
            "name": check_name,
            "link": url,
            "run_id": int(match.group(1)) if match else None,
            "job_id": int(match.group(2)) if match else None,
        })

    return failed


def get_check_annotations(job_ids):
    """Fetch failure annotations for CI jobs.

    Calls /check-runs/{id}/annotations for each job, filters to
    annotation_level == 'failure', deduplicates by (path, start_line),
    strips ANSI codes, and excludes generic 'Process completed' messages.
    Collection stops as soon as the cap is reached, so remaining jobs are
    not queried at all (the original fetched every job and truncated after).

    Args:
        job_ids: List of job IDs (ints).

    Returns:
        List of dicts with {path, start_line, end_line, title, message},
        capped at 50 entries to avoid prompt bloat.
    """
    max_annotations = 50  # cap to avoid prompt bloat
    nwo = get_repo_nwo()
    seen = set()  # (path, start_line) for dedup
    annotations = []

    for job_id in job_ids:
        if len(annotations) >= max_annotations:
            break  # cap reached; skip the remaining API calls

        # NOTE(review): `gh api` returns a single page (~30 annotations) per
        # check run here; add --paginate if more are ever needed — confirm.
        output = _run_gh([
            "api", "repos/%s/check-runs/%d/annotations" % (nwo, job_id),
        ], check=False)

        if not output:
            continue

        for ann in json.loads(output):
            if ann.get("annotation_level") != "failure":
                continue

            path = ann.get("path", "")
            start_line = ann.get("start_line", 0)
            message = _strip_ansi(ann.get("message", ""))
            title = _strip_ansi(ann.get("title", ""))

            # Skip generic "Process completed with exit code N" messages:
            # they carry no information beyond "the job failed".
            if message.startswith("Process completed with exit code"):
                continue

            key = (path, start_line)
            if key in seen:
                continue
            seen.add(key)

            annotations.append({
                "path": path,
                "start_line": start_line,
                "end_line": ann.get("end_line", start_line),
                "title": title,
                "message": message,
            })
            if len(annotations) >= max_annotations:
                break

    return annotations


def get_check_states(pr_number, check_names):
    """Get the current state of specific checks on a PR.

    Args:
        pr_number: PR number.
        check_names: List of check names to query.

    Returns:
        Dict mapping check name to state string (e.g., 'SUCCESS', 'FAILURE', 'PENDING').
        Names not reported by the API map to 'UNKNOWN'.
    """
    raw_output = _run_gh([
        "pr", "checks", str(pr_number),
        "--json", "name,state",
    ], check=False)

    # An empty response (e.g., gh failure) means no state is known for any check.
    if not raw_output:
        return dict.fromkeys(check_names, "UNKNOWN")

    states_by_name = {}
    for check in json.loads(raw_output):
        states_by_name[check["name"]] = check["state"]

    return {name: states_by_name.get(name, "UNKNOWN") for name in check_names}
Loading
Loading