Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions libs/cua-bench/KiCad-task/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# KiCad Task

A simple cua-bench task that installs the open-source [KiCad](https://www.kicad.org/) EDA suite and verifies that the agent can create and save a new project.

## Task

- **Setup**: Installs KiCad via the cua-bench app registry (Linux: PPA + apt; Windows: winget; macOS: Homebrew).
- **Goal**: Create a new KiCad project with a given name and save it to `Desktop/KiCadProjects/<project_name>/`.
- **Verification**: Checks that the project folder exists and contains the expected `<project_name>.kicad_pro` file.

## Variants

| Variant | Project name | Description |
|--------|----------------|-------------|
| 0 | MyFirstBoard | Create and save project "MyFirstBoard" to Desktop/KiCadProjects. |
| 1 | BlinkyPCB | Create and save project "BlinkyPCB" to Desktop/KiCadProjects. |

## Running

Requires the native provider (Docker/QEMU) with `os_type: "linux"` (or `"windows"` if you adjust the task config).

```bash
# Interactive preview (from cua-bench repo root)
cb interact KiCad-task --variant-id 0

# Run with oracle (completes successfully; solve is a no-op, so reward is 0.0)
cb run task KiCad-task --variant-id 0 --oracle

# Run with agent
cb run task KiCad-task --variant-id 0 --agent cua-agent --model <model>
```

**Note:** With `--oracle`, the run completes (setup installs KiCad, solve is a no-op, evaluate runs). Reward is 0.0 unless an agent or human creates the project. Setup may take several minutes while KiCad is installed in the environment.

## Files

- `main.py` – Task definition, setup (install KiCad), evaluation (check project file), and solve stub.
102 changes: 102 additions & 0 deletions libs/cua-bench/KiCad-task/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""KiCad workflow tasks for cua-bench.

Simple example: create a new KiCad project and save it. Verification checks
that the project directory and .kicad_pro file exist.
"""

import cua_bench as cb


@cb.tasks_config(split="train")
def load():
    """Build the KiCad task variants, one per project name.

    Every variant asks the agent to create a named KiCad project, save it in
    a ``KiCadProjects`` folder on the Desktop, and close KiCad. The variant
    dict (task type, project name, description) is attached to the task as
    its metadata.
    """
    project_names = ("MyFirstBoard", "BlinkyPCB")

    variants = []
    for name in project_names:
        variant = {
            "task_type": "create_project",
            "project_name": name,
            "description": (
                f"KiCad is already open. Create a new project named '{name}', "
                "save it to the Desktop in a folder named KiCadProjects, then close KiCad."
            ),
        }
        # Each task gets its own computer config dict: native provider,
        # Linux, 1920x1080.
        computer_cfg = {
            "provider": "native",
            "setup_config": {
                "os_type": "linux",
                "width": 1920,
                "height": 1080,
            },
        }
        variants.append(
            cb.Task(
                description=variant["description"],
                metadata=variant,
                computer=computer_cfg,
            )
        )
    return variants


@cb.setup_task(split="train")
async def start(task_cfg: cb.Task, session: cb.DesktopSession):
    """Set up the environment: install KiCad (with a shortcut), then launch it."""
    kicad_app = session.apps.kicad
    await kicad_app.install(with_shortcut=True)
    await kicad_app.launch()


@cb.evaluate_task(split="train")
async def evaluate(task_cfg: cb.Task, session: cb.DesktopSession) -> list[float]:
    """Score the task by checking that the KiCad project file exists.

    The expected file is ``Desktop/KiCadProjects/<name>/<name>.kicad_pro``
    under the user's home directory, where ``<name>`` is the ``project_name``
    entry of the task metadata. The check is done by running a small shell
    command in the session that prints ``FOUND`` or ``NOT_FOUND``.

    Args:
        task_cfg: Task whose ``metadata`` dict carries ``project_name``.
        session: Desktop session used to run the probe command. Its OS type
            is read from ``session.os_type`` if present, otherwise from
            ``session._config["os_type"]``, defaulting to ``"linux"``.

    Returns:
        ``[1.0]`` if the probe printed exactly ``FOUND``, otherwise ``[0.0]``
        (including when no project name is configured).
    """
    # Local import keeps this block self-contained.
    import shlex

    project_name = task_cfg.metadata.get("project_name", "")
    if not project_name:
        return [0.0]

    os_type = "linux"
    if hasattr(session, "os_type"):
        os_type = session.os_type
    elif hasattr(session, "_config") and session._config:
        os_type = session._config.get("os_type", "linux")

    # Normalize before comparing so values such as "Windows" or None do not
    # silently select the wrong branch or raise.
    windows_os_types = ("windows", "win11", "win10", "win7", "winxp", "win98")
    is_windows = str(os_type or "linux").lower() in windows_os_types

    if is_windows:
        # Windows: Desktop\KiCadProjects\<name>\<name>.kicad_pro
        project_file = (
            f"%USERPROFILE%\\Desktop\\KiCadProjects\\{project_name}\\{project_name}.kicad_pro"
        )
        command = f'if exist "{project_file}" (echo FOUND) else (echo NOT_FOUND)'
    else:
        # Linux/macOS: ~/Desktop/KiCadProjects/<name>/<name>.kicad_pro
        # $HOME must stay inside double quotes so the shell still expands it;
        # the name-derived components are quoted separately so spaces or
        # shell metacharacters in the project name cannot split or alter the
        # command.
        quoted_dir = shlex.quote(project_name)
        quoted_file = shlex.quote(f"{project_name}.kicad_pro")
        project_file = f'"$HOME/Desktop/KiCadProjects"/{quoted_dir}/{quoted_file}'
        command = f"test -f {project_file} && echo FOUND || echo NOT_FOUND"

    result = await session.run_command(command, check=False)

    if isinstance(result, dict):
        # "stdout" may be present but None; treat that as empty output.
        stdout = result.get("stdout") or ""
    else:
        stdout = str(result)

    # Require exact FOUND (a substring check would also match NOT_FOUND).
    return [1.0] if stdout.strip() == "FOUND" else [0.0]


@cb.solve_task(split="train")
async def solve(task_cfg: cb.Task, session: cb.DesktopSession):
    """Placeholder oracle that intentionally does nothing.

    Creating and saving a KiCad project requires GUI interaction, so no
    scripted solution is provided. Keeping this as a no-op lets
    `cb run task KiCad-task --variant-id 0 --oracle` run to completion:
    setup runs, then evaluate runs and returns 0.0. Use an agent for a real
    solution.
    """
    return None


# When this file is executed directly as a script, hand its path to
# cb.interact.
if __name__ == "__main__":
    cb.interact(__file__)
64 changes: 64 additions & 0 deletions libs/cua-bench/KiCad-task/test_kicad_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Simple tests for KiCad-task: load task and evaluate logic with mock session."""

import asyncio
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock

import pytest

# Load task from sibling main.py
import importlib.util
# Absolute path of the main.py that sits next to this test file.
_MAIN = Path(__file__).resolve().parent / "main.py"
# Import main.py by file path under the module name "kicad_task" (the task
# directory name "KiCad-task" contains a hyphen and is not importable as a
# package name).
spec = importlib.util.spec_from_file_location("kicad_task", _MAIN)
kicad_task = importlib.util.module_from_spec(spec)
# Execute the module body so load/evaluate/etc. are defined on kicad_task.
spec.loader.exec_module(kicad_task)


def test_load_returns_two_tasks():
    """load() yields exactly the two expected variants, in order."""
    variants = kicad_task.load()
    project_names = [variant.metadata.get("project_name") for variant in variants]
    assert project_names == ["MyFirstBoard", "BlinkyPCB"]


@pytest.mark.asyncio
async def test_evaluate_fails_when_project_file_missing():
    """A NOT_FOUND probe result on a Linux session scores 0.0."""
    probe_output = {"stdout": "NOT_FOUND", "stderr": "", "return_code": 0}
    fake_session = MagicMock()
    fake_session.os_type = "linux"
    fake_session.run_command = AsyncMock(return_value=probe_output)

    first_variant = kicad_task.load()[0]
    result = await kicad_task.evaluate(first_variant, fake_session)

    assert result == [0.0]
    fake_session.run_command.assert_called_once()


@pytest.mark.asyncio
async def test_evaluate_succeeds_when_project_file_exists():
    """A FOUND probe result on a Linux session scores 1.0."""
    probe_output = {"stdout": "FOUND", "stderr": "", "return_code": 0}
    fake_session = MagicMock()
    fake_session.os_type = "linux"
    fake_session.run_command = AsyncMock(return_value=probe_output)

    first_variant = kicad_task.load()[0]
    result = await kicad_task.evaluate(first_variant, fake_session)

    assert result == [1.0]
    fake_session.run_command.assert_called_once()


@pytest.mark.asyncio
async def test_evaluate_windows_path_when_os_type_windows():
    """On a Windows session the probe uses `if exist` with the project path."""
    probe_output = {"stdout": "FOUND", "stderr": "", "return_code": 0}
    fake_session = MagicMock()
    fake_session.os_type = "windows"
    fake_session.run_command = AsyncMock(return_value=probe_output)

    first_variant = kicad_task.load()[0]
    result = await kicad_task.evaluate(first_variant, fake_session)
    assert result == [1.0]

    # call_args unpacks to (positional args, keyword args); the command
    # string is the first positional argument.
    positional, _keywords = fake_session.run_command.call_args
    command = positional[0]
    for fragment in ("if exist", "KiCadProjects", ".kicad_pro"):
        assert fragment in command
1 change: 1 addition & 0 deletions libs/cua-bench/cua_bench/apps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ async def start(task_cfg: cb.Task, session: cb.DesktopSession):
from . import adobe_photoshop # noqa: F401
from . import calendar # noqa: F401
from . import godot # noqa: F401
from . import kicad # noqa: F401
from . import notes # noqa: F401
from . import reminders # noqa: F401
from . import unity # noqa: F401
Expand Down
Loading