trycua · Jasonqi146 · Feb 10, 2026
diff --git a/libs/cua-bench/KiCad-task/README.md b/libs/cua-bench/KiCad-task/README.md
@@ -0,0 +1,37 @@
+# KiCad Task
+
+Simple cua-bench task that installs the open-source [KiCad](https://www.kicad.org/) EDA suite and verifies that the agent can create and save a new project.
+
+## Task
+
+- **Setup**: Installs KiCad via the cua-bench app registry (Linux: PPA + apt; Windows: winget; macOS: Homebrew) and launches it, so the agent starts with KiCad already open.
+- **Goal**: Create a new KiCad project with a given name and save it to `Desktop/KiCadProjects/<project_name>/`.
+- **Verification**: Checks that `<project_name>.kicad_pro` exists inside that project folder; the reward is 1.0 if it does and 0.0 otherwise.
+
+## Variants
+
+| Variant | Project name   | Description |
+|--------|----------------|-------------|
+| 0      | MyFirstBoard   | Create and save project "MyFirstBoard" to Desktop/KiCadProjects. |
+| 1      | BlinkyPCB      | Create and save project "BlinkyPCB" to Desktop/KiCadProjects. |
+
+## Running
+
+Requires the native provider (Docker/QEMU) with `os_type: "linux"` (or `"windows"` if you adjust the task config in `main.py`).
+
+```bash
+# Interactive preview (from cua-bench repo root)
+cb interact KiCad-task --variant-id 0
+
+# Run with oracle (completes successfully; solve is a no-op, so reward is 0.0)
+cb run task KiCad-task --variant-id 0 --oracle
+
+# Run with agent
+cb run task KiCad-task --variant-id 0 --agent cua-agent --model <model>
+```
+
+**Note:** With `--oracle`, the run completes (setup installs KiCad, solve is a no-op, evaluate runs). Reward is 0.0 unless an agent or human creates the project. Setup may take several minutes while KiCad is installed in the environment.
+
+## Files
+
+- `main.py` – Task definition, setup (install and launch KiCad), evaluation (check project file), and solve stub. Mock-session tests for it live in `test_kicad_task.py`.
diff --git a/libs/cua-bench/KiCad-task/main.py b/libs/cua-bench/KiCad-task/main.py
@@ -0,0 +1,102 @@
+"""KiCad workflow tasks for cua-bench.
+
+Simple example: create a new KiCad project and save it. Verification checks
+that the project directory and .kicad_pro file exist.
+"""
+
+import cua_bench as cb
+
+
+@cb.tasks_config(split="train")
+def load():
+    """Return one cb.Task per KiCad variant; every variant uses the same native 1920x1080 Linux computer config."""
+    tasks = [
+        {
+            "task_type": "create_project",
+            "project_name": "MyFirstBoard",
+            "description": (
+                "KiCad is already open. Create a new project named 'MyFirstBoard', "
+                "save it to the Desktop in a folder named KiCadProjects, then close KiCad."
+            ),
+        },
+        {
+            "task_type": "create_project",
+            "project_name": "BlinkyPCB",
+            "description": (
+                "KiCad is already open. Create a new project named 'BlinkyPCB', "
+                "save it to the Desktop in a folder named KiCadProjects, then close KiCad."
+            ),
+        },
+    ]
+
+    return [
+        cb.Task(
+            description=task["description"],
+            metadata=task,  # the whole variant dict; evaluate() reads "project_name" from it
+            computer={
+                "provider": "native",
+                "setup_config": {
+                    "os_type": "linux",
+                    "width": 1920,
+                    "height": 1080,
+                },
+            },
+        )
+        for task in tasks
+    ]
+
+
+@cb.setup_task(split="train")
+async def start(task_cfg: cb.Task, session: cb.DesktopSession):
+    """Setup hook: install KiCad with `with_shortcut=True`, then launch it (task_cfg is not used)."""
+    await session.apps.kicad.install(with_shortcut=True)
+    await session.apps.kicad.launch()
+
+
+@cb.evaluate_task(split="train")
+async def evaluate(task_cfg: cb.Task, session: cb.DesktopSession) -> list[float]:
+    """Verify the KiCad project was created: project dir and .kicad_pro file must exist."""
+    project_name = task_cfg.metadata.get("project_name", "")
+    if not project_name:
+        return [0.0]
+
+    os_type = "linux"
+    if hasattr(session, "os_type"):
+        os_type = session.os_type
+    elif hasattr(session, "_config") and session._config:
+        os_type = session._config.get("os_type", "linux")
+    # Normalize to linux/windows for path checks
+    is_windows = os_type in ("windows", "win11", "win10", "win7", "winxp", "win98")
+
+    if is_windows:
+        # Windows: Desktop\KiCadProjects\<name>\<name>.kicad_pro
+        project_file = f"%USERPROFILE%\\Desktop\\KiCadProjects\\{project_name}\\{project_name}.kicad_pro"
+        result = await session.run_command(
+            f'if exist "{project_file}" (echo FOUND) else (echo NOT_FOUND)',
+            check=False,
+        )
+    else:
+        # Linux/macOS: ~/Desktop/KiCadProjects/<name>/<name>.kicad_pro
+        project_file = f"$HOME/Desktop/KiCadProjects/{project_name}/{project_name}.kicad_pro"
+        result = await session.run_command(
+            f'test -f {project_file} && echo FOUND || echo NOT_FOUND',
+            check=False,
+        )
+
+    stdout = (result.get("stdout", "") if isinstance(result, dict) else str(result)).strip()
+    # Require exact FOUND (avoid NOT_FOUND matching)
+    return [1.0] if stdout == "FOUND" else [0.0]
+
+
+@cb.solve_task(split="train")
+async def solve(task_cfg: cb.Task, session: cb.DesktopSession):
+    """Do nothing: no oracle is implemented, since KiCad requires GUI interaction to create/save projects.
+
+    Kept as a no-op so that `cb run task KiCad-task --variant-id 0 --oracle` completes
+    (setup runs, evaluate runs and returns 0.0). Use with an agent for real solutions.
+    """
+    pass
+
+
+if __name__ == "__main__":
+    cb.interact(__file__)  # NOTE(review): presumably opens this task file in the interactive preview - confirm
diff --git a/libs/cua-bench/KiCad-task/test_kicad_task.py b/libs/cua-bench/KiCad-task/test_kicad_task.py
@@ -0,0 +1,64 @@
+"""Simple tests for KiCad-task: load task and evaluate logic with mock session."""
+
+import asyncio
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+# Load the sibling main.py directly by file path and execute it under the module name "kicad_task"
+import importlib.util
+_MAIN = Path(__file__).resolve().parent / "main.py"
+spec = importlib.util.spec_from_file_location("kicad_task", _MAIN)
+kicad_task = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(kicad_task)
+
+
+def test_load_returns_two_tasks():
+    """load() returns two tasks whose metadata project names are MyFirstBoard then BlinkyPCB."""
+    tasks = kicad_task.load()
+    assert len(tasks) == 2
+    assert tasks[0].metadata.get("project_name") == "MyFirstBoard"
+    assert tasks[1].metadata.get("project_name") == "BlinkyPCB"
+
+
+@pytest.mark.asyncio
+async def test_evaluate_fails_when_project_file_missing():
+    """evaluate() scores [0.0] and runs exactly one command when the mocked stdout is NOT_FOUND."""
+    task = kicad_task.load()[0]
+    session = MagicMock()
+    session.os_type = "linux"
+    session.run_command = AsyncMock(return_value={"stdout": "NOT_FOUND", "stderr": "", "return_code": 0})
+
+    score = await kicad_task.evaluate(task, session)
+    assert score == [0.0]
+    session.run_command.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_evaluate_succeeds_when_project_file_exists():
+    """evaluate() scores [1.0] and runs exactly one command when the mocked stdout is FOUND."""
+    task = kicad_task.load()[0]
+    session = MagicMock()
+    session.os_type = "linux"
+    session.run_command = AsyncMock(return_value={"stdout": "FOUND", "stderr": "", "return_code": 0})
+
+    score = await kicad_task.evaluate(task, session)
+    assert score == [1.0]
+    session.run_command.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_evaluate_windows_path_when_os_type_windows():
+    """With os_type "windows", evaluate() sends an `if exist` check that names KiCadProjects and .kicad_pro."""
+    task = kicad_task.load()[0]
+    session = MagicMock()
+    session.os_type = "windows"
+    session.run_command = AsyncMock(return_value={"stdout": "FOUND", "stderr": "", "return_code": 0})
+
+    score = await kicad_task.evaluate(task, session)
+    assert score == [1.0]
+    call_args = session.run_command.call_args[0][0]
+    assert "if exist" in call_args
+    assert "KiCadProjects" in call_args
+    assert ".kicad_pro" in call_args
diff --git a/libs/cua-bench/cua_bench/apps/__init__.py b/libs/cua-bench/cua_bench/apps/__init__.py
@@ -51,6 +51,7 @@ async def start(task_cfg: cb.Task, session: cb.DesktopSession):
 from . import adobe_photoshop  # noqa: F401
 from . import calendar  # noqa: F401
 from . import godot  # noqa: F401
+from . import kicad  # noqa: F401
 from . import notes  # noqa: F401
 from . import reminders  # noqa: F401
 from . import unity  # noqa: F401