diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f37389..5628d66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,21 @@ # CHANGELOG +## v0.9.0 (2026-02-24) + +### Features + +- **train**: Add end-to-end pipeline automation with --demo-dir flag + ([`b874018`](https://github.com/OpenAdaptAI/openadapt-ml/commit/b874018d45b174f4f9c6bacb855f571f91612cf0)) + +Add prepare_bundle() and generate_screenshot_mapping() to convert_demos.py for single-call demo + conversion. Extend both train.py and lambda_labs.py train commands with --demo-dir, + --captures-dir, --mapping flags so the full pipeline (mapping → conversion → bundle → upload → + train) runs as one command. Add --gpu-wait for Lambda GPU availability retry loop. + +Co-Authored-By: Claude Opus 4.6 + + ## v0.8.0 (2026-02-24) ### Features diff --git a/openadapt_ml/cloud/lambda_labs.py b/openadapt_ml/cloud/lambda_labs.py index 7139665..734bd73 100644 --- a/openadapt_ml/cloud/lambda_labs.py +++ b/openadapt_ml/cloud/lambda_labs.py @@ -966,6 +966,25 @@ def main(): "-b", help="Local bundle directory (with training_data.jsonl + images/) to upload", ) + train_parser.add_argument( + "--demo-dir", + help="Directory with annotated demo JSON files (auto-converts to bundle)", + ) + train_parser.add_argument( + "--captures-dir", + help="Parent directory containing capture directories (for screenshot resolution)", + ) + train_parser.add_argument( + "--mapping", + help="Pre-computed screenshot_mapping.json (optional, auto-generated if omitted)", + ) + train_parser.add_argument( + "--gpu-wait", + type=int, + default=0, + metavar="MINUTES", + help="Wait up to N minutes for GPU availability (0=fail immediately, default: 0)", + ) train_parser.add_argument( "--no-terminate", action="store_true", @@ -1309,6 +1328,29 @@ def main(): start_time = time_module.time() training_completed = False # Track if training actually finished + # --- Step 0: Auto-convert demos to bundle if --demo-dir provided --- + if args.demo_dir and not 
args.bundle: + from openadapt_ml.training.convert_demos import prepare_bundle + + if not args.captures_dir: + print("Error: --captures-dir is required with --demo-dir") + return + + print("=" * 50) + print("Step 0: Converting demos to training bundle") + print("=" * 50) + try: + bundle_dir = prepare_bundle( + demo_dir=args.demo_dir, + captures_dir=args.captures_dir, + mapping_path=args.mapping, + ) + args.bundle = str(bundle_dir) + print(f"Bundle ready at: {bundle_dir}\n") + except Exception as e: + print(f"Error converting demos: {e}") + return + # Instance pricing (approximate $/hr) INSTANCE_PRICES = { "gpu_1x_a10": 0.75, @@ -1334,15 +1376,37 @@ def main(): print(f"Using existing instance: {instances[0].id[:8]}...") instance = instances[0] else: - # Launch new instance + # Launch new instance (with optional GPU wait/retry) ssh_key = setup_lambda_ssh_key(client) - print(f"Launching {args.type}...") - instance = client.launch_instance( - instance_type=args.type, - ssh_key_names=[ssh_key], - name="openadapt-training", - ) - print(f"Instance launched: {instance.id[:8]}... at {instance.ip}") + gpu_wait = getattr(args, "gpu_wait", 0) + deadline = time_module.time() + gpu_wait * 60 + retry_interval = 60 # check every minute + + while True: + try: + print(f"Launching {args.type}...") + instance = client.launch_instance( + instance_type=args.type, + ssh_key_names=[ssh_key], + name="openadapt-training", + ) + print( + f"Instance launched: {instance.id[:8]}... at {instance.ip}" + ) + break + except RuntimeError as e: + if "No regions available" not in str(e) or gpu_wait <= 0: + raise + remaining = deadline - time_module.time() + if remaining <= 0: + print(f"Error: GPU not available after {gpu_wait} min wait") + return + mins_left = int(remaining / 60) + print( + f" GPU unavailable, retrying in {retry_interval}s " + f"({mins_left} min remaining)..." 
+ ) + time_module.sleep(retry_interval) price_per_hour = INSTANCE_PRICES.get(instance.instance_type, 1.00) print(f" Instance type: {instance.instance_type} (~${price_per_hour:.2f}/hr)") diff --git a/openadapt_ml/scripts/train.py b/openadapt_ml/scripts/train.py index 46c9450..3541d80 100644 --- a/openadapt_ml/scripts/train.py +++ b/openadapt_ml/scripts/train.py @@ -208,6 +208,21 @@ def main( type=str, help="Path to JSONL training data (internal SFT format with images + messages).", ) + parser.add_argument( + "--demo-dir", + type=str, + help="Directory with annotated demo JSON files (auto-converts to JSONL bundle).", + ) + parser.add_argument( + "--captures-dir", + type=str, + help="Parent directory containing capture directories (for screenshot resolution).", + ) + parser.add_argument( + "--mapping", + type=str, + help="Pre-computed screenshot_mapping.json (optional with --demo-dir).", + ) parser.add_argument( "--use-unsloth", action="store_true", @@ -222,6 +237,25 @@ def main( # Determine effective flags use_unsloth = args.use_unsloth and not args.no_unsloth + # Auto-convert demos to JSONL if --demo-dir provided + jsonl_path = args.jsonl + if args.demo_dir and not jsonl_path: + from openadapt_ml.training.convert_demos import prepare_bundle + + if not args.captures_dir: + parser.error("--captures-dir is required with --demo-dir") + + print("=" * 50) + print("Converting demos to training bundle...") + print("=" * 50) + bundle_dir = prepare_bundle( + demo_dir=args.demo_dir, + captures_dir=args.captures_dir, + mapping_path=args.mapping, + ) + jsonl_path = str(bundle_dir / "training_data.jsonl") + print(f"Bundle ready: {jsonl_path}\n") + main( args.config, capture_path=args.capture, @@ -229,5 +263,5 @@ def main( output_dir=args.output_dir, open_dashboard=args.open, use_unsloth=use_unsloth, - jsonl_path=args.jsonl, + jsonl_path=jsonl_path, ) diff --git a/openadapt_ml/training/convert_demos.py b/openadapt_ml/training/convert_demos.py index ae45ea4..6b24f67 100644 --- 
a/openadapt_ml/training/convert_demos.py +++ b/openadapt_ml/training/convert_demos.py @@ -385,6 +385,82 @@ def convert_step( return sample +# --------------------------------------------------------------------------- +# Screenshot mapping generation +# --------------------------------------------------------------------------- + + +def generate_screenshot_mapping( + demo_dir: Path, + captures_dir: Path, +) -> dict[str, dict[str, str]]: + """Auto-generate screenshot mapping by scanning captures on disk. + + For each annotated demo, finds the matching capture directory and maps + step indices to screenshot file paths. Uses the filename pattern + ``capture_1_step_N.png`` in the capture's ``screenshots/`` folder. + + Args: + demo_dir: Directory containing annotated demo JSON files. + captures_dir: Parent directory containing capture directories + (e.g. ``/Users/.../oa/src/``). + + Returns: + Mapping of task_id -> {step_index_str -> absolute_screenshot_path}. + """ + mapping: dict[str, dict[str, str]] = {} + + demo_files = sorted(demo_dir.glob("*.json")) + for demo_path in demo_files: + with open(demo_path) as f: + demo = json.load(f) + + task_id = demo.get("task_id", demo_path.stem) + steps = demo.get("steps", []) + + # Find capture directory — try exact match then case-insensitive + capture_dir = captures_dir / task_id + if not capture_dir.is_dir(): + for d in captures_dir.iterdir(): + if d.is_dir() and d.name.lower() == task_id.lower(): + capture_dir = d + break + else: + print( + f" Warning: no capture directory for {task_id} in {captures_dir}", + file=sys.stderr, + ) + continue + + screenshots_dir = capture_dir / "screenshots" + if not screenshots_dir.is_dir(): + print( + f" Warning: no screenshots/ in {capture_dir}", + file=sys.stderr, + ) + continue + + # Map step indices to screenshot files + step_map: dict[str, str] = {} + for step in steps: + idx = step.get("step_index", 0) + png = screenshots_dir / f"capture_1_step_{idx}.png" + if png.exists(): + 
step_map[str(idx)] = str(png.resolve()) + else: + print( + f" Warning: missing screenshot for {task_id} step {idx}: {png}", + file=sys.stderr, + ) + + if step_map: + mapping[task_id] = step_map + print(f" {task_id}: {len(step_map)} screenshots") + + print(f"Generated mapping for {len(mapping)} tasks") + return mapping + + # --------------------------------------------------------------------------- # Screenshot resolution # --------------------------------------------------------------------------- @@ -636,6 +712,71 @@ def create_bundle( return jsonl_path +def prepare_bundle( + demo_dir: str | Path, + captures_dir: str | Path, + bundle_dir: str | Path | None = None, + mapping_path: str | Path | None = None, + include_thinking: bool = True, +) -> Path: + """End-to-end: generate mapping, convert demos, create bundle. + + This is the single-call entry point for the full conversion pipeline. + It auto-generates the screenshot mapping (or uses a pre-computed one), + converts all demos, and creates a self-contained bundle directory. + + Args: + demo_dir: Directory with annotated demo JSON files. + captures_dir: Parent directory containing capture directories. + bundle_dir: Output bundle directory (default: temp dir). + mapping_path: Optional pre-computed screenshot_mapping.json. + include_thinking: Include thinking blocks in training data. + + Returns: + Path to the bundle directory containing training_data.jsonl + images/.
+ """ + import tempfile + + demo_dir = Path(demo_dir) + captures_dir = Path(captures_dir) + + if bundle_dir is None: + bundle_dir = Path(tempfile.mkdtemp(prefix="training_bundle_")) + else: + bundle_dir = Path(bundle_dir) + bundle_dir.mkdir(parents=True, exist_ok=True) + + # Step 1: Screenshot mapping + if mapping_path: + with open(mapping_path) as f: + screenshot_mapping = json.load(f) + print(f"Loaded screenshot mapping from {mapping_path}") + else: + print("Auto-generating screenshot mapping...") + screenshot_mapping = generate_screenshot_mapping(demo_dir, captures_dir) + # Save for reproducibility + mapping_out = bundle_dir / "screenshot_mapping.json" + with open(mapping_out, "w") as f: + json.dump(screenshot_mapping, f, indent=2) + print(f"Saved mapping to {mapping_out}") + + # Step 2: Convert demos + samples = convert_all_demos( + demo_dir=demo_dir, + captures_dir=captures_dir, + screenshot_mapping=screenshot_mapping, + include_thinking=include_thinking, + ) + + if not samples: + raise ValueError(f"No training samples produced from {demo_dir}") + + # Step 3: Create bundle + create_bundle(samples, bundle_dir) + + return bundle_dir + + def main() -> int: """CLI entry point.""" parser = argparse.ArgumentParser( diff --git a/pyproject.toml b/pyproject.toml index 788609b..ba68d9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openadapt-ml" -version = "0.8.0" +version = "0.9.0" description = "Model-agnostic, domain-agnostic ML engine for GUI automation agents" readme = "README.md" requires-python = ">=3.10" diff --git a/tests/test_convert_demos.py b/tests/test_convert_demos.py index c2dc603..b2f41d7 100644 --- a/tests/test_convert_demos.py +++ b/tests/test_convert_demos.py @@ -28,6 +28,8 @@ _validate_demo, convert_step, create_bundle, + generate_screenshot_mapping, + prepare_bundle, ) @@ -366,3 +368,161 @@ def test_bundle_missing_image(self, tmp_path): with open(jsonl_path) as f: line = json.loads(f.readline()) assert 
line["images"] == [] + + +class TestGenerateScreenshotMapping: + """Test auto-generation of screenshot mapping from captures on disk.""" + + def _make_capture(self, captures_dir, task_id, num_steps): + """Create a fake capture directory with screenshots.""" + cap_dir = captures_dir / task_id / "screenshots" + cap_dir.mkdir(parents=True) + for i in range(num_steps): + (cap_dir / f"capture_1_step_{i}.png").write_bytes(b"fake") + return cap_dir + + def _make_demo(self, demo_dir, task_id, num_steps): + """Create a fake annotated demo JSON.""" + demo = { + "task_id": task_id, + "instruction": "Do something", + "steps": [ + {"step_index": i, "action_raw": f"CLICK(0.{i}, 0.{i})"} + for i in range(num_steps) + ], + } + demo_path = demo_dir / f"{task_id}.json" + demo_path.write_text(json.dumps(demo)) + return demo_path + + def test_basic_mapping(self, tmp_path): + demos = tmp_path / "demos" + demos.mkdir() + captures = tmp_path / "captures" + captures.mkdir() + + self._make_demo(demos, "task-1", 3) + self._make_capture(captures, "task-1", 3) + + mapping = generate_screenshot_mapping(demos, captures) + assert "task-1" in mapping + assert len(mapping["task-1"]) == 3 + for i in range(3): + assert str(i) in mapping["task-1"] + + def test_missing_capture_warns(self, tmp_path): + demos = tmp_path / "demos" + demos.mkdir() + captures = tmp_path / "captures" + captures.mkdir() + + self._make_demo(demos, "task-missing", 2) + # No capture dir created + + mapping = generate_screenshot_mapping(demos, captures) + assert "task-missing" not in mapping + + def test_missing_screenshot_warns(self, tmp_path): + demos = tmp_path / "demos" + demos.mkdir() + captures = tmp_path / "captures" + captures.mkdir() + + self._make_demo(demos, "task-2", 3) + self._make_capture(captures, "task-2", 2) # only 2 screenshots for 3 steps + + mapping = generate_screenshot_mapping(demos, captures) + assert "task-2" in mapping + assert len(mapping["task-2"]) == 2 # only 2 found + + def test_multiple_tasks(self, 
tmp_path): + demos = tmp_path / "demos" + demos.mkdir() + captures = tmp_path / "captures" + captures.mkdir() + + for tid in ["a", "b", "c"]: + self._make_demo(demos, tid, 2) + self._make_capture(captures, tid, 2) + + mapping = generate_screenshot_mapping(demos, captures) + assert len(mapping) == 3 + + +class TestPrepareBundle: + """Test end-to-end prepare_bundle pipeline.""" + + def test_prepare_bundle_auto_mapping(self, tmp_path): + demos = tmp_path / "demos" + demos.mkdir() + captures = tmp_path / "captures" + captures.mkdir() + + # Create demo + capture + demo = { + "task_id": "t1", + "instruction": "Click the button", + "steps": [ + { + "step_index": 0, + "action_raw": "CLICK(0.5, 0.5)", + "observation": "I see a button", + "intent": "Click it", + } + ], + } + (demos / "t1.json").write_text(json.dumps(demo)) + ss_dir = captures / "t1" / "screenshots" + ss_dir.mkdir(parents=True) + (ss_dir / "capture_1_step_0.png").write_bytes(b"fake png") + + bundle_dir = tmp_path / "bundle" + result = prepare_bundle(demos, captures, bundle_dir=bundle_dir) + + assert result == bundle_dir + assert (bundle_dir / "training_data.jsonl").exists() + assert (bundle_dir / "images").is_dir() + assert (bundle_dir / "screenshot_mapping.json").exists() + + # Verify JSONL content + with open(bundle_dir / "training_data.jsonl") as f: + sample = json.loads(f.readline()) + assert sample["images"] == ["images/capture_1_step_0.png"] + assert sample["messages"][0]["role"] == "system" + + def test_prepare_bundle_with_mapping(self, tmp_path): + demos = tmp_path / "demos" + demos.mkdir() + captures = tmp_path / "captures" + captures.mkdir() + + demo = { + "task_id": "t2", + "instruction": "Type hello", + "steps": [{"step_index": 0, "action_raw": 'TYPE("hello")'}], + } + (demos / "t2.json").write_text(json.dumps(demo)) + + # Create image and pre-computed mapping + img = tmp_path / "img.png" + img.write_bytes(b"fake") + mapping = {"t2": {"0": str(img)}} + mapping_path = tmp_path / "mapping.json" + 
mapping_path.write_text(json.dumps(mapping)) + + bundle_dir = tmp_path / "bundle" + prepare_bundle(demos, captures, bundle_dir=bundle_dir, mapping_path=mapping_path) + + assert (bundle_dir / "training_data.jsonl").exists() + # Should NOT have screenshot_mapping.json since we provided one + assert not (bundle_dir / "screenshot_mapping.json").exists() + + def test_prepare_bundle_no_samples_raises(self, tmp_path): + demos = tmp_path / "demos" + demos.mkdir() + captures = tmp_path / "captures" + captures.mkdir() + # Empty demo dir — no JSON files + + with pytest.raises(ValueError, match="No training samples"): + prepare_bundle(demos, captures, bundle_dir=tmp_path / "bundle")