Skip to content

Commit cc73131

Browse files
bpiwowar and claude committed
feat(tui): implement delete operations and update kill shortcut
- Add delete_job_safely to StateProvider with proper running job check
- Add delete_experiment to WorkspaceStateProvider with cache clearing
- Add DELETE_JOB_SAFELY and DELETE_EXPERIMENT RPC methods for SSH monitoring
- Emit JobStateChangedEvent(state=unscheduled) on job deletion for TUI refresh
- Change kill shortcut from 'k' to 'Ctrl+K' for consistency with Ctrl+D delete

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 4efb19f commit cc73131

File tree

9 files changed

+213
-10
lines changed

9 files changed

+213
-10
lines changed

docs/source/interfaces.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ Press `?` in the TUI to show the help screen with all shortcuts.
9797
| `Enter` | Select experiment |
9898
| `d` | Show experiment runs |
9999
| `Ctrl+d` | Delete experiment |
100-
| `k` | Kill all running jobs |
100+
| `Ctrl+k` | Kill all running jobs |
101101
| `S` | Sort by status |
102102
| `D` | Sort by date |
103103

@@ -107,7 +107,7 @@ Press `?` in the TUI to show the help screen with all shortcuts.
107107
|-----|--------|
108108
| `l` | View job logs |
109109
| `Ctrl+d` | Delete job |
110-
| `k` | Kill running job |
110+
| `Ctrl+k` | Kill running job |
111111
| `/` | Open search filter |
112112
| `c` | Clear search filter |
113113
| `S` | Sort by status |

src/experimaestro/scheduler/remote/client.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -702,6 +702,35 @@ def clean_job(self, job: BaseJob, perform: bool = False) -> bool:
702702
result = self._call_sync(RPCMethod.CLEAN_JOB, params)
703703
return result.get("success", False)
704704

705+
def delete_job_safely(self, job: BaseJob, perform: bool = True) -> tuple[bool, str]:
706+
"""Safely delete a job and its data via remote server"""
707+
if not perform:
708+
# Dry run - check if job can be deleted (not running)
709+
if job.state and job.state.running():
710+
return False, f"Cannot delete running job {job.identifier}"
711+
return True, f"Job {job.identifier} can be deleted"
712+
713+
params = {
714+
"job_id": job.identifier,
715+
"experiment_id": getattr(job, "experiment_id", ""),
716+
"run_id": getattr(job, "run_id", ""),
717+
"perform": True,
718+
}
719+
result = self._call_sync(RPCMethod.DELETE_JOB_SAFELY, params)
720+
return result.get("success", False), result.get("message", "")
721+
722+
def delete_experiment(
723+
self, experiment_id: str, delete_jobs: bool = False, perform: bool = True
724+
) -> tuple[bool, str]:
725+
"""Delete an experiment and optionally its job data via remote server"""
726+
params = {
727+
"experiment_id": experiment_id,
728+
"delete_jobs": delete_jobs,
729+
"perform": perform,
730+
}
731+
result = self._call_sync(RPCMethod.DELETE_EXPERIMENT, params)
732+
return result.get("success", False), result.get("message", "")
733+
705734
def get_process_info(self, job: BaseJob):
706735
"""Get process information for a job
707736

src/experimaestro/scheduler/remote/protocol.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,8 @@ class RPCMethod(str, Enum):
5858
GET_DEPENDENCIES_MAP = "get_dependencies_map"
5959
KILL_JOB = "kill_job"
6060
CLEAN_JOB = "clean_job"
61+
DELETE_JOB_SAFELY = "delete_job_safely"
62+
DELETE_EXPERIMENT = "delete_experiment"
6163
GET_SYNC_INFO = "get_sync_info"
6264
GET_PROCESS_INFO = "get_process_info"
6365

src/experimaestro/scheduler/remote/server.py

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,8 @@ def __init__(
8282
RPCMethod.GET_DEPENDENCIES_MAP.value: self._handle_get_dependencies_map,
8383
RPCMethod.KILL_JOB.value: self._handle_kill_job,
8484
RPCMethod.CLEAN_JOB.value: self._handle_clean_job,
85+
RPCMethod.DELETE_JOB_SAFELY.value: self._handle_delete_job_safely,
86+
RPCMethod.DELETE_EXPERIMENT.value: self._handle_delete_experiment,
8587
RPCMethod.GET_SYNC_INFO.value: self._handle_get_sync_info,
8688
RPCMethod.GET_PROCESS_INFO.value: self._handle_get_process_info,
8789
}
@@ -445,3 +447,45 @@ def _handle_get_process_info(self, params: Dict) -> Optional[Dict]:
445447
"type": pinfo.type,
446448
"running": pinfo.running,
447449
}
450+
451+
def _handle_delete_job_safely(self, params: Dict) -> Dict:
452+
"""Handle delete_job_safely request"""
453+
job_id = params.get("job_id")
454+
experiment_id = params.get("experiment_id")
455+
run_id = params.get("run_id")
456+
perform = params.get("perform", True)
457+
458+
if not job_id or not experiment_id:
459+
raise TypeError("job_id and experiment_id are required")
460+
461+
# Get the job first
462+
job = self._state_provider.get_job(job_id, experiment_id, run_id)
463+
if job is None:
464+
return {"success": False, "message": "Job not found"}
465+
466+
# Delete the job safely
467+
try:
468+
success, message = self._state_provider.delete_job_safely(
469+
job, perform=perform
470+
)
471+
return {"success": success, "message": message}
472+
except Exception as e:
473+
return {"success": False, "message": str(e)}
474+
475+
def _handle_delete_experiment(self, params: Dict) -> Dict:
476+
"""Handle delete_experiment request"""
477+
experiment_id = params.get("experiment_id")
478+
delete_jobs = params.get("delete_jobs", False)
479+
perform = params.get("perform", True)
480+
481+
if not experiment_id:
482+
raise TypeError("experiment_id is required")
483+
484+
# Delete the experiment
485+
try:
486+
success, message = self._state_provider.delete_experiment(
487+
experiment_id, delete_jobs=delete_jobs, perform=perform
488+
)
489+
return {"success": success, "message": message}
490+
except Exception as e:
491+
return {"success": False, "message": str(e)}

src/experimaestro/scheduler/state_provider.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -312,13 +312,44 @@ def get_stray_jobs(self) -> List[BaseJob]:
312312
return [j for j in self.get_orphan_jobs() if j.state and j.state.running()]
313313

314314
def delete_job_safely(self, job: BaseJob, perform: bool = True) -> Tuple[bool, str]:
315-
"""Safely delete a job and its data"""
316-
return False, "Not implemented"
315+
"""Safely delete a job and its data
316+
317+
Only deletes jobs that are finished (not running). Uses clean_job
318+
for the actual deletion.
319+
320+
Args:
321+
job: The job to delete
322+
perform: If True, actually perform deletion; if False, just check
323+
324+
Returns:
325+
Tuple of (success, message)
326+
"""
327+
# Check if job is running - cannot delete running jobs
328+
if job.state and job.state.running():
329+
return False, f"Cannot delete running job {job.identifier}"
330+
331+
# Use clean_job for the actual deletion
332+
if self.clean_job(job, perform=perform):
333+
if perform:
334+
return True, f"Deleted job {job.identifier}"
335+
else:
336+
return True, f"Job {job.identifier} can be deleted"
337+
else:
338+
return False, f"Failed to delete job {job.identifier}"
317339

318340
def delete_experiment(
319-
self, experiment_id: str, perform: bool = True
341+
self, experiment_id: str, delete_jobs: bool = False, perform: bool = True
320342
) -> Tuple[bool, str]:
321-
"""Delete an experiment and all its data"""
343+
"""Delete an experiment and optionally its job data
344+
345+
Args:
346+
experiment_id: Experiment identifier to delete
347+
delete_jobs: If True, also delete job directories (default: False)
348+
perform: If True, actually perform deletion; if False, just check
349+
350+
Returns:
351+
Tuple of (success, message)
352+
"""
322353
return False, "Not implemented"
323354

324355
def cleanup_orphan_partials(self, perform: bool = False) -> List[str]:

src/experimaestro/scheduler/workspace_state_provider.py

Lines changed: 97 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1187,6 +1187,21 @@ def clean_job(self, job: MockJob, perform: bool = False) -> bool:
11871187

11881188
if job.path.exists():
11891189
shutil.rmtree(job.path)
1190+
1191+
# Clear job from cache
1192+
with self._job_cache_lock:
1193+
self._job_cache.pop(job.identifier, None)
1194+
1195+
# Emit state change event (unscheduled = job removed)
1196+
from experimaestro.scheduler.state_status import JobStateChangedEvent
1197+
1198+
self._notify_state_listeners(
1199+
JobStateChangedEvent(
1200+
job_id=job.identifier,
1201+
state="unscheduled",
1202+
)
1203+
)
1204+
11901205
return True
11911206
except Exception as e:
11921207
logger.warning("Failed to clean job %s: %s", job.identifier, e)
@@ -1547,6 +1562,88 @@ def get_process_info(self, job: MockJob) -> Optional[ProcessInfo]:
15471562
except (json.JSONDecodeError, OSError):
15481563
return None
15491564

1565+
# =========================================================================
1566+
# Experiment deletion
1567+
# =========================================================================
1568+
1569+
def delete_experiment(
1570+
self, experiment_id: str, delete_jobs: bool = False, perform: bool = True
1571+
) -> tuple[bool, str]:
1572+
"""Delete an experiment and optionally its job data
1573+
1574+
Args:
1575+
experiment_id: Experiment identifier to delete
1576+
delete_jobs: If True, also delete job directories (default: False)
1577+
perform: If True, actually perform deletion; if False, just check
1578+
1579+
Returns:
1580+
Tuple of (success, message)
1581+
"""
1582+
import shutil
1583+
1584+
# Check for running jobs first
1585+
jobs = self.get_jobs(experiment_id=experiment_id)
1586+
running_jobs = [j for j in jobs if j.state and j.state.running()]
1587+
if running_jobs:
1588+
return False, f"Cannot delete: {len(running_jobs)} jobs are still running"
1589+
1590+
# Find experiment directories (v2 layout)
1591+
exp_dir = self.workspace_path / "experiments" / experiment_id
1592+
events_dir = self._experiments_dir / experiment_id
1593+
1594+
# Check for v1 layout
1595+
v1_exp_dir = self.workspace_path / "xp" / experiment_id
1596+
1597+
if not exp_dir.exists() and not v1_exp_dir.exists():
1598+
return False, f"Experiment {experiment_id} not found"
1599+
1600+
if not perform:
1601+
return True, f"Experiment {experiment_id} can be deleted"
1602+
1603+
errors = []
1604+
1605+
# Delete job directories if requested
1606+
if delete_jobs:
1607+
for job in jobs:
1608+
if job.path and job.path.exists():
1609+
try:
1610+
shutil.rmtree(job.path)
1611+
except OSError as e:
1612+
errors.append(f"Failed to delete job {job.identifier}: {e}")
1613+
1614+
# Delete v2 experiment directory
1615+
if exp_dir.exists():
1616+
try:
1617+
shutil.rmtree(exp_dir)
1618+
except OSError as e:
1619+
errors.append(f"Failed to delete experiment dir: {e}")
1620+
1621+
# Delete events directory
1622+
if events_dir.exists():
1623+
try:
1624+
shutil.rmtree(events_dir)
1625+
except OSError as e:
1626+
errors.append(f"Failed to delete events dir: {e}")
1627+
1628+
# Delete v1 experiment directory
1629+
if v1_exp_dir.exists():
1630+
try:
1631+
shutil.rmtree(v1_exp_dir)
1632+
except OSError as e:
1633+
errors.append(f"Failed to delete v1 experiment dir: {e}")
1634+
1635+
# Clear caches
1636+
self._clear_experiment_cache(experiment_id)
1637+
with self._job_cache_lock:
1638+
job_ids_to_remove = [j.identifier for j in jobs]
1639+
for job_id in job_ids_to_remove:
1640+
self._job_cache.pop(job_id, None)
1641+
1642+
if errors:
1643+
return False, f"Partial deletion: {'; '.join(errors)}"
1644+
1645+
return True, f"Deleted experiment {experiment_id}"
1646+
15501647
# =========================================================================
15511648
# Lifecycle
15521649
# =========================================================================

src/experimaestro/tui/dialogs.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -130,14 +130,14 @@ def compose(self) -> ComposeResult:
130130
Enter Select experiment
131131
d Show experiment runs
132132
Ctrl+d Delete experiment
133-
k Kill all running jobs
133+
Ctrl+k Kill all running jobs
134134
S Sort by status
135135
D Sort by date
136136
137137
[bold cyan]Jobs[/bold cyan]
138138
l View job logs
139139
Ctrl+d Delete job
140-
k Kill running job
140+
Ctrl+k Kill running job
141141
/ Open search filter
142142
c Clear search filter
143143
S Sort by status

src/experimaestro/tui/widgets/experiments.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ class ExperimentsList(Widget):
2727
BINDINGS = [
2828
Binding("d", "show_runs", "Runs"),
2929
Binding("ctrl+d", "delete_experiment", "Delete", show=False),
30-
Binding("k", "kill_experiment", "Kill", show=False),
30+
Binding("ctrl+k", "kill_experiment", "Kill", show=False),
3131
Binding("S", "sort_by_status", "Sort ⚑", show=False),
3232
Binding("D", "sort_by_date", "Sort Date", show=False),
3333
]

src/experimaestro/tui/widgets/jobs.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -456,7 +456,7 @@ class JobsTable(Vertical):
456456

457457
BINDINGS = [
458458
Binding("ctrl+d", "delete_job", "Delete", show=False),
459-
Binding("k", "kill_job", "Kill", show=False),
459+
Binding("ctrl+k", "kill_job", "Kill", show=False),
460460
Binding("l", "view_logs", "Logs", key_display="l"),
461461
Binding("f", "copy_path", "Copy Path", show=False),
462462
Binding("/", "toggle_search", "Search"),

0 commit comments

Comments
 (0)