|
105 | 105 |
|
106 | 106 |
|
107 | 107 | run_id_to_system_metrics_monitor = {} |
| 108 | +run_id_to_stdout_logger = {} |
108 | 109 |
|
109 | 110 |
|
110 | 111 | _active_run_stack = ThreadLocalVariable(default_factory=lambda: []) |
@@ -267,6 +268,8 @@ def start_run( |
267 | 268 | tags: Optional[dict[str, Any]] = None, |
268 | 269 | description: Optional[str] = None, |
269 | 270 | log_system_metrics: Optional[bool] = None, |
| 271 | + log_stdout: Optional[bool] = None, |
| 272 | + log_stdout_interval: int = 5, |
270 | 273 | ) -> ActiveRun: |
271 | 274 | """ |
272 | 275 | Start a new MLflow run, setting it as the active run under which metrics and parameters |
@@ -309,6 +312,11 @@ def start_run( |
309 | 312 | to MLflow, e.g., cpu/gpu utilization. If None, we will check environment variable |
310 | 313 | `MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING` to determine whether to log system metrics. |
311 | 314 | System metrics logging is an experimental feature in MLflow 2.8 and subject to change. |
| 315 | + log_stdout: bool, defaults to None. If True, stdout will be captured and periodically |
| 316 | + logged to MLflow as an artifact named 'stdout.log'. If False, stdout logging is |
| 317 | + disabled. If None, stdout logging is disabled by default. |
| 318 | + log_stdout_interval: int, defaults to 5. The interval in seconds at which to log |
| 319 | + the captured stdout to MLflow. Only used when log_stdout is True. |
312 | 320 |
|
313 | 321 | Returns: |
314 | 322 | :py:class:`mlflow.ActiveRun` object that acts as a context manager wrapping the |
@@ -502,6 +510,19 @@ def start_run( |
502 | 510 | _logger.error(f"Failed to start system metrics monitoring: {e}.") |
503 | 511 |
|
504 | 512 | active_run_stack.append(ActiveRun(active_run_obj)) |
| 513 | + |
| 514 | + if log_stdout: |
| 515 | + try: |
| 516 | + from mlflow.utils.stdout_logging import log_stdout_stream |
| 517 | + |
| 518 | + # Create the stdout-capturing context manager; it is entered immediately below and exited in end_run() |
| 519 | + stdout_logger = log_stdout_stream(interval_seconds=log_stdout_interval) |
| 520 | + run_id_to_stdout_logger[active_run_obj.info.run_id] = stdout_logger |
| 521 | + # Start the stdout logging |
| 522 | + stdout_logger.__enter__() |
| 523 | + except Exception as e: |
| 524 | + _logger.error(f"Failed to start stdout logging: {e}.") |
| 525 | + |
505 | 526 | return active_run_stack[-1] |
506 | 527 |
|
507 | 528 |
|
@@ -548,6 +569,12 @@ def end_run(status: str = RunStatus.to_string(RunStatus.FINISHED)) -> None: |
548 | 569 | if last_active_run_id in run_id_to_system_metrics_monitor: |
549 | 570 | system_metrics_monitor = run_id_to_system_metrics_monitor.pop(last_active_run_id) |
550 | 571 | system_metrics_monitor.finish() |
| 572 | + if last_active_run_id in run_id_to_stdout_logger: |
| 573 | + stdout_logger = run_id_to_stdout_logger.pop(last_active_run_id) |
| 574 | + try: |
| 575 | + stdout_logger.__exit__(None, None, None) |
| 576 | + except Exception as e: |
| 577 | + _logger.error(f"Failed to stop stdout logging: {e}.") |
551 | 578 |
|
552 | 579 |
|
553 | 580 | def _safe_end_run(): |
|
0 commit comments