stdout logging to stdout.log

naimenz · naimenz · commit 76e05207f3c6 · 2025-07-09T15:02:22.000-07:00
diff --git a/examples/stdout_logging/test_stdout_integration.py b/examples/stdout_logging/test_stdout_integration.py
@@ -0,0 +1,23 @@
+import time
+
+import mlflow
+
+if __name__ == "__main__":
+    # Configure MLflow under the guard so merely importing this file (its name
+    # matches pytest's default test_*.py pattern) has no global side effects.
+    # A tracking server must already be listening on this address.
+    mlflow.set_tracking_uri("http://127.0.0.1:5002")
+    mlflow.set_experiment("stdout_test")
+    print("Testing stdout logging integration...")
+
+    # Upload every 3 s so the ~30 s loop below spans several uploads.
+    with mlflow.start_run(log_stdout=True, log_stdout_interval=3) as run:
+        print(f"MLflow Run ID: {run.info.run_id}")
+        print("This should appear in both terminal and MLflow!")
+        N_LOGS = 30
+        for i in range(N_LOGS):
+            print(f"Message {i + 1}/{N_LOGS}")
+            time.sleep(1)
+        print("Test completed!")
+
+    print("This message should only appear in terminal (run has ended)")
diff --git a/mlflow/tracking/fluent.py b/mlflow/tracking/fluent.py
@@ -105,6 +105,7 @@
 
 
 run_id_to_system_metrics_monitor = {}
+run_id_to_stdout_logger = {}
 
 
 _active_run_stack = ThreadLocalVariable(default_factory=lambda: [])
@@ -267,6 +268,8 @@ def start_run(
     tags: Optional[dict[str, Any]] = None,
     description: Optional[str] = None,
     log_system_metrics: Optional[bool] = None,
+    log_stdout: Optional[bool] = None,
+    log_stdout_interval: int = 5,
 ) -> ActiveRun:
     """
     Start a new MLflow run, setting it as the active run under which metrics and parameters
@@ -309,6 +312,11 @@ def start_run(
             to MLflow, e.g., cpu/gpu utilization. If None, we will check environment variable
             `MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING` to determine whether to log system metrics.
             System metrics logging is an experimental feature in MLflow 2.8 and subject to change.
+        log_stdout: bool, defaults to None. If True, stdout will be captured and periodically
+            logged to MLflow as an artifact named 'stdout.log'. If False, stdout logging is
+            disabled. If None, stdout logging is disabled by default.
+        log_stdout_interval: int, defaults to 5. The interval in seconds at which to log
+            the captured stdout to MLflow. Only used when log_stdout is True.
 
     Returns:
         :py:class:`mlflow.ActiveRun` object that acts as a context manager wrapping the
@@ -502,6 +510,19 @@ def start_run(
             _logger.error(f"Failed to start system metrics monitoring: {e}.")
 
     active_run_stack.append(ActiveRun(active_run_obj))
+
+    if log_stdout:
+        try:
+            from mlflow.utils.stdout_logging import log_stdout_stream
+
+            # Create a context manager that will be entered when the ActiveRun is used
+            stdout_logger = log_stdout_stream(interval_seconds=log_stdout_interval)
+            run_id_to_stdout_logger[active_run_obj.info.run_id] = stdout_logger
+            # Start the stdout logging
+            stdout_logger.__enter__()
+        except Exception as e:
+            _logger.error(f"Failed to start stdout logging: {e}.")
+
     return active_run_stack[-1]
 
 
@@ -548,6 +569,12 @@ def end_run(status: str = RunStatus.to_string(RunStatus.FINISHED)) -> None:
         if last_active_run_id in run_id_to_system_metrics_monitor:
             system_metrics_monitor = run_id_to_system_metrics_monitor.pop(last_active_run_id)
             system_metrics_monitor.finish()
+        if last_active_run_id in run_id_to_stdout_logger:
+            stdout_logger = run_id_to_stdout_logger.pop(last_active_run_id)
+            try:
+                stdout_logger.__exit__(None, None, None)
+            except Exception as e:
+                _logger.error(f"Failed to stop stdout logging: {e}.")
 
 
 def _safe_end_run():
diff --git a/mlflow/utils/stdout_logging.py b/mlflow/utils/stdout_logging.py
@@ -0,0 +1,97 @@
+import sys
+import threading
+import time
+from contextlib import contextmanager
+from io import StringIO
+
+import mlflow
+
+
+class TeeStringIO:
+    """File-like wrapper that copies each write to the original stream and a buffer."""
+
+    def __init__(self, original_stdout, string_buffer):
+        self.string_buffer = string_buffer
+        self.original_stdout = original_stdout
+
+    def write(self, data):
+        """Write `data` to the original stream first, then the buffer; return its length."""
+        for sink in (self.original_stdout, self.string_buffer):
+            sink.write(data)
+        return len(data)
+
+    def flush(self):
+        for sink in (self.original_stdout, self.string_buffer):
+            sink.flush()
+
+    def __getattr__(self, name):
+        # Invoked only when normal lookup fails; defer to the original stream.
+        return getattr(self.original_stdout, name)
+
+
+@contextmanager
+def log_stdout_stream(interval_seconds=5):
+    """
+    A context manager to stream stdout to an MLflow artifact.
+
+    Tees `sys.stdout` into an in-memory buffer while still writing to the
+    original stream. A background thread periodically uploads the whole buffer
+    as the artifact 'stdout.log' of the run that was active on entry.
+
+    Args:
+        interval_seconds (int): The interval in seconds at which to log
+                                the stdout buffer to MLflow.
+
+    Raises:
+        RuntimeError: If no MLflow run is active when the context is entered.
+
+    Example:
+        import time
+        import mlflow
+
+        with mlflow.start_run():
+            with log_stdout_stream():
+                print("This is the start of my script.")
+                time.sleep(6)
+                print("This message will appear in the first log upload.")
+                time.sleep(6)
+                print("And this will be in the second.")
+            # The context manager will automatically handle final log upload
+            # and cleanup.
+        print("Stdout is now back to normal.")
+    """
+    active_run = mlflow.active_run()
+    if not active_run:
+        raise RuntimeError("An active MLflow run is required to stream stdout.")
+    # Bind uploads to this run id: the uploader thread does not share the calling
+    # thread's active run, and the final flush may happen after the run is popped.
+    run_id = active_run.info.run_id
+    client = mlflow.MlflowClient()
+    original_stdout = sys.stdout
+    stdout_buffer = StringIO()
+    stop_event = threading.Event()
+
+    def _log_current_stdout():
+        content = stdout_buffer.getvalue()
+        if content:
+            client.log_text(run_id, content, "stdout.log")
+
+    def _log_loop():
+        # wait() doubles as the sleep and returns True as soon as stop is requested.
+        while not stop_event.wait(interval_seconds):
+            _log_current_stdout()
+
+    log_thread = threading.Thread(target=_log_loop, name="mlflow-stdout-logging", daemon=True)
+    sys.stdout = TeeStringIO(original_stdout, stdout_buffer)
+    try:
+        log_thread.start()
+        yield
+    finally:
+        stop_event.set()
+        if log_thread.is_alive():
+            log_thread.join()
+        try:
+            _log_current_stdout()  # final upload of everything captured
+        finally:  # restore stdout even if the final upload fails
+            sys.stdout = original_stdout
+            stdout_buffer.close()