sararob · sararob · Sep 1, 2022 · Sep 1, 2022 · Sep 7, 2022 · Sep 14, 2022
diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py
@@ -91,6 +91,7 @@
 )
 get_experiment_df = metadata.metadata._experiment_tracker.get_experiment_df
 start_run = metadata.metadata._experiment_tracker.start_run
+autolog = metadata.metadata._experiment_tracker.autolog
 start_execution = metadata.metadata._experiment_tracker.start_execution
 log = metadata.metadata._experiment_tracker.log
 log_time_series_metrics = metadata.metadata._experiment_tracker.log_time_series_metrics

diff --git a/google/cloud/aiplatform/metadata/metadata.py b/google/cloud/aiplatform/metadata/metadata.py
@@ -15,6 +15,9 @@
 # limitations under the License.
 #
 
+import logging
+
+from typing import Dict, Union, Optional, Any
 from typing import Dict, Union, Optional, Any, List
 
 from google.api_core import exceptions
@@ -239,9 +242,27 @@ def set_experiment(
 
         self._experiment = experiment
 
+    @staticmethod
+    def _initialize_mlflow_and_start_run(setting: str):
+        """Points MLflow at the Vertex tracking plugin and enables autologging.
+
+        Args:
+            setting: Autolog mode placed in the tracking URI, e.g.
+                "run_scoped_autolog" or "global_autolog".
+        """
+        import mlflow
+
+        # suppress mlflow logs
+        mlflow_logger = logging.getLogger("mlflow")
+        mlflow_logger.setLevel(logging.ERROR)
+
+        mlflow.set_tracking_uri(f"vertex-mlflow-plugin://{setting}")
+        mlflow.autolog()
+
     def start_run(
         self,
         run: str,
+        autolog: bool = False,
         *,
         tensorboard: Union[tensorboard_resource.Tensorboard, str, None] = None,
         resume=False,
@@ -306,11 +320,38 @@ def start_run(
 
         else:
             self._experiment_run = experiment_run_resource.ExperimentRun.create(
-                run_name=run, experiment=self._experiment, tensorboard=tensorboard
+                run_name=run,
+                experiment=self._experiment,
+                tensorboard=tensorboard,
             )
-
+            if autolog:
+                try:
+                    import mlflow
+                except ImportError as err:
+                    raise ImportError(
+                        "MLFlow is not installed. Please install MLFlow to use autologging in Vertex Experiments."
+                    ) from err
+                _ExperimentTracker._initialize_mlflow_and_start_run(setting="run_scoped_autolog")
         return self._experiment_run
 
+    def autolog(self):
+        """Turns on MLflow autologging backed by the Vertex tracking plugin.
+
+        Uses the "global_autolog" setting, under which the plugin starts a
+        new Vertex experiment run for each MLflow run it creates.
+
+        Raises:
+            ImportError: If MLflow is not installed.
+        """
+        try:
+            import mlflow
+        except ImportError as err:
+            raise ImportError(
+                "MLFlow is not installed. Please install MLFlow to use autologging in Vertex Experiments."
+            ) from err
+
+        _ExperimentTracker._initialize_mlflow_and_start_run(setting="global_autolog")
+
     def end_run(self, state: gapic.Execution.State = gapic.Execution.State.COMPLETE):
         """Ends the the current experiment run.
 

diff --git a/google/cloud/aiplatform/mlflow_plugin/plugin_src/__init__.py b/google/cloud/aiplatform/mlflow_plugin/plugin_src/__init__.py
diff --git a/google/cloud/aiplatform/mlflow_plugin/plugin_src/file_store.py b/google/cloud/aiplatform/mlflow_plugin/plugin_src/file_store.py
@@ -0,0 +1,94 @@
+from mlflow.entities import RunStatus
+from mlflow.store.tracking import file_store
+
+from google.cloud.aiplatform.compat.types import execution as gca_execution
+
+import uuid
+from google.cloud import aiplatform
+
+
+class VertexMlflowTracking(file_store.FileStore):
+    """MLflow tracking store that mirrors autologged data to Vertex Experiments.
+
+    Registered under the ``mlflow.tracking_store`` entry point group as
+    ``vertex-mlflow-plugin`` and selected by tracking URIs of the form
+    ``vertex-mlflow-plugin://<setting>``, where ``<setting>`` is
+    ``run_scoped_autolog`` or ``global_autolog``.
+    """
+
+    def __init__(self, store_uri=None, artifact_uri=None):
+        """Captures the autolog setting and the current Vertex experiment/run.
+
+        Args:
+            store_uri: Tracking URI, ``vertex-mlflow-plugin://<setting>``.
+            artifact_uri: Not used by this store.
+        """
+        # "scheme://setting".split("/") == ["scheme:", "", "setting"]
+        self.autolog_setting = store_uri.split("/")[2]
+
+        tracker = aiplatform.metadata.metadata._experiment_tracker
+        self.vertex_experiment = tracker.experiment.name
+        self.vertex_experiment_run = tracker.experiment_run
+
+        super().__init__()
+
+    def create_run(self, experiment_id, user_id, start_time, tags):
+        """Creates the MLflow run; in global mode also starts a Vertex run.
+
+        The Vertex run is named ``<framework>-<uuid4>``, where ``<framework>``
+        is the value of the ``mlflow.autologging`` tag (empty if absent).
+        """
+        framework = ""
+        for tag in tags:
+            if tag.key == "mlflow.autologging":
+                framework = tag.value
+
+        # Create a new run for the user only if they've called aiplatform.autolog()
+        if self.autolog_setting == "global_autolog":
+            new_vertex_run = f"{framework}-{uuid.uuid4()}"
+            self.vertex_experiment_run = aiplatform.start_run(run=new_vertex_run)
+
+        return super().create_run(experiment_id, user_id, start_time, tags)
+
+    def update_run_info(self, run_id, run_status, end_time):
+        """Updates the MLflow run; in global mode ends a finished Vertex run."""
+        finished = run_status == RunStatus.FINISHED
+        if finished and self.autolog_setting == "global_autolog":
+            self.vertex_experiment_run.end_run()
+
+        return super().update_run_info(run_id, run_status, end_time)
+
+    def log_batch(self, run_id, metrics, params, tags):
+        """Forwards a batch of MLflow metrics and params to the Vertex run.
+
+        Metrics with a truthy (non-zero) ``step`` are grouped by step and logged
+        as time series metrics; the rest are logged as summary metrics.
+        ``tags`` are not forwarded.
+        """
+        summary_metrics = {}
+        time_series_metrics = {}
+
+        for metric in metrics:
+            if metric.step:
+                step_metrics = time_series_metrics.setdefault(metric.step, {})
+                step_metrics[metric.key] = metric.value
+            else:
+                summary_metrics[metric.key] = metric.value
+
+        # Use a separate name so the incoming ``params`` list is not clobbered.
+        run_params = {param.key: param.value for param in params}
+
+        if summary_metrics:
+            self.vertex_experiment_run.log_metrics(metrics=summary_metrics)
+
+        # TODO: if there are ts metrics but no summary metrics, should we log the metrics from the last step as summary metrics?
+
+        if run_params:
+            self.vertex_experiment_run.log_params(params=run_params)
+
+        for step, ts_metrics in time_series_metrics.items():
+            aiplatform.log_time_series_metrics(ts_metrics, step)
+
+    def log_metric(self, run_id, metric):
+        """Logs a single MLflow metric by delegating to ``log_batch``."""
+        return self.log_batch(run_id, metrics=[metric], params=[], tags=[])
diff --git a/setup.py b/setup.py
@@ -119,6 +119,7 @@
         "console_scripts": [
             "tb-gcp-uploader=google.cloud.aiplatform.tensorboard.uploader_main:run_main"
         ],
+        "mlflow.tracking_store": "vertex-mlflow-plugin=google.cloud.aiplatform.mlflow_plugin.plugin_src.file_store:VertexMlflowTracking",
     },
     namespace_packages=("google", "google.cloud"),
     author="Google LLC",