[SkinnyClient] Remove pandas and numpy dependency (mlflow#3972)

* Remove numpy and pandas dependencies from the MLflow Skinny client Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Flip the condition to skip if is skinny Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Remove flavor changes Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Fix __all__ with new change Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Move autolog imports into autolog Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Avoid mlflow.azureml import Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Undo black changes for lines shorter than 100 chars Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Move in pyfunc import in sagemaker deploy code Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Avoid containers import Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Verify as_pandas=False behaves as expected Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Undo sagemaker and azureml deploy changes and condition cli import for both built in deployment plugins Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Fix format issue Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Move imports into the try catch else Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Fix bug in brackets Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Simplify equality check Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Add view type support to run and verify Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Avoid keyword argument for client.search_runs Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Fix view type for deleted tests Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Avoid setting view type to null Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Fix param passing Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Run formatter Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Fix 
mlflow/__init__.py formatting Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Add pylint noqa equivalent Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Resolve lint issues with pylint and flake8 ignores Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Address most comments Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Resolve skinny test build issues Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Undo format change Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Move test_runs input into csv gen Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Add pytest import Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Add output_format instead of as_pandas Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Use direct imports Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Clean up the string for the ValueError message Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Fix doc issue Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Resolve test import issue Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Add time to search_runs check Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Fix formatting Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Add pandas import for skinny skipped test Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Cleanup docs, simplify tests, address formatting comments Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Resolve missing param for pandas only test Signed-off-by: Eduardo de Leon <eddeleon@microsoft.com> * Doc tweaks Co-authored-by: dbczumar <39497902+dbczumar@users.noreply.github.com>
simonvanbernem · Jan 27, 2021 · d1a2647 · d1a2647
1 parent 7a6c245
commit d1a2647
Show file tree

Hide file tree

Showing 17 changed files with 326 additions and 166 deletions.
diff --git a/dev/run-python-skinny-tests.sh b/dev/run-python-skinny-tests.sh
@@ -13,8 +13,18 @@ err=0
 trap 'err=1' ERR
 export MLFLOW_SKINNY='true'
 
-pytest --verbose tests/test_skinny.py
+pytest --verbose tests/test_skinny_client_omits_sql_libs.py
+
+# After verifying that the skinny client does not include store-specific requirements,
+# we install the SQLAlchemy store requirements as our example store for the test suite.
+# SQLAlchemy serves as a simple, fully featured option to test skinny client store scenarios.
 python -m pip install sqlalchemy alembic sqlparse
+
+# Because installing the example store does not remove any dependencies, we check the
+# non-store-related dependencies after the example store setup. This verifies that neither the
+# example store nor the skinny client adds unintended libraries.
+pytest --verbose tests/test_skinny_client_omits_data_science_libs.py
+
 pytest --verbose tests/test_runs.py
 pytest --verbose tests/tracking/test_client.py
 pytest --verbose tests/tracking/test_tracking.py

diff --git a/dev/skinny-requirements.txt b/dev/skinny-requirements.txt
@@ -1,5 +1,3 @@
-## Small test reqs
-scipy
 ## Test-only dependencies
 pytest==3.2.1
 pytest-cov==2.6.0

diff --git a/mlflow/__init__.py b/mlflow/__init__.py
@@ -46,22 +46,48 @@
 import mlflow.tracking as tracking  # noqa: E402
 
 # model flavors
-import mlflow.fastai as fastai  # noqa: E402
-import mlflow.gluon as gluon  # noqa: E402
-import mlflow.h2o as h2o  # noqa: E402
-import mlflow.keras as keras  # noqa: E402
-import mlflow.lightgbm as lightgbm  # noqa: E402
-import mlflow.mleap as mleap  # noqa: E402
-import mlflow.onnx as onnx  # noqa: E402
-import mlflow.pyfunc as pyfunc  # noqa: E402
-import mlflow.pytorch as pytorch  # noqa: E402
-import mlflow.sklearn as sklearn  # noqa: E402
-import mlflow.spacy as spacy  # noqa: E402
-import mlflow.spark as spark  # noqa: E402
-import mlflow.statsmodels as statsmodels  # noqa: E402
-import mlflow.tensorflow as tensorflow  # noqa: E402
-import mlflow.xgboost as xgboost  # noqa: E402
-import mlflow.shap as shap  # noqa: E402
+_model_flavors_supported = []
+try:
+    # pylint: disable=unused-import
+    import mlflow.fastai as fastai  # noqa: E402
+    import mlflow.gluon as gluon  # noqa: E402
+    import mlflow.h2o as h2o  # noqa: E402
+    import mlflow.keras as keras  # noqa: E402
+    import mlflow.lightgbm as lightgbm  # noqa: E402
+    import mlflow.mleap as mleap  # noqa: E402
+    import mlflow.onnx as onnx  # noqa: E402
+    import mlflow.pyfunc as pyfunc  # noqa: E402
+    import mlflow.pytorch as pytorch  # noqa: E402
+    import mlflow.sklearn as sklearn  # noqa: E402
+    import mlflow.spacy as spacy  # noqa: E402
+    import mlflow.spark as spark  # noqa: E402
+    import mlflow.statsmodels as statsmodels  # noqa: E402
+    import mlflow.tensorflow as tensorflow  # noqa: E402
+    import mlflow.xgboost as xgboost  # noqa: E402
+    import mlflow.shap as shap  # noqa: E402
+
+    _model_flavors_supported = [
+        "fastai",
+        "gluon",
+        "h2o",
+        "keras",
+        "lightgbm",
+        "mleap",
+        "onnx",
+        "pyfunc",
+        "pytorch",
+        "sklearn",
+        "spacy",
+        "spark",
+        "statsmodels",
+        "tensorflow",
+        "xgboost",
+        "shap",
+    ]
+except ImportError as e:
+    # We are conditionally loading these model flavors since the skinny client does
+    # not support them due to the pandas and numpy dependencies of MLflow Models
+    pass
 
 
 _configure_mlflow_loggers(root_module_name=__name__)
@@ -149,21 +175,4 @@
     "set_registry_uri",
     "list_run_infos",
     "autolog",
-    # model flavors
-    "fastai",
-    "gluon",
-    "h2o",
-    "keras",
-    "lightgbm",
-    "mleap",
-    "onnx",
-    "pyfunc",
-    "pytorch",
-    "sklearn",
-    "spacy",
-    "spark",
-    "statsmodels",
-    "tensorflow",
-    "xgboost",
-    "shap",
-]
+] + _model_flavors_supported
diff --git a/mlflow/cli.py b/mlflow/cli.py
@@ -6,14 +6,11 @@
 import click
 from click import UsageError
 
-import mlflow.azureml.cli
 import mlflow.db
 import mlflow.experiments
-import mlflow.models.cli
 import mlflow.deployments.cli
 import mlflow.projects as projects
 import mlflow.runs
-import mlflow.sagemaker.cli
 import mlflow.store.artifact.cli
 from mlflow import tracking
 from mlflow.store.tracking import DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH
@@ -453,14 +450,25 @@ def gc(backend_store_uri, run_ids):
         print("Run with ID %s has been permanently deleted." % str(run_id))
 
 
-cli.add_command(mlflow.models.cli.commands)
 cli.add_command(mlflow.deployments.cli.commands)
-cli.add_command(mlflow.sagemaker.cli.commands)
 cli.add_command(mlflow.experiments.commands)
 cli.add_command(mlflow.store.artifact.cli.commands)
-cli.add_command(mlflow.azureml.cli.commands)
 cli.add_command(mlflow.runs.commands)
 cli.add_command(mlflow.db.commands)
 
+try:
+    # pylint: disable=unused-import
+    import mlflow.models.cli
+    import mlflow.azureml.cli
+    import mlflow.sagemaker.cli
+
+    cli.add_command(mlflow.azureml.cli.commands)
+    cli.add_command(mlflow.sagemaker.cli.commands)
+    cli.add_command(mlflow.models.cli.commands)
+except ImportError as e:
+    # We are conditionally loading these commands since the skinny client does
+    # not support them due to the pandas and numpy dependencies of MLflow Models
+    pass
+
 if __name__ == "__main__":
     cli()
diff --git a/mlflow/deployments/cli.py b/mlflow/deployments/cli.py
@@ -1,7 +1,6 @@
 import click
 import sys
 import json
-import pandas as pd
 from mlflow.utils import cli_args
 from mlflow.deployments import interface
 from mlflow.utils.proto_json_utils import NumpyEncoder, _get_jsonable_obj
@@ -249,6 +248,8 @@ def predict(target, name, input_path, output_path):
     """
     Predict the results for the deployed model for the given input(s)
     """
+    import pandas as pd
+
     df = pd.read_json(input_path)
     client = interface.get_deploy_client(target)
     result = client.predict(name, df)

diff --git a/mlflow/models/model.py b/mlflow/models/model.py
@@ -9,7 +9,6 @@
 
 import mlflow
 from mlflow.exceptions import MlflowException
-from mlflow.models.signature import ModelSignature
 from mlflow.utils.file_utils import TempDir
 from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS
 
@@ -31,7 +30,7 @@ def __init__(
         run_id=None,
         utc_time_created=None,
         flavors=None,
-        signature: ModelSignature = None,
+        signature=None,  # ModelSignature
         saved_input_example_info: Dict[str, Any] = None,
         **kwargs
     ):
@@ -62,7 +61,7 @@ def add_flavor(self, name, **params):
         return self
 
     @property
-    def signature(self) -> Optional[ModelSignature]:
+    def signature(self):  # -> Optional[ModelSignature]
         return self._signature
 
     @signature.setter
@@ -115,6 +114,9 @@ def load(cls, path):
     @classmethod
     def from_dict(cls, model_dict):
         """Load a model from its YAML representation."""
+
+        from .signature import ModelSignature
+
         if "signature" in model_dict and isinstance(model_dict["signature"], dict):
             model_dict = model_dict.copy()
             model_dict["signature"] = ModelSignature.from_dict(model_dict["signature"])

diff --git a/mlflow/store/tracking/file_store.py b/mlflow/store/tracking/file_store.py
@@ -23,7 +23,6 @@
 from mlflow.entities.run_info import check_run_is_active, check_run_is_deleted
 from mlflow.exceptions import MlflowException, MissingConfigException
 import mlflow.protos.databricks_pb2 as databricks_pb2
-from mlflow.models import Model
 from mlflow.protos.databricks_pb2 import INTERNAL_ERROR, RESOURCE_DOES_NOT_EXIST
 from mlflow.store.tracking import DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH, SEARCH_MAX_RESULTS_THRESHOLD
 from mlflow.store.tracking.abstract_store import AbstractStore
@@ -865,6 +864,8 @@ def log_batch(self, run_id, metrics, params, tags):
             raise MlflowException(e, INTERNAL_ERROR)
 
     def record_logged_model(self, run_id, mlflow_model):
+        from mlflow.models import Model
+
         if not isinstance(mlflow_model, Model):
             raise TypeError(
                 "Argument 'mlflow_model' should be mlflow.models.Model, got '{}'".format(

diff --git a/mlflow/store/tracking/sqlalchemy_store.py b/mlflow/store/tracking/sqlalchemy_store.py
@@ -7,7 +7,6 @@
 import sqlalchemy.sql.expression as sql
 
 from mlflow.entities.lifecycle_stage import LifecycleStage
-from mlflow.models import Model
 from mlflow.store.tracking import SEARCH_MAX_RESULTS_THRESHOLD
 from mlflow.store.db.db_types import MYSQL, MSSQL
 import mlflow.store.db.utils
@@ -755,6 +754,8 @@ def log_batch(self, run_id, metrics, params, tags):
             raise MlflowException(e, INTERNAL_ERROR)
 
     def record_logged_model(self, run_id, mlflow_model):
+        from mlflow.models import Model
+
         if not isinstance(mlflow_model, Model):
             raise TypeError(
                 "Argument 'mlflow_model' should be mlflow.models.Model, got '{}'".format(

diff --git a/mlflow/tracking/_tracking_service/client.py b/mlflow/tracking/_tracking_service/client.py
@@ -7,7 +7,6 @@
 import time
 import os
 
-from mlflow.models import Model
 from mlflow.store.tracking import SEARCH_MAX_RESULTS_DEFAULT
 from mlflow.tracking._tracking_service import utils
 from mlflow.utils.validation import (
@@ -246,6 +245,8 @@ def log_batch(self, run_id, metrics=(), params=(), tags=()):
         self.store.log_batch(run_id=run_id, metrics=metrics, params=params, tags=tags)
 
     def _record_logged_model(self, run_id, mlflow_model):
+        from mlflow.models import Model
+
         if not isinstance(mlflow_model, Model):
             raise TypeError(
                 "Argument 'mlflow_model' should be of type mlflow.models.Model but was "