Skip to content

Commit

Permalink
Update docs and examples to reflect model_uri changes (mlflow#1254)
Browse files Browse the repository at this point in the history
* Change 'model_path' param to 'model_uri'

* Flower classifier updates for UDF evaluation

* Fixes
  • Loading branch information
dbczumar authored May 30, 2019
1 parent 267dcc0 commit 2844431
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 16 deletions.
2 changes: 1 addition & 1 deletion docs/source/models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ accepts the following data formats as input:
exist_okay=True)
# Build an Azure ML container image for deployment
azure_image, azure_model = mlflow.azureml.build_image(model_path="<path-to-model>",
azure_image, azure_model = mlflow.azureml.build_image(model_uri="<path-to-model>",
workspace=azure_workspace,
description="Wine regression model 1",
synchronous=True)
Expand Down
4 changes: 2 additions & 2 deletions examples/flower_classifier/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,14 @@ run_id ``101``.
.. code-block:: bash
# score the deployed model
python score_images_rest.py --port 54321 http://127.0.0.1 ./my_images_to_score
python score_images_rest.py --model-uri runs:/101/model --port 54321 http://127.0.0.1 --data-path /path/to/images/for/scoring
- To test batch scoring in Spark, run score_images_spark.py to score the model in Spark like this:

.. code-block:: bash
python score_images_spark.py ./my_images_to_score model --run-id 101
python score_images_spark.py --model-uri runs:/101/model --data-path /path/to/images/for/scoring
Expand Down
10 changes: 6 additions & 4 deletions examples/flower_classifier/score_images_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import click
import pandas as pd

from mlflow.utils import cli_args


def score_model(path, uri, port):
"""
Expand Down Expand Up @@ -48,14 +50,14 @@ def read_image(x):

@click.command(help="Score images.")
@click.option("--port", type=click.INT, default=80, help="Port at which the model is deployed.")
@click.argument("model_uri")
@click.argument("input_data_path")
def run(input_data_path, model_uri, port):
@cli_args.MODEL_URI
@click.argument("--data-path", "-d")
def run(data_path, model_uri, port):
"""
Score images with MLflow deployed at given uri and port and print out the response
to standard out.
"""
print(score_model(input_data_path, model_uri, port).text)
print(score_model(data_path, model_uri, port).text)


if __name__ == '__main__':
Expand Down
17 changes: 8 additions & 9 deletions examples/flower_classifier/score_images_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import mlflow
import mlflow.pyfunc
from mlflow.utils import cli_args

from pyspark.sql.types import *
from pyspark.sql.types import Row
Expand All @@ -36,24 +37,23 @@ def read_images(spark, filenames):
image=read_image_bytes_base64(x))).toDF(schema=schema)


def score_model(spark, data_path, model_path, model_run_id=None):
def score_model(spark, data_path, model_uri):
if os.path.isdir(data_path):
filenames = [os.path.abspath(os.path.join(data_path, x)) for x in os.listdir(data_path)
if os.path.isfile(os.path.join(data_path, x))]
else:
filenames = [data_path]

image_classifier_udf = mlflow.pyfunc.spark_udf(spark=spark,
path=model_path,
run_id=model_run_id,
model_uri=model_uri,
result_type=ArrayType(StringType()))

image_df = read_images(spark, filenames)

raw_preds = image_df.withColumn("prediction", image_classifier_udf("image")).select(
["filename", "prediction"]).toPandas()
# load the pyfunc model to get our domain
pyfunc_model = mlflow.pyfunc.load_pyfunc(model_path, run_id=model_run_id)
pyfunc_model = mlflow.pyfunc.load_pyfunc(model_uri=model_uri)
preds = pd.DataFrame(raw_preds["filename"], index=raw_preds.index)
preds[pyfunc_model._column_names] = pd.DataFrame(raw_preds['prediction'].values.tolist(),
columns=pyfunc_model._column_names,
Expand All @@ -68,18 +68,17 @@ def score_model(spark, data_path, model_path, model_run_id=None):


@click.command(help="Score images.")
@click.option("--run-id", type=click.STRING, default=None, help="MLflow run id")
@click.argument("data_path")
@click.argument("model_path")
def run(data_path, model_path, run_id):
@cli_args.MODEL_URI
@click.argument("--data-path", "-d")
def run(model_uri, data_path):
with pyspark.sql.SparkSession.builder \
.config(key="spark.python.worker.reuse", value=True) \
.config(key="spark.ui.enabled", value=False) \
.master("local-cluster[2, 1, 1024]") \
.getOrCreate() as spark:
# ignore spark log output
spark.sparkContext.setLogLevel("OFF")
print(score_model(spark, data_path, model_path, run_id))
print(score_model(spark, data_path, model_uri))


if __name__ == '__main__':
Expand Down

0 comments on commit 2844431

Please sign in to comment.