Fix batch scoring: fetch the model artifact in its own job and load the model by its resolved name and version

j-so · j-so · commit 44abcac49d1b · 2020-06-22T15:50:49.000-07:00
diff --git a/.pipelines/diabetes_regression-batchscoring-ci.yml b/.pipelines/diabetes_regression-batchscoring-ci.yml
@@ -49,11 +49,6 @@ stages:
     timeoutInMinutes: 0
     steps:
     - template: code-quality-template.yml
-    - template: diabetes_regression-get-model-id-artifact-template.yml
-      parameters:
-        projectId: '$(resources.pipeline.model-train-ci.projectID)'
-        pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)'
-        artifactBuildId: ${{ parameters.artifactBuildId }}
     - task: AzureCLI@1
       name: publish_batchscore
       inputs:
@@ -66,19 +61,32 @@ stages:
           # Invoke the Python building and publishing a training pipeline
           python -m ml_service.pipelines.diabetes_regression_build_parallel_batchscore_pipeline
  
+  - job: "Get_Model_Artifact"  # standalone job: Run_Batch_Score_Pipeline lists it in dependsOn and reads its outputs
+    displayName: "Get Model Artifact"
+    container: mlops
+    timeoutInMinutes: 0
+    steps:  # NOTE(review): presumably the template below sets MODEL_NAME / MODEL_VERSION as output variables - confirm
+    - template: diabetes_regression-get-model-id-artifact-template.yml  # previously a step of the job that runs publish_batchscore
+      parameters:
+        projectId: '$(resources.pipeline.model-train-ci.projectID)'
+        pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)'
+        artifactBuildId: ${{ parameters.artifactBuildId }}
+
   - job: "Run_Batch_Score_Pipeline"
     displayName: "Run Batch Scoring Pipeline"
-    dependsOn: "Build_Batch_Scoring_Pipeline"
+    dependsOn: ["Build_Batch_Scoring_Pipeline", "Get_Model_Artifact"]  # both jobs supply the variables mapped below
     timeoutInMinutes: 240
     pool: server
     variables:
       pipeline_id: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['publish_batchscore.pipeline_id']]
+      model_name: $[ dependencies.Get_Model_Artifact.outputs['MODEL_NAME']]  # NOTE(review): unlike pipeline_id above, no '<stepName>.' prefix - confirm this resolves and is not empty
+      model_version: $[ dependencies.Get_Model_Artifact.outputs['MODEL_VERSION']]  # NOTE(review): same missing '<stepName>.' prefix as model_name - confirm
     steps:
     - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0
       displayName: 'Invoke Batch Scoring pipeline'
       inputs:
         azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
         PipelineId: '$(pipeline_id)'
         ExperimentName: '$(EXPERIMENT_NAME)'
-        PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)", "model_version": "$(MODEL_VERSION)"}'
+        PipelineParameters: '"ParameterAssignments": {"model_name": "$(model_name)", "model_version": "$(model_version)"}'
       
diff --git a/diabetes_regression/scoring/parallel_batchscore.py b/diabetes_regression/scoring/parallel_batchscore.py
@@ -30,6 +30,7 @@
 import sys
 from typing import List
 from util.model_helper import get_model
+from azureml.core import Model
 
 model = None
 
@@ -64,13 +65,12 @@ def parse_args() -> List[str]:
         for idx, itm in enumerate(sys.argv)
         if itm == "--model_version"
     ]
-
-    if len(model_version_param) == 0:
-        raise ValueError(
-            "Model name is required but no model name parameter was passed to the script"  # NOQA: E501
-        )
-
-    model_version = model_version_param[0][1]
+    model_version = (
+        None
+        if len(model_version_param) < 1
+        or len(model_version_param[0][1].strip()) == 0  # NOQA: E501
+        else model_version_param[0][1]
+    )
 
     model_tag_name_param = [
         (sys.argv[idx], sys.argv[idx + 1])
@@ -107,15 +107,17 @@ def init():
     try:
         print("Initializing batch scoring script...")
 
+        # Get the model using name/version/tags filter
         model_filter = parse_args()
         amlmodel = get_model(
             model_name=model_filter[0],
             model_version=model_filter[1],
             tag_name=model_filter[2],
             tag_value=model_filter[3])
 
+        # Load the model using name/version found
         global model
-        modelpath = Model.get_model_path(model_name=model_filter[0])
+        modelpath = Model.get_model_path(model_name=amlmodel.name, version=amlmodel.version)
         model = joblib.load(modelpath)
         print("Loaded model {}".format(model_filter[0]))
     except Exception as ex:
diff --git a/diabetes_regression/util/model_helper.py b/diabetes_regression/util/model_helper.py
@@ -46,19 +46,19 @@ def get_model(
         print("No workspace defined - using current experiment workspace.")
         aml_workspace = get_current_workspace()
 
-    if tagname is not None and tagvalue is not None:
+    if tag_name is not None and tag_value is not None:
         model = AMLModel(
             aml_workspace,
             name=model_name,
             version=model_version,
             tags=[[tag_name, tag_value]])
-    elif (tagname is None and tagvalue is not None) or (
-        tagvalue is None and tagname is not None
+    elif (tag_name is None and tag_value is not None) or (
+        tag_value is None and tag_name is not None
     ):
         raise ValueError(
             "model_tag_name and model_tag_value should both be supplied"
             + "or excluded"  # NOQA: E501
         )
     else:
-        model = AMLModel(aml_workspace, name=env.model_name, version=env.model_version) # NOQA: E501
+        model = AMLModel(aml_workspace, name=model_name, version=model_version)  # NOQA: E501
     return model
diff --git a/ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py b/ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py
@@ -33,7 +33,6 @@
     Workspace,
     Dataset,
     Datastore,
-    Model,
     RunConfiguration,
 )
 from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter
@@ -44,24 +43,6 @@
 from typing import Tuple
 
 
-def parse_args() -> Namespace:
-    """
-    Parse arguments supplied to the pipeline creation script.
-    The only allowed arguments are model_tag_name and model_tag_value
-    specifying a custom tag/value pair to help locate a specific model.
-
-
-    :returns: Namespace with two attributes model_tag_name and model_tag_value
-    and corresponding values
-
-    """
-    parser = ArgumentParser()
-    parser.add_argument("--model_tag_name", default=None, type=str)
-    parser.add_argument("--model_tag_value", default=None, type=str)
-    args = parser.parse_args()
-    return args
-
-
 def get_or_create_datastore(
     datastorename: str, ws: Workspace, env: Env, input: bool = True
 ) -> Datastore:
@@ -312,7 +293,6 @@ def get_scoring_pipeline(
     """
     Creates the scoring pipeline.
 
-    :param model: The model to use for scoring
     :param scoring_dataset: Data to score
     :param output_loc: Location to save the scoring results
     :param score_run_config: Parallel Run configuration to support
@@ -399,8 +379,6 @@ def build_batchscore_pipeline():
     try:
         env = Env()
 
-        args = parse_args()
-
         # Get Azure machine learning workspace
         aml_workspace = Workspace.get(
             name=env.workspace_name,