Skip to content

Commit

Permalink
Remove default value for --columns_file cli-flag; this flag isn't used by all pipelines, and causes the cli tool to mistakenly log columns not used by the pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
dagardner-nv committed Jun 13, 2023
1 parent 11b627a commit 7cd597c
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 25 deletions.
4 changes: 2 additions & 2 deletions docs/source/cloud_deployment_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ helm install --set ngc.apiKey="$API_KEY" \
--pipeline_batch_size=1024 \
--model_max_batch_size=64 \
--use_cpp=True \
pipeline-fil \
pipeline-fil --columns_file=data/columns_fil.txt \
from-file --filename=./examples/data/nvsmi.jsonlines \
monitor --description 'FromFile Rate' --smoothing=0.001 \
deserialize \
Expand All @@ -661,7 +661,7 @@ helm install --set ngc.apiKey="$API_KEY" \
--pipeline_batch_size=1024 \
--model_max_batch_size=64 \
--use_cpp=True \
pipeline-fil \
pipeline-fil --columns_file=data/columns_fil.txt \
from-kafka --input_topic <YOUR_INPUT_KAFKA_TOPIC> --bootstrap_servers broker:9092 \
monitor --description 'FromKafka Rate' --smoothing=0.001 \
deserialize \
Expand Down
2 changes: 1 addition & 1 deletion examples/abp_nvsmi_detection/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ morpheus --log_level=DEBUG \
`# Run a pipeline with 8 threads and a model batch size of 32 (Must be equal or less than Triton config)` \
run --num_threads=8 --pipeline_batch_size=1024 --model_max_batch_size=1024 \
`# Specify a NLP pipeline with 256 sequence length (Must match Triton config)` \
pipeline-fil \
pipeline-fil --columns_file=${MORPHEUS_ROOT}/morpheus/data/columns_fil.txt \
`# 1st Stage: Read from file` \
from-file --filename=examples/data/nvsmi.jsonlines \
`# 2nd Stage: Deserialize from JSON strings to objects` \
Expand Down
3 changes: 0 additions & 3 deletions morpheus/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,6 @@ def pipeline_nlp(ctx: click.Context, **kwargs):
"A label file is a simple text file where each line corresponds to a label. "
"If unspecified the value specified by the --label flag will be used."))
@click.option('--columns_file',
default="data/columns_fil.txt",
type=MorpheusRelativePath(dir_okay=False, exists=True, file_okay=True, resolve_path=True),
help=("Specifies a file to read column features."))
@click.option('--viz_file',
Expand Down Expand Up @@ -413,8 +412,6 @@ def pipeline_fil(ctx: click.Context, **kwargs):
if ("columns_file" in kwargs and kwargs["columns_file"] is not None):
config.fil.feature_columns = load_labels_file(kwargs["columns_file"])
logger.debug("Loaded columns. Current columns: [%s]", str(config.fil.feature_columns))
else:
raise ValueError('Unable to find columns file')

from morpheus.pipeline import LinearPipeline

Expand Down
40 changes: 21 additions & 19 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,9 @@ def test_pipeline_fil(self, config, callback_values):
"""
Creates a pipeline roughly matching that of the abp validation test
"""
args = (GENERAL_ARGS + ['pipeline-fil'] + FILE_SRC_ARGS + ['deserialize', 'preprocess'] + INF_TRITON_ARGS +
MONITOR_ARGS + ['add-class'] + VALIDATE_ARGS + ['serialize'] + TO_FILE_ARGS)
args = (GENERAL_ARGS + ['pipeline-fil', '--columns_file=data/columns_fil.txt'] + FILE_SRC_ARGS +
['deserialize', 'preprocess'] + INF_TRITON_ARGS + MONITOR_ARGS + ['add-class'] + VALIDATE_ARGS +
['serialize'] + TO_FILE_ARGS)

obj = {}
runner = CliRunner()
Expand Down Expand Up @@ -419,23 +420,24 @@ def test_pipeline_fil_all(self, config, callback_values, tmp_path, mlflow_uri):
with open(labels_file, 'w', encoding='UTF-8') as fh:
fh.writelines(['frogs\n', 'lizards\n', 'toads'])

args = (GENERAL_ARGS + ['pipeline-fil', '--labels_file', labels_file] + FILE_SRC_ARGS + FROM_KAFKA_ARGS + [
'deserialize',
'filter',
'dropna',
'--column',
'xyz',
'preprocess',
'add-scores',
'unittest-conv-msg',
'inf-identity',
'inf-pytorch',
'--model_filename',
tmp_model,
'mlflow-drift',
'--tracking_uri',
mlflow_uri
] + INF_TRITON_ARGS + MONITOR_ARGS + ['add-class'] + VALIDATE_ARGS + ['serialize'] + TO_FILE_ARGS +
args = (GENERAL_ARGS + ['pipeline-fil', '--labels_file', labels_file, '--columns_file=data/columns_fil.txt'] +
FILE_SRC_ARGS + FROM_KAFKA_ARGS + [
'deserialize',
'filter',
'dropna',
'--column',
'xyz',
'preprocess',
'add-scores',
'unittest-conv-msg',
'inf-identity',
'inf-pytorch',
'--model_filename',
tmp_model,
'mlflow-drift',
'--tracking_uri',
mlflow_uri
] + INF_TRITON_ARGS + MONITOR_ARGS + ['add-class'] + VALIDATE_ARGS + ['serialize'] + TO_FILE_ARGS +
TO_KAFKA_ARGS)

obj = {}
Expand Down

0 comments on commit 7cd597c

Please sign in to comment.