Skip to content

Commit

Permalink
Move testing utilities into tests/_utils (nv-morpheus#1065)
Browse files Browse the repository at this point in the history
* Separate the testing utilities (now in `tests/_utils`) from the tests for `morpheus.utils`
* This required updating the imports for most tests, which then required numerous pylint fixes
* Adopt updated version of yapf to fix parsing error (tuple definition with type hints raises lib2to3.pgen2.parse.ParseError: bad input google/yapf#1058)

fixes nv-morpheus#1061

Authors:
  - David Gardner (https://github.com/dagardner-nv)
  - Michael Demoret (https://github.com/mdemoret-nv)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: nv-morpheus#1065
  • Loading branch information
dagardner-nv authored Aug 23, 2023
1 parent a343c77 commit 7ee9e8f
Show file tree
Hide file tree
Showing 87 changed files with 391 additions and 357 deletions.
3 changes: 2 additions & 1 deletion morpheus/stages/inference/inference_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,8 @@ def _split_batches(x: MultiInferenceMessage, max_batch_size: int) -> typing.List
return out_resp

@staticmethod
def _convert_response(x: typing.Tuple[typing.List[MultiInferenceMessage], typing.List[TensorMemory]]):
def _convert_response(
x: typing.Tuple[typing.List[MultiInferenceMessage], typing.List[TensorMemory]]) -> MultiResponseMessage:

# Convert a MultiInferenceMessage into a MultiResponseMessage
in_message = x[0]
Expand Down
12 changes: 12 additions & 0 deletions tests/utils/__init__.py → tests/_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
import time
import types
import typing
from unittest import mock

import numpy as np
import pytest

from morpheus.io.deserializers import read_file_to_df
Expand Down Expand Up @@ -161,3 +163,13 @@ def import_or_skip(modname: str,
if fail_missing:
raise ImportError(e) from e
raise


def mk_async_infer(inf_results: np.ndarray) -> typing.Callable:
    """
    Build a stand-in for an inference client's `async_infer` method.

    Parameters
    ----------
    inf_results : np.ndarray
        Values assigned to the `side_effect` of the mocked result's `as_numpy` method. Because `side_effect` is an
        iterable, each call to `as_numpy` returns the next item of `inf_results`.

    Returns
    -------
    typing.Callable
        A function accepting a `callback` keyword argument (any other keyword arguments are ignored) which
        immediately invokes `callback(mock_infer_result, None)`.
    """
    mock_infer_result = mock.MagicMock()
    mock_infer_result.as_numpy.side_effect = inf_results

    def async_infer(callback=None, **_):
        # `callback` keeps its `None` default for signature compatibility, but it is always invoked. Fail with a
        # descriptive message rather than the opaque "'NoneType' object is not callable".
        if callback is None:
            raise TypeError("async_infer() requires a `callback` callable")

        callback(mock_infer_result, None)

    return async_infer
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@

import cudf as cdf # rename to avoid clash with property method

from _utils import TEST_DIRS
from _utils import assert_results
from morpheus.io.deserializers import read_file_to_df
from morpheus.utils import compare_df
from morpheus.utils.type_aliases import DataFrameType
from utils import TEST_DIRS
from utils import assert_results


class DatasetManager:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import typing

from morpheus.messages import MultiInferenceMessage
from morpheus.messages import TensorMemory
from morpheus.messages.memory.tensor_memory import TensorMemory
from morpheus.stages.inference import inference_stage


Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/benchmarks/test_bench_e2e_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import pytest

from _utils import TEST_DIRS
from morpheus.config import Config
from morpheus.config import ConfigAutoEncoder
from morpheus.config import ConfigFIL
Expand All @@ -42,7 +43,6 @@
from morpheus.stages.preprocess.train_ae_stage import TrainAEStage
from morpheus.utils.file_utils import load_labels_file
from morpheus.utils.logger import configure_logging
from utils import TEST_DIRS

E2E_CONFIG_FILE = os.path.join(TEST_DIRS.morpheus_root, "tests/benchmarks/e2e_test_configs.json")
with open(E2E_CONFIG_FILE, 'r', encoding='UTF-8') as f:
Expand Down
8 changes: 4 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ def _camouflage_is_running():
Whether or not we are using Camouflage or an actual Triton server
"""

from utils import TEST_DIRS
from _utils import TEST_DIRS

logger = logging.getLogger(f"morpheus.{__name__}")

Expand Down Expand Up @@ -869,7 +869,7 @@ def test_something(dataset: DatasetManager):
Users who don't want to parametrize over the DataFrame should use the `dataset_pandas` or `dataset_cudf` fixtures.
"""
from utils import dataset_manager
from _utils import dataset_manager
yield dataset_manager.DatasetManager(df_type=df_type)


Expand All @@ -894,7 +894,7 @@ def test_something(dataset_pandas: DatasetManager):
expected_df = expected_df.rename(columns=dict(zip(expected_df.columns, class_labels)))
```
"""
from utils import dataset_manager
from _utils import dataset_manager
yield dataset_manager.DatasetManager(df_type='pandas')


Expand All @@ -908,7 +908,7 @@ def test_something(dataset_cudf: DatasetManager):
cdf = dataset_cudf["filter_probs.csv"]
pdf = dataset_cudf.pandas["filter_probs.csv"]
"""
from utils import dataset_manager
from _utils import dataset_manager
yield dataset_manager.DatasetManager(df_type='cudf')


Expand Down
82 changes: 40 additions & 42 deletions tests/dfencoder/test_autoencoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,13 @@
import pytest
import torch

from _utils import TEST_DIRS
from _utils.dataset_manager import DatasetManager
from morpheus.config import AEFeatureScalar
from morpheus.models.dfencoder import ae_module
from morpheus.models.dfencoder import autoencoder
from morpheus.models.dfencoder import scalers
from morpheus.models.dfencoder.dataframe import EncoderDataFrame
from utils import TEST_DIRS
from utils.dataset_manager import DatasetManager

# pylint: disable=redefined-outer-name

# Only pandas and Python are supported
pytestmark = [pytest.mark.use_pandas, pytest.mark.use_python]
Expand All @@ -57,8 +55,8 @@
NUMERIC_COLS = ['eventID', 'ae_anomaly_score']


@pytest.fixture(scope="function")
def train_ae():
@pytest.fixture(name="train_ae", scope="function")
def train_ae_fixture():
"""
Construct an AutoEncoder instance with the same values used by `train_ae_stage`
"""
Expand All @@ -78,8 +76,8 @@ def train_ae():
)


@pytest.fixture(scope="function")
def train_df(dataset_pandas: DatasetManager) -> typing.Iterator[pd.DataFrame]:
@pytest.fixture(name="train_df", scope="function")
def train_df_fixture(dataset_pandas: DatasetManager) -> typing.Iterator[pd.DataFrame]:
yield dataset_pandas[os.path.join(TEST_DIRS.validation_data_dir, "dfp-cloudtrail-role-g-validation-data-input.csv")]


Expand Down Expand Up @@ -108,59 +106,59 @@ def test_ohe():
assert torch.equal(results, expected.to("cuda", copy=True)), f"{results} != {expected}"


def test_compute_embedding_size():
for (inp, expected) in [(0, 0), (5, 4), (20, 9), (40000, 600)]:
assert ae_module._compute_embedding_size(inp) == expected
@pytest.mark.parametrize("num_cats,expected", [(0, 0), (5, 4), (20, 9), (40000, 600)])
def test_compute_embedding_size(num_cats: int, expected: int):
assert ae_module._compute_embedding_size(num_cats) == expected


def test_complete_layer_constructor():
layer = ae_module.CompleteLayer(4, 5)
assert len(layer.layers) == 1
assert isinstance(layer.layers[0], torch.nn.Linear)
assert layer.layers[0].in_features == 4
assert layer.layers[0].out_features == 5

layer = ae_module.CompleteLayer(4, 5, activation='tanh')
assert len(layer.layers) == 2
assert layer.layers[1] is torch.tanh

layer = ae_module.CompleteLayer(4, 5, dropout=0.2)
assert len(layer.layers) == 2
assert isinstance(layer.layers[1], torch.nn.Dropout)
assert layer.layers[1].p == 0.2

layer = ae_module.CompleteLayer(6, 11, activation='sigmoid', dropout=0.3)
assert len(layer.layers) == 3
assert isinstance(layer.layers[0], torch.nn.Linear)
assert layer.layers[0].in_features == 6
assert layer.layers[0].out_features == 11
assert layer.layers[1] is torch.sigmoid
assert isinstance(layer.layers[2], torch.nn.Dropout)
assert layer.layers[2].p == 0.3
complete_layer = ae_module.CompleteLayer(4, 5)
assert len(complete_layer.layers) == 1
assert isinstance(complete_layer.layers[0], torch.nn.Linear)
assert complete_layer.layers[0].in_features == 4
assert complete_layer.layers[0].out_features == 5

complete_layer = ae_module.CompleteLayer(4, 5, activation='tanh')
assert len(complete_layer.layers) == 2
assert complete_layer.layers[1] is torch.tanh

complete_layer = ae_module.CompleteLayer(4, 5, dropout=0.2)
assert len(complete_layer.layers) == 2
assert isinstance(complete_layer.layers[1], torch.nn.Dropout)
assert complete_layer.layers[1].p == 0.2

complete_layer = ae_module.CompleteLayer(6, 11, activation='sigmoid', dropout=0.3)
assert len(complete_layer.layers) == 3
assert isinstance(complete_layer.layers[0], torch.nn.Linear)
assert complete_layer.layers[0].in_features == 6
assert complete_layer.layers[0].out_features == 11
assert complete_layer.layers[1] is torch.sigmoid
assert isinstance(complete_layer.layers[2], torch.nn.Dropout)
assert complete_layer.layers[2].p == 0.3


def test_complete_layer_interpret_activation():
layer = ae_module.CompleteLayer(4, 5)
assert layer.interpret_activation('elu') is torch.nn.functional.elu
complete_layer = ae_module.CompleteLayer(4, 5)
assert complete_layer.interpret_activation('elu') is torch.nn.functional.elu

# Test for bad activation, this really does raise the base Exception class.
with pytest.raises(Exception):
layer.interpret_activation()
complete_layer.interpret_activation()

with pytest.raises(Exception):
layer.interpret_activation("does_not_exist")
complete_layer.interpret_activation("does_not_exist")

layer = ae_module.CompleteLayer(6, 11, activation='sigmoid')
assert layer.interpret_activation() is torch.sigmoid
complete_layer = ae_module.CompleteLayer(6, 11, activation='sigmoid')
assert complete_layer.interpret_activation() is torch.sigmoid


@pytest.mark.usefixtures("manual_seed")
def test_complete_layer_forward():
# Setting dropout probability to 0. The results of dropout are deterministic, but are only
# consistent when run on the same GPU.
layer = ae_module.CompleteLayer(3, 5, activation='tanh', dropout=0)
complete_layer = ae_module.CompleteLayer(3, 5, activation='tanh', dropout=0)
tensor = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], dtype=torch.float32)
results = layer.forward(tensor)
results = complete_layer.forward(tensor)
expected = torch.tensor([[0.7223, 0.7902, 0.9647, 0.5613, 0.9163], [0.9971, 0.9897, 0.9988, 0.8317, 0.9992],
[1.0000, 0.9995, 1.0000, 0.9417, 1.0000], [1.0000, 1.0000, 1.0000, 0.9806, 1.0000]],
dtype=torch.float32)
Expand Down
2 changes: 1 addition & 1 deletion tests/dfencoder/test_dfencoder_distributed_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
import numpy as np
import pytest

from _utils import TEST_DIRS
from morpheus.models.dfencoder.autoencoder import AutoEncoder
from morpheus.models.dfencoder.dataloader import DatasetFromPath
from morpheus.models.dfencoder.dataloader import DFEncoderDataLoader
from morpheus.models.dfencoder.multiprocessing import start_processes
from utils import TEST_DIRS

# import torch

Expand Down
4 changes: 2 additions & 2 deletions tests/dfencoder/test_dfencoder_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
import pandas as pd
import pytest

from _utils import TEST_DIRS
from morpheus.models.dfencoder.autoencoder import AutoEncoder
from utils import TEST_DIRS

INFERENCE_START_DATE = "2022-11-01"
VALIDATION_SET_SIZE = 3000 # around 1/10 of the train set
Expand Down Expand Up @@ -130,7 +130,7 @@ def test_dfencoder_e2e():
# Assert the consistency of output rows and columns
assert len(inf_res) == len(inference_df)
assert sorted(inf_res.columns) == sorted(
[ft + col_suffix for ft in FEATURE_COLUMNS
[feature + col_suffix for feature in FEATURE_COLUMNS
for col_suffix in ["", "_pred", "_loss", "_z_loss"]] + ["max_abs_z", "mean_abs_z", "z_loss_scaler_type"])
# make sure the user baseline is modeled well enough so the minimum and median z scores
# from inference are in range
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@

import cudf

from _utils import TEST_DIRS
from _utils.dataset_manager import DatasetManager
from morpheus.common import TypeId
from morpheus.config import Config
from morpheus.config import PipelineModes
from morpheus.messages import MessageMeta
from morpheus.messages import MultiInferenceFILMessage
from morpheus.messages import MultiMessage
from utils import TEST_DIRS
from utils.dataset_manager import DatasetManager


def check_inf_message(msg: MultiInferenceFILMessage,
Expand Down
2 changes: 1 addition & 1 deletion tests/examples/developer_guide/test_pass_thru.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@

import cudf

from _utils import TEST_DIRS
from morpheus.config import Config
from morpheus.messages import MessageMeta
from morpheus.messages import MultiMessage
from morpheus.pipeline.single_port_stage import SinglePortStage
from utils import TEST_DIRS


def _check_pass_thru(config: Config,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@

import pytest

from _utils import TEST_DIRS
from _utils.dataset_manager import DatasetManager
from morpheus.config import Config
from morpheus.config import PipelineModes
from morpheus.messages import MessageMeta
from utils import TEST_DIRS
from utils.dataset_manager import DatasetManager

EXPECTED_NEW_COLS = ['to_count', 'bcc_count', 'cc_count', 'total_recipients', 'data']

Expand Down
28 changes: 13 additions & 15 deletions tests/examples/digital_fingerprinting/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,15 @@

import os
import sys
import typing
from unittest import mock

import pytest

from _utils import TEST_DIRS
from _utils import import_or_skip
from _utils.dataset_manager import DatasetManager
from morpheus.config import Config
from utils import TEST_DIRS
from utils import import_or_skip
from utils.dataset_manager import DatasetManager

# pylint: disable=redefined-outer-name

SKIP_REASON = (
"Tests for the digital_fingerprinting production example requires a number of packages not installed in the "
Expand Down Expand Up @@ -55,14 +54,14 @@ def mlflow(fail_missing: bool):
yield import_or_skip("mlflow", reason=SKIP_REASON, fail_missing=fail_missing)


@pytest.fixture(scope='session')
def ae_feature_cols():
with open(os.path.join(TEST_DIRS.data_dir, 'columns_ae_cloudtrail.txt'), encoding='UTF-8') as fh:
@pytest.fixture(name='ae_feature_cols', scope='session')
def ae_feature_cols_fixture():
with open(os.path.join(TEST_DIRS.data_dir, 'columns_ae_cloudtrail.txt'), encoding='utf-8') as fh:
yield [x.strip() for x in fh.readlines()]


@pytest.fixture
def config(config_no_cpp: Config, ae_feature_cols: list[str]):
@pytest.fixture(name="config")
def config_fixture(config_no_cpp: Config, ae_feature_cols: typing.List[str]):
"""
The digital_fingerprinting production example utilizes the Auto Encoder config, and requires C++ execution disabled.
"""
Expand All @@ -73,8 +72,8 @@ def config(config_no_cpp: Config, ae_feature_cols: list[str]):
yield config


@pytest.fixture
def example_dir():
@pytest.fixture(name="example_dir")
def example_dir_fixture():
yield os.path.join(TEST_DIRS.examples_dir, 'digital_fingerprinting/production/morpheus')


Expand All @@ -83,15 +82,14 @@ def example_dir():
# For this reason we need to ensure that the digital_fingerprinting/production/morpheus dir is in sys.path
@pytest.fixture(autouse=True)
def dfp_prod_in_sys_path(
request: pytest.FixtureRequest, # pylint: disable=unused-argument
restore_sys_path: list[str], # pylint: disable=unused-argument
reset_plugins: None, # pylint: disable=unused-argument
example_dir: str):
sys.path.append(example_dir)


@pytest.fixture
def dfp_message_meta(config: Config, dataset_pandas: DatasetManager):
@pytest.fixture(name="dfp_message_meta")
def dfp_message_meta_fixture(config, dataset_pandas: DatasetManager):
import pandas as pd
from dfp.messages.multi_dfp_message import DFPMessageMeta

Expand Down
Loading

0 comments on commit 7ee9e8f

Please sign in to comment.