From c25de5051bb1f2e4f44c358098243732348ec2bf Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Fri, 17 Jan 2025 16:02:07 -0800 Subject: [PATCH] Documentation improvements (#2117) * Consolidate the `examples/digital_fingerprinting/production/README.md` and `docs/source/developer_guide/guides/5_digital_fingerprinting.md` documents (#2107) * Ensure that the `README.md` file refers to the `5_digital_fingerprinting.md` file. * Remove redundant build instructions from `5_digital_fingerprinting.md` and instead direct the user to `README.md`. * The `README.md` file now documents how to build and run the example. * The `5_digital_fingerprinting.md` file now serves as a reference for features and output fields, along with guiding the user for customizing the pipeline. * Support ARM builds for DFP containers * Remove DFP documentation regarding helm charts. * Document the requirement for installing `model-utils` dependency target for the `onnx-to-trt` tool (#2103). * Update the `onnx-to-trt` import error message to reflect the `model-utils` Conda env file, rather than logging-and-raising place the error message directly into the exception, prevents the error message from being lost in the traceback. * Update the `--seq_length` flag in the `onnx-to-trt` command for converting the phishing model (#2116). * Replace hard-coded instances of `x86_64` #2114 * Add ARM to matrix for the `model-utils` target. * Add `.cache*` to `.gitignore` allows for platform-specific `.cache` directories. * Ignore verifying anchor tags for github.com, the way github.com handles anchor tags into markdown conflicts with the link checker. Closes #2103 Closes #2107 Closes #2114 Closes #2116 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. 
- When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/2117 --- .gitignore | 2 +- ci/release/update-version.sh | 6 + .../model-utils_cuda-125_arch-aarch64.yaml | 26 ++++ dependencies.yaml | 2 +- docs/source/basics/overview.rst | 9 ++ docs/source/conf.py | 5 +- docs/source/developer_guide/contributing.md | 14 +- .../guides/5_digital_fingerprinting.md | 144 +----------------- examples/abp_nvsmi_detection/README.md | 2 +- .../3_simple_cpp_stage/README.md | 2 +- .../4_rabbitmq_cpp_stage/README.md | 2 +- .../production/Dockerfile | 10 +- .../production/README.md | 47 +----- .../production/mlflow/Dockerfile | 2 +- .../production/morpheus/benchmarks/README.md | 2 +- examples/doca/vdb_realtime/README.md | 2 +- .../gnn_fraud_detection_pipeline/README.md | 4 +- examples/llm/agents/README.md | 4 +- examples/llm/completion/README.md | 4 +- examples/llm/rag/README.md | 2 +- examples/llm/vdb_upload/README.md | 2 +- models/README.md | 18 ++- .../fraud-detection-models/README.md | 2 +- models/triton-model-repo/README.md | 2 +- .../morpheus/stages/input/arxiv_source.py | 4 +- python/morpheus/morpheus/utils/onnx_to_trt.py | 12 +- python/morpheus_llm/morpheus_llm/error.py | 2 +- tests/benchmarks/README.md | 4 +- tests/conftest.py | 2 +- tests/morpheus_llm/stages/arxiv/conftest.py | 2 +- 30 files changed, 108 insertions(+), 233 deletions(-) create mode 100644 conda/environments/model-utils_cuda-125_arch-aarch64.yaml diff --git a/.gitignore b/.gitignore index 064d3cf759..aab82a14bf 100755 --- a/.gitignore +++ b/.gitignore @@ -88,7 +88,7 @@ htmlcov/ .tox/ .coverage .coverage.* -.cache +.cache* nosetests.xml coverage.xml *.cover diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 20a1df98dc..feff9d7a25 100755 --- a/ci/release/update-version.sh +++ 
b/ci/release/update-version.sh @@ -98,11 +98,17 @@ sed_runner 's/'"VERSION ${CURRENT_FULL_VERSION}.*"'/'"VERSION ${NEXT_FULL_VERSIO examples/developer_guide/3_simple_cpp_stage/CMakeLists.txt \ examples/developer_guide/4_rabbitmq_cpp_stage/CMakeLists.txt +# docs/source/basics/overview.rst +sed_runner "s|blob/branch-${CURRENT_SHORT_TAG}|blob/branch-${NEXT_SHORT_TAG}|g" docs/source/basics/overview.rst + # docs/source/cloud_deployment_guide.md sed_runner "s|${CURRENT_SHORT_TAG}.tgz|${NEXT_SHORT_TAG}.tgz|g" docs/source/cloud_deployment_guide.md sed_runner "s|blob/branch-${CURRENT_SHORT_TAG}|blob/branch-${NEXT_SHORT_TAG}|g" docs/source/cloud_deployment_guide.md sed_runner "s|tree/branch-${CURRENT_SHORT_TAG}|tree/branch-${NEXT_SHORT_TAG}|g" docs/source/cloud_deployment_guide.md +# docs/source/developer_guide/guides/5_digital_fingerprinting.md +sed_runner "s|blob/branch-${CURRENT_SHORT_TAG}|blob/branch-${NEXT_SHORT_TAG}|g" docs/source/developer_guide/guides/5_digital_fingerprinting.md + # docs/source/examples.md sed_runner "s|blob/branch-${CURRENT_SHORT_TAG}|blob/branch-${NEXT_SHORT_TAG}|g" docs/source/examples.md diff --git a/conda/environments/model-utils_cuda-125_arch-aarch64.yaml b/conda/environments/model-utils_cuda-125_arch-aarch64.yaml new file mode 100644 index 0000000000..eeeeb45ce1 --- /dev/null +++ b/conda/environments/model-utils_cuda-125_arch-aarch64.yaml @@ -0,0 +1,26 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- conda-forge +- huggingface +- rapidsai +- rapidsai-nightly +- nvidia +- nvidia/label/dev +- pytorch +dependencies: +- cuml=24.10.* +- jupyterlab +- matplotlib +- onnx +- pandas +- pip +- python=3.10 +- scikit-learn=1.3.2 +- seaborn +- seqeval=1.2.2 +- transformers=4.36.2 +- xgboost +- pip: + - tensorrt-cu12 +name: model-utils_cuda-125_arch-aarch64 diff --git a/dependencies.yaml b/dependencies.yaml index 3924268f86..f8ddae5706 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -164,7 +164,7 @@ files: output: conda matrix: cuda: ["12.5"] - arch: [x86_64] + arch: [x86_64, aarch64] includes: - model-training-tuning - python diff --git a/docs/source/basics/overview.rst b/docs/source/basics/overview.rst index e36e401056..1a39b44046 100644 --- a/docs/source/basics/overview.rst +++ b/docs/source/basics/overview.rst @@ -107,6 +107,15 @@ queried in the same manner: --max_workspace_size INTEGER [default: 16000] --help Show this message and exit. +ONNX To TensorRT +---------------- +The ONNX to TensorRT (TRT) conversion utility requires additional packages, which can be installed using the following command: +```bash +conda env update --solver=libmamba -n morpheus --file conda/environments/model-utils_cuda-125_arch-$(arch).yaml +``` + +Example usage of the ONNX to TRT conversion utility can be found in `models/README.md <https://github.com/nv-morpheus/Morpheus/blob/branch-25.02/models/README.md>`_. + AutoComplete ------------ diff --git a/docs/source/conf.py b/docs/source/conf.py index 79e7f2000b..03205618bb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -193,13 +193,16 @@ # Config linkcheck # Ignore localhost and url prefix fragments # Ignore openai.com links, as these always report a 403 when requested by the linkcheck agent +# The way Github handles anchors into markdown files is not compatible with the way linkcheck handles them. +# This allows us to continue to verify that the links are valid, but ignore the anchors. 
linkcheck_ignore = [ r'http://localhost:\d+/', r'https://localhost:\d+/', r'^http://$', r'^https://$', r'https://(platform\.)?openai.com', - r'https://code.visualstudio.com' + r'https://code.visualstudio.com', + r"^https://github.com/nv-morpheus/Morpheus/blob/.*#.+$" ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/developer_guide/contributing.md b/docs/source/developer_guide/contributing.md index d1b3093955..0c463ed1ad 100644 --- a/docs/source/developer_guide/contributing.md +++ b/docs/source/developer_guide/contributing.md @@ -159,11 +159,11 @@ Morpheus provides multiple Conda environment files to support different workflow The following are the available Conda environment files, all are located in the `conda/environments` directory, with the following naming convention: `<environment>_<cuda_version>_arch-<architecture>.yaml`. | Environment | File | Description | | --- | --- | --- | -| `all` | `all_cuda-125_arch-x86_64.yaml` | All dependencies required to build, run and test Morpheus, along with all of the examples. This is a superset of the `dev`, `runtime` and `examples` environments. | -| `dev` | `dev_cuda-125_arch-x86_64.yaml` | Dependencies required to build, run and test Morpheus. This is a superset of the `runtime` environment. | -| `examples` | `examples_cuda-125_arch-x86_64.yaml` | Dependencies required to run all examples. This is a superset of the `runtime` environment. | -| `model-utils` | `model-utils_cuda-125_arch-x86_64.yaml` | Dependencies required to train models independent of Morpheus. | -| `runtime` | `runtime_cuda-125_arch-x86_64.yaml` | Minimal set of dependencies strictly required to run Morpheus. | +| `all` | `all_cuda-125_arch-<arch>.yaml` | All dependencies required to build, run and test Morpheus, along with all of the examples. This is a superset of the `dev`, `runtime` and `examples` environments. | +| `dev` | `dev_cuda-125_arch-<arch>.yaml` | Dependencies required to build, run and test Morpheus. 
This is a superset of the `runtime` environment. | +| `examples` | `examples_cuda-125_arch-<arch>.yaml` | Dependencies required to run all examples. This is a superset of the `runtime` environment. | +| `model-utils` | `model-utils_cuda-125_arch-<arch>.yaml` | Dependencies required to train models independent of Morpheus. | +| `runtime` | `runtime_cuda-125_arch-<arch>.yaml` | Minimal set of dependencies strictly required to run Morpheus. | ##### Updating Morpheus Dependencies @@ -200,11 +200,11 @@ When ready, commit both the changes to the `dependencies.yaml` file and the upda ``` 1. Create the Morpheus Conda environment using either the `dev` or `all` environment file. Refer to the [Conda Environment YAML Files](#conda-environment-yaml-files) section for more information. ```bash - conda env create --solver=libmamba -n morpheus --file conda/environments/dev_cuda-125_arch-x86_64.yaml + conda env create --solver=libmamba -n morpheus --file conda/environments/dev_cuda-125_arch-$(arch).yaml ``` or ```bash - conda env create --solver=libmamba -n morpheus --file conda/environments/all_cuda-125_arch-x86_64.yaml + conda env create --solver=libmamba -n morpheus --file conda/environments/all_cuda-125_arch-$(arch).yaml ``` diff --git a/docs/source/developer_guide/guides/5_digital_fingerprinting.md b/docs/source/developer_guide/guides/5_digital_fingerprinting.md index 24de3516f4..17e25acebe 100644 --- a/docs/source/developer_guide/guides/5_digital_fingerprinting.md +++ b/docs/source/developer_guide/guides/5_digital_fingerprinting.md @@ -22,8 +22,11 @@ Every account, user, service, and machine has a digital fingerprint that represe To construct this digital fingerprint, we will be training unsupervised behavioral models at various granularities, including a generic model for all users in the organization along with fine-grained models for each user to monitor their behavior. 
These models are continuously updated and retrained over time​, and alerts are triggered when deviations from normality occur for any user​. +## Running the DFP Example +Instructions for building and running the DFP example are available in the [`examples/digital_fingerprinting/production/README.md`](https://github.com/nv-morpheus/Morpheus/blob/branch-25.02/examples/digital_fingerprinting/production/README.md) guide in the Morpheus repository. + ## Training Sources -The data we will want to use for the training and inference will be any sensitive system that the user interacts with, such as VPN, authentication and cloud services. The digital fingerprinting example (`examples/digital_fingerprinting/README.md`) included in Morpheus ingests logs from [Azure Active Directory](https://docs.microsoft.com/en-us/azure/active-directory/reports-monitoring/concept-sign-ins), and [Duo Authentication](https://duo.com/docs/adminapi). +The data we will want to use for the training and inference will be any sensitive system that the user interacts with, such as VPN, authentication and cloud services. The digital fingerprinting example ([`examples/digital_fingerprinting/production/README.md`](https://github.com/nv-morpheus/Morpheus/blob/branch-25.02/examples/digital_fingerprinting/production/README.md)) included in Morpheus ingests logs from [Azure Active Directory](https://docs.microsoft.com/en-us/azure/active-directory/reports-monitoring/concept-sign-ins), and [Duo Authentication](https://duo.com/docs/adminapi). The location of these logs could be either local to the machine running Morpheus, a shared file system like NFS, or on a remote store such as [Amazon S3](https://aws.amazon.com/s3/). 
@@ -131,145 +134,6 @@ The reference architecture is composed of the following services:​ | `morpheus_pipeline` | Used for executing both training and inference pipelines | | `fetch_data` | Downloads the example datasets for the DFP example | -### Running via `docker-compose` -#### System requirements -* [Docker](https://docs.docker.com/get-docker/) and [docker-compose](https://docs.docker.com/compose/) installed on the host machine​ -* Supported GPU with [NVIDIA Container Toolkit​](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) - -> **Note:** For GPU Requirements refer to the [Getting Started](../../getting_started.md#requirements) guide. - -#### Building the services -From the root of the Morpheus repo, run: -```bash -cd examples/digital_fingerprinting/production -export MORPHEUS_CONTAINER_VERSION="$(git describe --tags --abbrev=0)-runtime" -docker compose build -``` - -> **Note:** This requires version 1.28.0 or higher of Docker Compose, and preferably v2. If you encounter an error similar to: -> -> ``` -> ERROR: The Compose file './docker-compose.yml' is invalid because: -> services.jupyter.deploy.resources.reservations value Additional properties are not allowed ('devices' was -> unexpected) -> ``` -> -> This is most likely due to using an older version of the `docker-compose` command, instead re-run the build with `docker compose`. Refer to [Migrate to Compose V2](https://docs.docker.com/compose/migrate/) for more information. - -#### Downloading the example datasets -First, we will need to install additional requirements in to the Conda environment. Then run the `examples/digital_fingerprinting/fetch_example_data.py` script. This will download the example data into the `examples/data/dfp` dir. - -The script can be run from within the `fetch_data` Docker Compose service, or from within a Conda environment on the host machine. 
- -##### Docker Compose Service Method -This approach has the advantage of not requiring any additional setup on the host machine. From the `examples/digital_fingerprinting/production` dir run: -```bash -docker compose up mlflow -``` -##### Conda Environment Method -This approach is useful for users who have already set up a Conda environment on their host machine, and has the advantage that the downloaded data will be owned by the host user. - -If a Conda environment has already been created, it can be updated by running the following command from the root of the Morpheus repo: -```bash -conda env update --solver=libmamba \ - -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml -``` - -If a Conda environment has not been created, it can be created by running the following command from the root of the Morpheus repo: -```bash -conda env create --solver=libmamba \ - -n morpheus \ - --file ./conda/environments/all_cuda-125_arch-x86_64.yaml -``` - -Once the Conda environment has been updated or created, fetch the data with the following command: -```bash -python examples/digital_fingerprinting/fetch_example_data.py all -``` - -#### Running the services -##### Jupyter Server -From the `examples/digital_fingerprinting/production` dir run: -```bash -docker compose up jupyter -``` - -Once the build is complete and the service has started, a message similar to the following should display: -``` -jupyter | To access the server, open this file in a browser: -jupyter | file:///root/.local/share/jupyter/runtime/jpserver-7-open.html -jupyter | Or copy and paste one of these URLs: -jupyter | http://localhost:8888/lab?token= -jupyter | or http://127.0.0.1:8888/lab?token= -``` - -Copy and paste the URL into a web browser. 
There are four notebooks included with the DFP example: -* dfp_azure_training.ipynb - Training pipeline for Azure Active Directory data -* dfp_azure_inference.ipynb - Inference pipeline for Azure Active Directory data -* dfp_duo_training.ipynb - Training pipeline for Duo Authentication -* dfp_duo_inference.ipynb - Inference pipeline for Duo Authentication - -> **Note:** The token in the URL is a one-time use token and a new one is generated with each invocation. - -##### Morpheus Pipeline -By default, the `morpheus_pipeline` will run the training pipeline for Duo data from the `examples/digital_fingerprinting/production` dir run: -```bash -docker compose up morpheus_pipeline -``` - -If instead you want to run a different pipeline from the `examples/digital_fingerprinting/production` dir, run: -```bash -docker compose run morpheus_pipeline bash -``` - - -From the prompt within the `morpheus_pipeline` container, you can run either the `dfp_azure_pipeline.py` or `dfp_duo_pipeline.py` pipeline scripts. -```bash -python dfp_azure_pipeline.py --help -python dfp_duo_pipeline.py --help -``` - -Both scripts are capable of running either a training or inference pipeline for their respective data sources. The command-line options for both are the same: -| Flag | Type | Description | -| ---- | ---- | ----------- | -| `--train_users` | One of: `all`, `generic`, `individual`, `none` | Indicates whether or not to train per user or a generic model for all users. Selecting `none` runs the inference pipeline. | -| `--skip_user` | TEXT | User IDs to skip. Mutually exclusive with `only_user` | -| `--only_user` | TEXT | Only users specified by this option will be included. 
Mutually exclusive with `skip_user` | -| `--start_time` | TEXT | The start of the time window, if undefined `start_date` will be `now()-duration` | -| `--duration` | TEXT | The duration to run starting from `start_time` [default: `60d`] | -| `--cache_dir` | TEXT | The location to cache data such as S3 downloads and pre-processed data [environment variable: `DFP_CACHE_DIR`; default: `./.cache/dfp`] | -| `--log_level` | One of: `CRITICAL`, `FATAL`, `ERROR`, `WARN`, `WARNING`, `INFO`, `DEBUG` | Specify the logging level to use. [default: `WARNING`] | -| `--sample_rate_s` | INTEGER | Minimum time step, in milliseconds, between object logs. [environment variable: `DFP_SAMPLE_RATE_S`; default: 0] | -| `-f`, `--input_file` | TEXT | List of files to process. Can specify multiple arguments for multiple files. Also accepts glob (*) wildcards and schema prefixes such as `s3://`. For example, to make a local cache of an s3 bucket, use `filecache::s3://mybucket/*`. Refer to [`fsspec` documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html?highlight=open_files#fsspec.open_files) for list of possible options. | -| `--watch_inputs` | FLAG | Instructs the pipeline to continuously check the paths specified by `--input_file` for new files. This assumes that the at least one paths contains a wildcard. | -| `--watch_interval` | FLOAT | Amount of time, in seconds, to wait between checks for new files. Only used if --watch_inputs is set. [default `1.0`] | -| `--tracking_uri` | TEXT | The MLflow tracking URI to connect to. [default: `http://localhost:5000`] | -| `--help` | | Show this message and exit. 
| - - -To run the DFP pipelines with the example datasets within the container, run: - -* Duo Training Pipeline - ```bash - python dfp_duo_pipeline.py --train_users=all --start_time="2022-08-01" --input_file="/workspace/examples/data/dfp/duo-training-data/*.json" - ``` - -* Duo Inference Pipeline - ```bash - python dfp_duo_pipeline.py --train_users=none --start_time="2022-08-30" --input_file="/workspace/examples/data/dfp/duo-inference-data/*.json" - ``` - -* Azure Training Pipeline - ```bash - python dfp_azure_pipeline.py --train_users=all --start_time="2022-08-01" --input_file="/workspace/examples/data/dfp/azure-training-data/*.json" - ``` - -* Azure Inference Pipeline - ```bash - python dfp_azure_pipeline.py --train_users=none --start_time="2022-08-30" --input_file="/workspace/examples/data/dfp/azure-inference-data/*.json" - ``` - ##### Output Fields The output files will contain those logs from the input dataset for which an anomaly was detected; this is determined by the z-score in the `mean_abs_z` field. By default, any logs with a z-score of 2.0 or higher are considered anomalous. Refer to [`DFPPostprocessingStage`](6_digital_fingerprinting_reference.md#post-processing-stage-dfppostprocessingstage). 
diff --git a/examples/abp_nvsmi_detection/README.md b/examples/abp_nvsmi_detection/README.md index 0fcc250fb7..445b448095 100644 --- a/examples/abp_nvsmi_detection/README.md +++ b/examples/abp_nvsmi_detection/README.md @@ -63,7 +63,7 @@ This example can be easily applied to datasets generated from your own NVIDIA GP pyNVML is not installed by default, use the following command to install it: ```bash -conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-125_arch-x86_64.yaml +conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-125_arch-$(arch).yaml ``` Run the following to start generating your dataset: diff --git a/examples/developer_guide/3_simple_cpp_stage/README.md b/examples/developer_guide/3_simple_cpp_stage/README.md index 2a48a8bb7c..e465c2e98a 100644 --- a/examples/developer_guide/3_simple_cpp_stage/README.md +++ b/examples/developer_guide/3_simple_cpp_stage/README.md @@ -21,5 +21,5 @@ limitations under the License. 
|-------------|-----------|-------| | Conda | ✔ | | | Morpheus Docker Container | ✔ | | -| Morpheus Release Container | ✔ | Requires adding development packages to the container's Conda environment via `conda env update --solver=libmamba -n morpheus --file /workspace/conda/environments/dev_cuda-125_arch-x86_64.yaml` | +| Morpheus Release Container | ✔ | Requires adding development packages to the container's Conda environment via `conda env update --solver=libmamba -n morpheus --file /workspace/conda/environments/dev_cuda-125_arch-$(arch).yaml` | | Dev Container | ✔ | | diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md index f117985983..8799e91c5a 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md @@ -23,7 +23,7 @@ This example builds upon the `examples/developer_guide/2_2_rabbitmq` example add |-------------|-----------|-------| | Conda | ✔ | | | Morpheus Docker Container | ✔ | Requires launching the RabbitMQ container on the host | -| Morpheus Release Container | ✔ | Requires launching the RabbitMQ container on the host, and adding development packages to the container's Conda environment via `conda env update --solver=libmamba -n morpheus --file /workspace/conda/environments/dev_cuda-125_arch-x86_64.yaml` | +| Morpheus Release Container | ✔ | Requires launching the RabbitMQ container on the host, and adding development packages to the container's Conda environment via `conda env update --solver=libmamba -n morpheus --file /workspace/conda/environments/dev_cuda-125_arch-$(arch).yaml` | | Dev Container | ✘ | | ## Installing Pika diff --git a/examples/digital_fingerprinting/production/Dockerfile b/examples/digital_fingerprinting/production/Dockerfile index b765e3b406..343f988565 100644 --- a/examples/digital_fingerprinting/production/Dockerfile +++ b/examples/digital_fingerprinting/production/Dockerfile @@ 
-16,7 +16,7 @@ ARG BASE_IMG=nvcr.io/nvidia/cuda ARG BASE_IMG_TAG=12.5.1-base-ubuntu22.04 -FROM ${BASE_IMG}:${BASE_IMG_TAG} AS base +FROM --platform=$TARGETPLATFORM ${BASE_IMG}:${BASE_IMG_TAG} AS base # Install necessary dependencies using apt-get RUN apt-get update && apt-get install -y \ @@ -26,7 +26,7 @@ RUN apt-get update && apt-get install -y \ && apt-get clean # Install miniconda -RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh \ +RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-$(arch).sh -O /tmp/miniconda.sh \ && bash /tmp/miniconda.sh -b -p /opt/conda \ && rm /tmp/miniconda.sh @@ -48,20 +48,20 @@ WORKDIR /workspace/examples/digital_fingerprinting/production COPY . /workspace/examples/digital_fingerprinting/production/ # Create a conda env with morpheus-dfp and any additional dependencies needed to run the examples -RUN conda env create --solver=libmamba -y --name morpheus-dfp --file ./conda/environments/dfp_example_cuda-125_arch-x86_64.yaml +RUN conda env create --solver=libmamba -y --name morpheus-dfp --file ./conda/environments/dfp_example_cuda-125_arch-$(arch).yaml ENTRYPOINT [ "/opt/conda/envs/morpheus-dfp/bin/tini", "--", "/workspace/examples/digital_fingerprinting/production/docker/entrypoint.sh" ] SHELL ["/bin/bash", "-c"] # ===== Setup for running unattended ===== -FROM base AS runtime +FROM --platform=$TARGETPLATFORM base AS runtime # Launch morpheus CMD ["./launch.sh"] # ===== Setup for running Jupyter ===== -FROM base AS jupyter +FROM --platform=$TARGETPLATFORM base AS jupyter # Install the jupyter specific requirements RUN source activate morpheus-dfp &&\ diff --git a/examples/digital_fingerprinting/production/README.md b/examples/digital_fingerprinting/production/README.md index f6757f659a..c9c9d5d18e 100644 --- a/examples/digital_fingerprinting/production/README.md +++ b/examples/digital_fingerprinting/production/README.md @@ -152,49 +152,8 @@ Run Azure Inference Pipeline: 
python dfp_azure_pipeline.py --train_users none --start_time "2022-08-30" --input_file="../../data/dfp/azure-inference-data/*.json" ``` -##### Module-based DFP pipelines +## Additional Information -The commands in the previous section run stage-based example DFP pipelines. The Morpheus 23.03 release introduced a new, more flexible module-based approach to build pipelines through the use of control messages. More information about modular DFP pipelines can be found [here](../../../docs/source/developer_guide/guides/10_modular_pipeline_digital_fingerprinting.md). +Additional information on the DFP pipeline, including how it can be customized for additional data sources can be found in the [Digital Fingerprinting Guide](../../../docs/source/developer_guide/guides/5_digital_fingerprinting.md) as well as the [Digital Fingerprinting Reference Guide](../../../docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md). -Commands to run equivalent module-based DFP pipelines can be found [here](../../../docs/source/developer_guide/guides/10_modular_pipeline_digital_fingerprinting.md#running-example-modular-dfp-pipelines). - -## Kubernetes deployment - -The Morpheus project also maintains Helm charts and container images for Kubernetes deployment of Morpheus and MLflow (both for serving and for the Triton plugin). These are located in the NVIDIA GPU Cloud (NGC) [public catalog](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/morpheus/collections/morpheus_). - -### MLflow Helm chart - -MLflow for this production digital fingerprint use case can be installed from NGC using these same instructions for the [MLflow Triton Plugin from the Morpheus Cloud Deployment Guide](../../../docs/source/cloud_deployment_guide.md#install-morpheus-mlflow-triton-plugin). The chart and image can be used for both the Triton plugin and also MLflow server. 
- -### Production DFP Helm chart - -The deployment of the [Morpheus SDK Client](../../../docs/source/cloud_deployment_guide.md#install-morpheus-sdk-client) is also done _almost_ the same way as what's specified in the Cloud Deployment Guide. However, you would specify command arguments differently for this production DFP use case. - -Note: The published Morpheus image includes a minimal set of packages for launching JupyterLab but you will likely still want to update the Conda environment inside the running pod with the `conda_env.yml` file in this same directory to install other use case dependencies such as boto3 and s3fs. - -#### Notebooks - -``` -helm install --set ngc.apiKey="$API_KEY",sdk.args="cd /workspace/examples/digital_fingerprinting/production/morpheus && jupyter-lab --ip='*' --no-browser --allow-root --ServerApp.allow_origin='*'" morpheus-sdk-client/ -``` - -Make note of the Jupyter token by examining the logs of the SDK pod: -``` -kubectl logs sdk-cli- -``` - -The output should contain something similar to: - -``` - Or copy and paste one of these URLs: - http://localhost:8888/lab?token=d16c904468fdf666c5030e18fb82f840e531178bf716e575 - or http://127.0.0.1:8888/lab?token=d16c904468fdf666c5030e18fb82f840e531178bf716e575 -``` - -Open your browser to the reachable address and NodePort exposed by the pod (default value of 30888) and use the generated token to login into the notebook. - -#### Unattended - -``` -helm install --set ngc.apiKey="$API_KEY",sdk.args="cd /workspace/examples/digital_fingerprinting/production/morpheus && ./launch.sh --train_users=generic --duration=1d" morpheus-sdk-client/ -``` +The commands in the previous section run stage-based example DFP pipelines. The Morpheus 23.03 release introduced a new, more flexible module-based approach to build pipelines through the use of control messages. 
More information about modular DFP pipelines can be found at [Introduction to Modular Digital Fingerprinting Pipeline Guide](../../../docs/source/developer_guide/guides/10_modular_pipeline_digital_fingerprinting.md). Commands to run equivalent module-based DFP pipelines are available at [Running Example Modular DFP Pipelines](../../../docs/source/developer_guide/guides/10_modular_pipeline_digital_fingerprinting.md#running-example-modular-dfp-pipelines). diff --git a/examples/digital_fingerprinting/production/mlflow/Dockerfile b/examples/digital_fingerprinting/production/mlflow/Dockerfile index 7fb7db3269..8b708a52ee 100644 --- a/examples/digital_fingerprinting/production/mlflow/Dockerfile +++ b/examples/digital_fingerprinting/production/mlflow/Dockerfile @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM python:3.10-slim-buster +FROM --platform=$TARGETPLATFORM python:3.10-slim-buster # Install curl for health check RUN apt update && \ diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md b/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md index 5be73c1c9f..d6e7b30330 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md @@ -42,7 +42,7 @@ Install additional required dependencies: ```bash conda env update --solver=libmamba \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml + --file ./conda/environments/examples_cuda-125_arch-$(arch).yaml ``` diff --git a/examples/doca/vdb_realtime/README.md b/examples/doca/vdb_realtime/README.md index 10c35d711e..19e6981452 100644 --- a/examples/doca/vdb_realtime/README.md +++ b/examples/doca/vdb_realtime/README.md @@ -98,7 +98,7 @@ export NGC_API_KEY="" Then install basic requirements: ```bash -conda env update --solver=libmamba -n 
morpheus --file conda/environments/examples_cuda-125_arch-x86_64.yaml --prune +conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-125_arch-$(arch).yaml --prune ``` Run the RAG example to query the Milvus database: diff --git a/examples/gnn_fraud_detection_pipeline/README.md b/examples/gnn_fraud_detection_pipeline/README.md index c028595dd3..64f56376f7 100644 --- a/examples/gnn_fraud_detection_pipeline/README.md +++ b/examples/gnn_fraud_detection_pipeline/README.md @@ -17,7 +17,7 @@ limitations under the License. # GNN Fraud Detection Pipeline ## Supported Environments -All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-x86_64.yaml` or `conda/environments/examples_cuda-125_arch-x86_64.yaml` environment files. Refer to the [Requirements](#requirements) section for more information. +All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-$(arch).yaml` or `conda/environments/examples_cuda-125_arch-$(arch).yaml` environment files. Refer to the [Requirements](#requirements) section for more information. | Environment | Supported | Notes | |-------------|-----------|-------| | Conda | ✔ | | @@ -32,7 +32,7 @@ Prior to running the GNN fraud detection pipeline, additional requirements must ```bash conda env update --solver=libmamba \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml + --file ./conda/environments/examples_cuda-125_arch-$(arch).yaml ``` ## Running diff --git a/examples/llm/agents/README.md b/examples/llm/agents/README.md index 1e55a768a1..7da5b0b819 100644 --- a/examples/llm/agents/README.md +++ b/examples/llm/agents/README.md @@ -35,7 +35,7 @@ limitations under the License. 
- [Run example (Kafka Pipeline)](#run-example-kafka-pipeline) ## Supported Environments -All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-x86_64.yaml` or `conda/environments/examples_cuda-125_arch-x86_64.yaml` environment files. Refer to the [Install Dependencies](#install-dependencies) section for more information. +All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-$(arch).yaml` or `conda/environments/examples_cuda-125_arch-$(arch).yaml` environment files. Refer to the [Install Dependencies](#install-dependencies) section for more information. | Environment | Supported | Notes | |-------------|-----------|-------| | Conda | ✔ | | @@ -106,7 +106,7 @@ Install the required dependencies. ```bash conda env update --solver=libmamba \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml + --file ./conda/environments/examples_cuda-125_arch-$(arch).yaml ``` diff --git a/examples/llm/completion/README.md b/examples/llm/completion/README.md index 76a8301038..72ecac5c4a 100644 --- a/examples/llm/completion/README.md +++ b/examples/llm/completion/README.md @@ -31,7 +31,7 @@ limitations under the License. - [Running the Morpheus Pipeline](#running-the-morpheus-pipeline) ## Supported Environments -All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-x86_64.yaml` or `conda/environments/examples_cuda-125_arch-x86_64.yaml` environment files. Refer to the [Install Dependencies](#install-dependencies) section for more information. +All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-$(arch).yaml` or `conda/environments/examples_cuda-125_arch-$(arch).yaml` environment files. 
Refer to the [Install Dependencies](#install-dependencies) section for more information. | Environment | Supported | Notes | |-------------|-----------|-------| | Conda | ✔ | | @@ -80,7 +80,7 @@ Install the required dependencies. ```bash conda env update --solver=libmamba \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml + --file ./conda/environments/examples_cuda-125_arch-$(arch).yaml ``` diff --git a/examples/llm/rag/README.md b/examples/llm/rag/README.md index f5e6b647b4..cf5ae2bae2 100644 --- a/examples/llm/rag/README.md +++ b/examples/llm/rag/README.md @@ -18,7 +18,7 @@ limitations under the License. # Retrieval Augmented Generation (RAG) Pipeline ## Supported Environments -All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-x86_64.yaml` or `conda/environments/examples_cuda-125_arch-x86_64.yaml` environment files. This example also requires the [VDB upload](../vdb_upload/README.md) pipeline to have been run previously. +All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-$(arch).yaml` or `conda/environments/examples_cuda-125_arch-$(arch).yaml` environment files. This example also requires the [VDB upload](../vdb_upload/README.md) pipeline to have been run previously. | Environment | Supported | Notes | |-------------|-----------|-------| | Conda | ✔ | | diff --git a/examples/llm/vdb_upload/README.md b/examples/llm/vdb_upload/README.md index f9d365f2fe..91dcef559f 100644 --- a/examples/llm/vdb_upload/README.md +++ b/examples/llm/vdb_upload/README.md @@ -34,7 +34,7 @@ limitations under the License. 
- [Exporting and Deploying a Different Model from Hugging Face](#exporting-and-deploying-a-different-model-from-hugging-face) ## Supported Environments -All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-x86_64.yaml` or `conda/environments/examples_cuda-125_arch-x86_64.yaml` environment files. +All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-$(arch).yaml` or `conda/environments/examples_cuda-125_arch-$(arch).yaml` environment files. | Environment | Supported | Notes | |-------------|-----------|-------| | Conda | ✔ | | diff --git a/models/README.md b/models/README.md index 56b96740d1..5bc9eae482 100644 --- a/models/README.md +++ b/models/README.md @@ -59,6 +59,14 @@ In the root directory, the file `model-information.csv` contains the following i - **Version Ubuntu** - Ubuntu version used during training - **Version Transformers** - Transformers version used during training +## Generating TensorRT Models from ONNX +The Morpheus ONNX to TensorRT (TRT) conversion utility requires additional packages, which can be installed using the following command: +```bash +conda env update --solver=libmamba -n morpheus --file conda/environments/model-utils_cuda-125_arch-$(arch).yaml +``` + +Users wishing to use TRT models generated using this tool with Triton should then refer to the [Triton Model Repository](./triton-model-repo/README.md) documentation on how to deploy the models to Triton, and how to build the Morpheus Triton Server Models Container.
+ # Model Card Info ## Sensitive Information Detection (SID) ### Model Overview @@ -74,14 +82,12 @@ English text from PCAP payloads #### Output Multi-label sequence classification for 10 sensitive information categories ### Generating TRT Models from ONNX -The ONNX to TensorRT conversion utility requires additional packages, which can be installed using the following command: -```bash -conda env update --solver=libmamba -n morpheus --file conda/environments/model-utils_cuda-125_arch-x86_64.yaml -``` + For the best performance you need to compile a TensorRT engine file on each machine that it will be run on. To facilitate this, Morpheus contains a utility to input an ONNX file and export the TensorRT engine file. Sample command to generate the TensorRT engine file - ```bash -morpheus --log_level=info tools onnx-to-trt --input_model sid-models/sid-minibert-20230424.onnx --output_model ./model.plan --batches 1 8 --batches 1 16 --batches 1 32 --seq_length 256 --max_workspace_size 16000 +morpheus --log_level=info tools onnx-to-trt --input_model ${MORPHEUS_ROOT}/models/sid-models/sid-minibert-20230424.onnx --output_model ${MORPHEUS_ROOT}/models/sid-models/model.plan --batches 1 8 --batches 1 16 --batches 1 32 --seq_length 256 --max_workspace_size 16000 ``` + Note: If you get an out-of-memory error, reduce the `--max_workspace_size` argument until it will successfully run. ### References Well-Read Students Learn Better: On the Importance of Pre-training Compact Models, 2019, https://arxiv.org/abs/1908.08962 @@ -102,7 +108,7 @@ Binary sequence classification as phishing/spam or non-phishing/spam ### Generating TRT Models from ONNX For the best performance you need to compile a TensorRT engine file on each machine that it will be run on. To facilitate this, Morpheus contains a utility to input an ONNX file and export the TensorRT engine file. 
Sample command to generate the TensorRT engine file - ```bash -morpheus --log_level=info tools onnx-to-trt --input_model phishing-models/phishing-bert-20230517.onnx --output_model ./model.plan --batches 1 8 --batches 1 16 --batches 1 32 --seq_length 256 --max_workspace_size 16000 +morpheus --log_level=info tools onnx-to-trt --input_model ${MORPHEUS_ROOT}/models/phishing-models/phishing-bert-20230517.onnx --output_model ${MORPHEUS_ROOT}/models/phishing-models/model.plan --batches 1 8 --batches 1 16 --batches 1 32 --seq_length 128 --max_workspace_size 16000 ``` ### References - https://archive.ics.uci.edu/ml/datasets/SMS+Spam+Collection diff --git a/models/training-tuning-scripts/fraud-detection-models/README.md b/models/training-tuning-scripts/fraud-detection-models/README.md index 1843466e6c..5ecd179c7e 100644 --- a/models/training-tuning-scripts/fraud-detection-models/README.md +++ b/models/training-tuning-scripts/fraud-detection-models/README.md @@ -26,7 +26,7 @@ Install packages for training GNN model. ```bash mamba env update \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/model-utils-125_arch-x86_64.yaml + --file ./conda/environments/model-utils_cuda-125_arch-$(arch).yaml ``` ### Options for training and tuning models. diff --git a/models/triton-model-repo/README.md b/models/triton-model-repo/README.md index 44673d8eb8..3f22dc1abd 100644 --- a/models/triton-model-repo/README.md +++ b/models/triton-model-repo/README.md @@ -73,7 +73,7 @@ docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD:/model To load a TensorRT model, it first must be compiled with the `morpheus tools onnx-to-trt` utility. This utility requires additional packages to be installed.
From the root of the Morpheus repo, install them with: ```bash -conda env update --solver=libmamba -n morpheus --file conda/environments/model-utils_cuda-125_arch-x86_64.yaml +conda env update --solver=libmamba -n morpheus --file conda/environments/model-utils_cuda-125_arch-$(arch).yaml ``` Then build the TensorRT model with (refer `triton-model-repo/sid-minibert-trt/1/README.md` for more info): diff --git a/python/morpheus/morpheus/stages/input/arxiv_source.py b/python/morpheus/morpheus/stages/input/arxiv_source.py index b686389426..48cb7b4bc9 100644 --- a/python/morpheus/morpheus/stages/input/arxiv_source.py +++ b/python/morpheus/morpheus/stages/input/arxiv_source.py @@ -37,7 +37,7 @@ IMPORT_ERROR_MESSAGE = ( "ArxivSource requires additional dependencies to be installed. Install them by running the following command: " "`conda env update --solver=libmamba -n morpheus" - "--file conda/environments/all_cuda-125_arch-x86_64.yaml --prune`") + "--file conda/environments/all_cuda-125_arch-$(arch).yaml --prune`") @register_stage("from-arxiv") @@ -47,7 +47,7 @@ class ArxivSource(GpuAndCpuMixin, PreallocatorMixin, SingleOutputSource): This stage requires several additional dependencies to be installed. Install them by running the following command: `conda env update --solver=libmamba -n morpheus " - "--file conda/environments/all_cuda-125_arch-x86_64.yaml --prune` + "--file conda/environments/all_cuda-125_arch-$(arch).yaml --prune` Parameters ---------- diff --git a/python/morpheus/morpheus/utils/onnx_to_trt.py b/python/morpheus/morpheus/utils/onnx_to_trt.py index a6714a655e..e04f403cd9 100644 --- a/python/morpheus/morpheus/utils/onnx_to_trt.py +++ b/python/morpheus/morpheus/utils/onnx_to_trt.py @@ -20,13 +20,15 @@ try: import tensorrt as trt -except ImportError: - logger.error("The onnx_to_trt module requires the TensorRT runtime and python package to be installed. 
" - "To install the `tensorrt` python package, follow the instructions located " - "here: https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html#installing-pip") - raise +except ImportError as e: + raise ImportError( + "The ONNX to TensorRT conversion utility requires additional packages, which can be installed using the " + "following command:\n" + "conda env update --solver=libmamba -n morpheus --file " + "conda/environments/model-utils_cuda-125_arch-$(arch).yaml") from e +# pylint: disable=no-member def gen_engine(config: ConfigOnnxToTRT): """ This class converts an Onnx model to a TRT model. diff --git a/python/morpheus_llm/morpheus_llm/error.py b/python/morpheus_llm/morpheus_llm/error.py index 82cbd443ae..d233896451 100644 --- a/python/morpheus_llm/morpheus_llm/error.py +++ b/python/morpheus_llm/morpheus_llm/error.py @@ -15,4 +15,4 @@ IMPORT_ERROR_MESSAGE = ( "{package} not found. Install it and other additional dependencies by running the following command:\n" "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/examples_cuda-125_arch-x86_64.yaml`") + "--file conda/environments/examples_cuda-125_arch-$(arch).yaml`") diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index f1454eeefd..bf4c26350d 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -195,7 +195,7 @@ Separate benchmark tests are provided to measure performance of the example [Pro You can use the same Dev container created here to run the Production DFP benchmarks. 
You would just need to install additional dependencies as follows: ```bash -mamba env update \ +conda env update --solver=libmamba \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml + --file ./conda/environments/examples_cuda-125_arch-$(arch).yaml ``` diff --git a/tests/conftest.py b/tests/conftest.py index 7f1b03c61b..dca63c459d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -53,7 +53,7 @@ OPT_DEP_SKIP_REASON = ( "This test requires the {package} package to be installed, to install this run:\n" - "`conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-125_arch-x86_64.yaml`") + "`conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-125_arch-$(arch).yaml`") def pytest_addoption(parser: pytest.Parser): diff --git a/tests/morpheus_llm/stages/arxiv/conftest.py b/tests/morpheus_llm/stages/arxiv/conftest.py index 5c1fc010fc..276b66f88c 100644 --- a/tests/morpheus_llm/stages/arxiv/conftest.py +++ b/tests/morpheus_llm/stages/arxiv/conftest.py @@ -23,7 +23,7 @@ SKIP_REASON = ( "Tests for the arxiv_source require a number of packages not installed in the Morpheus development " "environment. To install these run:\n" - "`conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-125_arch-x86_64.yaml`") + "`conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-125_arch-$(arch).yaml`") @pytest.fixture(name="arxiv", autouse=True, scope='session')