Skip to content

Commit

Permalink
Python 3.10 support (nv-morpheus#887)
Browse files Browse the repository at this point in the history
Python 3.10 support

closes nv-morpheus#881
closes nv-morpheus#876

Todo:
 - [ ] find a better way to `include`/`add_subdirectory` the `cudf_helpers` configuration that needed to be inlined due to changes in scikit-build/scikit-build#871

Authors:
  - Christopher Harris (https://github.com/cwharris)
  - Michael Demoret (https://github.com/mdemoret-nv)

Approvers:
  - Devin Robison (https://github.com/drobison00)
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: nv-morpheus#887
  • Loading branch information
cwharris authored Apr 25, 2023
1 parent 37d134c commit 7d075f5
Show file tree
Hide file tree
Showing 39 changed files with 353 additions and 589 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
uses: ./.github/workflows/ci_pipe.yml
with:
run_check: ${{ startsWith(github.ref_name, 'pull-request/') }}
container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-230413
test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-230413
container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-230414
test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-230414
secrets:
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ persistent=yes

# Minimum Python version to use for version dependent checks. Will default to
# the version used to run pylint.
py-version=3.8
py-version=3.10

# Discover python modules and packages in the file system subtree.
recursive=no
Expand Down
5 changes: 2 additions & 3 deletions ci/conda/recipes/morpheus/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,13 @@ cxx_compiler_version:
- 11.2

cuda_compiler:
- nvcc
- cuda-nvcc

cuda_compiler_version:
- 11.8

python:
- 3.8
- 3.9
- 3.10

boost:
- 1.74
Expand Down
27 changes: 17 additions & 10 deletions ci/conda/recipes/morpheus/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

{% set version = environ.get('GIT_VERSION', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version=environ.get('CONDA_PY', '3.8') %}
{% set py_version=environ.get('CONDA_PY', '3.10') %}
{% set cuda_version='.'.join(environ.get('CUDA', '11.8').split('.')[:2]) %}
{% set cuda_major=cuda_version.split('.')[0] %}
{% set rapids_version = "23.02" %}
Expand Down Expand Up @@ -51,28 +51,32 @@ outputs:
- {{ compiler("c") }}
- {{ compiler("cxx") }}
- {{ compiler("cuda") }}
- cmake 3.24
- ccache
- cmake 3.24
- cuda-cudart-dev # Needed by CMake to compile a test application
- ninja
host:
- cuda-python 11.8
- cudatoolkit {{ cuda_version }}.*
- cuda-cudart
- cudf {{ rapids_version }}
- cython >=0.29,<0.30
- libcudf {{ rapids_version }}
- librdkafka 1.7
- mrc {{ minor_version }}
- pip
- pyarrow * *_cuda # Ensure we get a CUDA build. Version determined by cuDF
- pybind11-stubgen 0.10.5
- python {{ python }}
- rapidjson 1.1
- scikit-build >=0.12
- scikit-build 0.17.1
- versioneer-518

# Remove cudatoolkit once `mamba repoquery whoneeds cudatoolkit` is empty. For now, we need to specify a version
- cudatoolkit {{ cuda_version }}.*
run:
# Runtime only requirements. This + setup.py is the definitive runtime requirement list
- {{ pin_compatible('cuda-cudart', min_pin='x.x', max_pin='x') }}
- click >=8
- configargparse 1.5.*
- cuda-python 11.8
- cudf
- cudf_kafka {{ rapids_version }}.*
- cupy # Version determined from cudf
Expand All @@ -84,24 +88,27 @@ outputs:
- libmrc
- mlflow >1.29,<2
- mrc
- networkx 2.8.*
- networkx 3.1.*
- numpydoc 1.4.*
- pandas 1.3.*
- pluggy 1.0.*
- pyarrow * *_cuda # Ensure we get a CUDA build. Version determined by cuDF
- python
- scikit-learn=0.23.1
- scikit-learn 1.2.2.*
- tqdm 4.*
- typing_utils 0.1.*
- watchdog 2.1.*
run_constrained:
# Since we don't explicitly require this but other packages might, constrain the versions.
- {{ pin_compatible('cudatoolkit', min_pin='x.x', max_pin='x') }}
test:
requires:
- cudatoolkit {{ cuda_version }}.*
- gputil
- pytest
- pytest-cov
- pytest-benchmark
# test that cuml can be installed in the env
- cuml {{ rapids_version }}
- cuml {{ rapids_version }}.*
source_files:
- docker/conda/environments/*
- pyproject.toml
Expand Down
2 changes: 1 addition & 1 deletion ci/conda/recipes/python-dbg/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ source:
# md5: c4b7100dcaace9d33ab1fda9a3a038d6
# If you want to build from the github source. This is quite a bit slower than pulling the tarball.
#git_url: https://github.com/python/cpython.git
#git_rev: 3.8
#git_rev: 3.10

build:
include_recipe: False
Expand Down
3 changes: 0 additions & 3 deletions ci/conda/recipes/run_conda_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,6 @@ if [[ "${CONDA_COMMAND}" == "mambabuild" || "${CONDA_COMMAND}" == "build" ]]; th
CONDA_ARGS_ARRAY+=("--build-id-pat" "{n}-{v}")
fi

# Choose default variants
CONDA_ARGS_ARRAY+=("--variants" "{python: 3.8}")

# And default channels (with optional channel alias)
CONDA_ARGS_ARRAY+=("-c" "${CONDA_CHANNEL_ALIAS:+"${CONDA_CHANNEL_ALIAS%/}/"}rapidsai")
CONDA_ARGS_ARRAY+=("-c" "${CONDA_CHANNEL_ALIAS:+"${CONDA_CHANNEL_ALIAS%/}/"}nvidia/label/cuda-11.8.0")
Expand Down
2 changes: 1 addition & 1 deletion ci/runner/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ ARG CUDA_VER=11.8.0
ARG LINUX_DISTRO=ubuntu
ARG LINUX_VER=20.04
ARG PROJ_NAME=morpheus
ARG PYTHON_VER=3.8
ARG PYTHON_VER=3.10

# Configure the base docker img
FROM ${FROM_IMAGE}:cuda${CUDA_VER}-${LINUX_DISTRO}${LINUX_VER}-py${PYTHON_VER} AS base
Expand Down
9 changes: 7 additions & 2 deletions ci/scripts/github/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,20 @@ print_env_vars

function update_conda_env() {
rapids-logger "Checking for updates to conda env"
rapids-mamba-retry env update -n morpheus --prune -q --file ${MORPHEUS_ROOT}/docker/conda/environments/cuda${CUDA_VER}_dev.yml

# Deactivate the environment first before updating
conda deactivate

# Update the packages with --prune to remove any extra packages
rapids-mamba-retry env update -n morpheus --prune -q --file ${MORPHEUS_ROOT}/docker/conda/environments/cuda${CUDA_VER}_dev.yml

# Finally, reactivate
conda activate morpheus

rapids-logger "Final Conda Environment"
conda list
}


function fetch_base_branch() {
rapids-logger "Retrieving base branch from GitHub API"
[[ -n "$GH_TOKEN" ]] && CURL_HEADERS=('-H' "Authorization: token ${GH_TOKEN}")
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ ARG CUDA_MINOR_VER=8
ARG CUDA_REV_VER=0
ARG LINUX_DISTRO=ubuntu
ARG LINUX_VER=20.04
ARG PYTHON_VER=3.8
ARG PYTHON_VER=3.10

# ============ Stage: base ============
# Configure the base conda environment
Expand Down
4 changes: 2 additions & 2 deletions docker/build_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ CUDA_MINOR_VER=${CUDA_MINOR_VER:-8}
CUDA_REV_VER=${CUDA_REV_VER:-0}
LINUX_DISTRO=${LINUX_DISTRO:-ubuntu}
LINUX_VER=${LINUX_VER:-20.04}
RAPIDS_VER=${RAPIDS_VER:-22.10}
PYTHON_VER=${PYTHON_VER:-3.8}
RAPIDS_VER=${RAPIDS_VER:-23.02}
PYTHON_VER=${PYTHON_VER:-3.10}
TENSORRT_VERSION=${TENSORRT_VERSION:-8.2.1.3}

DOCKER_ARGS="-t ${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG}"
Expand Down
17 changes: 9 additions & 8 deletions docker/conda/environments/cuda11.8_dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ dependencies:
- cuda-compiler=11.8
- cuda-nvml-dev=11.8
- cudatoolkit=11.8
- cudf 23.02
- cupy=9.5.0
- cudf=23.02
- cupy=11.6.0
- cxx-compiler
- cython=0.29.24
- datacompy=0.8
Expand All @@ -61,28 +61,29 @@ dependencies:
- librdkafka=1.7.0
- mlflow>1.29,<2
- mrc=23.07
- myst-parser==0.17
- networkx=2.8
- myst-parser==1.0.0
- networkx=3.1
- ninja=1.10
- nodejs=17.4.0
- nodejs=18.15.0
- numba>=0.56.2
- numpydoc=1.4
- pandas=1.3
- pip
- pkg-config # for mrc cmake
- pluggy=1.0
- protobuf=4.21.*
- pyarrow * *_cuda # Ensure we get a CUDA build. Version determined by cuDF
- pybind11-stubgen=0.10.5
- pydot
- pytest
- pytest-benchmark>=4.0
- pytest-cov
- python-confluent-kafka=1.7.0
- python-graphviz
- python=3.8
- python=3.10
- rapidjson=1.1.0
- scikit-build=0.13
- scikit-learn=0.23.1
- scikit-build=0.17.1
- scikit-learn=1.2.2
- sphinx
- sphinx_rtd_theme
- sysroot_linux-64=2.17
Expand Down
15 changes: 8 additions & 7 deletions docs/source/developer_guide/contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,17 @@ The following instructions are for developers who are getting started with the M

All of the following instructions assume several variables have been set:
- `MORPHEUS_ROOT`: The Morpheus repository has been checked out at a location specified by this variable. Any non-absolute paths are relative to `MORPHEUS_ROOT`.
- `PYTHON_VER`: The desired Python version. Minimum required is `3.8`
- `RAPIDS_VER`: The desired RAPIDS version for all RAPIDS libraries including cuDF and RMM. This is also used for Triton. If in doubt use `22.10`
- `PYTHON_VER`: The desired Python version. Minimum required is `3.10`
- `RAPIDS_VER`: The desired RAPIDS version for all RAPIDS libraries including cuDF and RMM. If in doubt use `23.02`
- `TRITONCLIENT_VERSION`: The desired Triton client. If in doubt use `22.10`
- `CUDA_VER`: The desired CUDA version to use. If in doubt use `11.8`


### Clone the repository and pull large file data from Git LFS

```bash
export PYTHON_VER=3.8
export RAPIDS_VER=22.10
export PYTHON_VER=3.10
export RAPIDS_VER=23.02
export CUDA_VER=11.8
export MORPHEUS_ROOT=$(pwd)/morpheus
git clone https://github.com/nv-morpheus/Morpheus.git $MORPHEUS_ROOT
Expand Down Expand Up @@ -187,8 +188,8 @@ Note: These instructions assume the user is using `mamba` instead of `conda` sin

1. Set up env variables and clone the repo:
```bash
export PYTHON_VER=3.8
export RAPIDS_VER=22.10
export PYTHON_VER=3.10
export RAPIDS_VER=23.02
export CUDA_VER=11.8
export MORPHEUS_ROOT=$(pwd)/morpheus
git clone https://github.com/nv-morpheus/Morpheus.git $MORPHEUS_ROOT
Expand Down Expand Up @@ -235,7 +236,7 @@ git submodule update --init --recursive
1. Optional: Install cuML
- Many users may wish to install cuML. Due to the complex dependency structure and versioning requirements, we need to specify exact versions of each package. The command to accomplish this is:
```bash
mamba install -c rapidsai -c nvidia -c conda-forge "cuda-python<=11.7.0" "libcusolver<=11.4.1.48" "libcusparse<12" cuml=22.10
mamba install -c rapidsai -c nvidia -c conda-forge cuml=23.02
```
1. Run Morpheus
```bash
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -821,7 +821,7 @@ Note the return tuple contains our newly constructed node, along with the unchan

![Morpheus node dependency diagram](img/sink_deps.png)

Similar to our previous examples, most of the actual business logic of the stage is contained in the `on_data` method. In this case, we grab a reference to the [cuDF](https://docs.rapids.ai/api/cudf/stable/) [DataFrame](https://docs.rapids.ai/api/cudf/stable/api_docs/dataframe.html) attached to the incoming message. We then serialize to an [io.StringIO](https://docs.python.org/3.8/library/io.html?highlight=stringio#io.StringIO) buffer, which is then sent to RabbitMQ.
Similar to our previous examples, most of the actual business logic of the stage is contained in the `on_data` method. In this case, we grab a reference to the [cuDF](https://docs.rapids.ai/api/cudf/stable/) [DataFrame](https://docs.rapids.ai/api/cudf/stable/api_docs/dataframe.html) attached to the incoming message. We then serialize to an [io.StringIO](https://docs.python.org/3.10/library/io.html?highlight=stringio#io.StringIO) buffer, which is then sent to RabbitMQ.

```python
def on_data(self, message: MessageMeta):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ The `DFPMLFlowModelWriterStage` ([examples/digital_fingerprinting/production/mor
| Argument | Type | Description |
| -------- | ---- | ----------- |
| `c` | `morpheus.config.Config` | Morpheus config object |
| `model_name_formatter` | `str` | Optional format string to control the name of models stored in MLflow, default is `dfp-{user_id}`. Currently available field names are: `user_id` and `user_md5` which is an md5 hexadecimal digest as returned by [`hash.hexdigest`](https://docs.python.org/3.8/library/hashlib.html?highlight=hexdigest#hashlib.hash.hexdigest). |
| `model_name_formatter` | `str` | Optional format string to control the name of models stored in MLflow, default is `dfp-{user_id}`. Currently available field names are: `user_id` and `user_md5` which is an md5 hexadecimal digest as returned by [`hash.hexdigest`](https://docs.python.org/3.10/library/hashlib.html?highlight=hexdigest#hashlib.hash.hexdigest). |
| `experiment_name_formatter` | `str` | Optional format string to control the experiment name for models stored in MLflow, default is `/dfp-models/{reg_model_name}`. Currently available field names are: `user_id`, `user_md5` and `reg_model_name` which is the model name as defined by `model_name_formatter` once the field names have been applied. |
| `databricks_permissions` | `dict` or `None` | Optional, when not `None` sets permissions needed when using a databricks hosted MLflow server |

Expand All @@ -352,7 +352,7 @@ For any user without an associated model in MLflow, the model for the generic us
| Argument | Type | Description |
| -------- | ---- | ----------- |
| `c` | `morpheus.config.Config` | Morpheus config object |
| `model_name_formatter` | `str` | Format string to control the name of models fetched from MLflow. Currently available field names are: `user_id` and `user_md5` which is an md5 hexadecimal digest as returned by [`hash.hexdigest`](https://docs.python.org/3.8/library/hashlib.html?highlight=hexdigest#hashlib.hash.hexdigest). |
| `model_name_formatter` | `str` | Format string to control the name of models fetched from MLflow. Currently available field names are: `user_id` and `user_md5` which is an md5 hexadecimal digest as returned by [`hash.hexdigest`](https://docs.python.org/3.10/library/hashlib.html?highlight=hexdigest#hashlib.hash.hexdigest). |

#### Filter Detection Stage (`FilterDetectionsStage`)
This stage filters the output from the inference stage for any anomalous messages. Logs which exceed the specified Z-Score will be passed onto the next stage. All remaining logs which are below the threshold will be dropped. For the purposes of the DFP pipeline, this stage is configured to use the `mean_abs_z` column of the DataFrame as the filter criteria.
Expand Down
65 changes: 53 additions & 12 deletions morpheus/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,73 @@
list(APPEND CMAKE_MESSAGE_CONTEXT "_lib")

########### morpheus ###########
set(MORPHEUS_LIB_ROOT ${CMAKE_CURRENT_SOURCE_DIR})

#----------morpheus_utils---------
include(cmake/libraries/morpheus_utils.cmake)

#----------cudf_helpers---------
include(cmake/libraries/cudf_helpers.cmake)
# This had to be inlined from cudf_helpers.cmake file because of scikit-build changes in
# https://github.com/scikit-build/scikit-build/pull/871
morpheus_add_cython_library(
cudf_helpers
PYX_FILE
"cudf_helpers.pyx"
INCLUDE_DIRS
"include"
LINK_TARGETS
cudf::cudf
Python::Module
Python::NumPy
OUTPUT_TARGET
cudf_helpers_target
)

execute_process(
COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
OUTPUT_STRIP_TRAILING_WHITESPACE
)

target_include_directories(${cudf_helpers_target}
PRIVATE
"${PYARROW_INCLUDE_DIR}"
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/autogenerated/include> # Include the generated version file
)

# This target generates headers used by other parts of the code base.
# The C++ checks used in CI need these headers but don't require an actual build.
# The `morpheus_style_checks` target allows these to be generated without a full build of Morpheus.
add_dependencies(${PROJECT_NAME}_style_checks ${cudf_helpers_target})

# We don't have control over the C++ code that Cython generates, so suppress the volatile warning raised by the compiler
target_compile_options(${cudf_helpers_target} PRIVATE -Wno-volatile)

# Disable clang-tidy and IWYU for cython generated code
set_target_properties(
${cudf_helpers_target}
PROPERTIES
CXX_CLANG_TIDY ""
C_INCLUDE_WHAT_YOU_USE ""
CXX_INCLUDE_WHAT_YOU_USE ""
EXPORT_COMPILE_COMMANDS OFF
)

#----------lib_morpheus---------
include(cmake/libraries/morpheus.cmake)
include(cmake/libmorpheus.cmake)

########### py_morpheus ########
# Set the default link targets to avoid repeating this
morpheus_utils_python_package_set_default_link_targets(morpheus mrc::pymrc)
morpheus_utils_python_package_set_default_link_targets(morpheus)

#----------morpheus._lib.common---------
morpheus_add_pybind11_module(common SOURCE_FILES common/module.cpp)

#----------morpheus._lib.stages---------
include(cmake/python_modules/stages.cmake)
morpheus_add_pybind11_module(stages SOURCE_FILES stages/module.cpp)

#----------morpheus._lib.messages---------
include(cmake/python_modules/messages.cmake)
morpheus_add_pybind11_module(messages SOURCE_FILES messages/module.cpp)

#----------morpheus._lib.modules---------
include(cmake/python_modules/modules.cmake)
morpheus_add_pybind11_module(modules SOURCE_FILES modules/module.cpp)

#----------morpheus._lib.common---------
include(cmake/python_modules/common.cmake)

if (MORPHEUS_BUILD_TESTS)
add_subdirectory(tests)
Expand Down
Loading

0 comments on commit 7d075f5

Please sign in to comment.