Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

updates for nm-magic-wand, nightly or release #247

Merged
merged 5 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/actions/nm-build-vllm/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ runs:
VENV="${{ inputs.venv }}-${COMMIT:0:7}"
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
# TODO: adjust when we need a proper release. use nightly now.
pip3 install nm-magic-wand-nightly
pip3 install -r requirements-cuda.txt -r requirements-build.txt
# build
SUCCESS=0
Expand Down
3 changes: 3 additions & 0 deletions .github/actions/nm-install-test-whl/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ runs:
pip3 install ${WHL}[sparse]
# report magic_wand version
MAGIC_WAND=$(pip3 show nm-magic-wand-nightly | grep "Version" | cut -d' ' -f2)
if [ -z "${MAGIC_WAND}" ]; then
MAGIC_WAND=$(pip3 show nm-magic-wand | grep "Version" | cut -d' ' -f2)
fi
echo "magic_wand=${MAGIC_WAND}" >> "$GITHUB_OUTPUT"
# test and collect code coverage
SUCCESS=0
Expand Down
17 changes: 14 additions & 3 deletions .github/actions/nm-set-env/action.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
name: set neuralmagic env
description: 'sets environment variables for neuralmagic'
inputs:
wf_category:
description: "categories: REMOTE, NIGHTLY, RELEASE"
required: true
# Hugging Face access token; the run step exports it as HF_TOKEN.
hf_token:
  description: 'Hugging Face token'
  required: true
Expand All @@ -14,19 +17,27 @@ runs:
using: composite
steps:
- run: |
# Publish the neuralmagic build/test settings by appending NAME=value
# lines to the file named by $GITHUB_ENV.

# setup.py defaults to making 'nightly' package with 'nightly' version,
# so NM_RELEASE_TYPE is only written for RELEASE workflows.
if [[ "${{inputs.wf_category}}" == "RELEASE" ]]; then
  echo "NM_RELEASE_TYPE=${{inputs.wf_category}}" >> $GITHUB_ENV
fi
# CUDA
# WHOAMI has to be assigned before PATH is written: the PATH value below
# expands ${WHOAMI} for the /home/<user>/.local/bin entry.
WHOAMI=$(whoami)
echo "TORCH_CUDA_ARCH_LIST=7.0 7.5 8.0 8.6 8.9 9.0+PTX" >> $GITHUB_ENV
echo "PATH=/usr/local/apps/pyenv/plugins/pyenv-virtualenv/shims:/usr/local/apps/pyenv/shims:/usr/local/apps/pyenv/bin:/usr/local/apps/nvm/versions/node/v19.9.0/bin:/usr/local/apps/nvm/versions/node/v16.20.2/bin:/usr/local/cuda-12.1/bin:/usr/local/cuda-12.1/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/${WHOAMI}/.local/bin:" >> $GITHUB_ENV
echo "LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64::/usr/local/cuda-12.1/lib64:" >> $GITHUB_ENV
# HF Cache
echo "HF_TOKEN=${HF_TOKEN_SECRET}" >> $GITHUB_ENV
echo "HF_HOME=/EFS/hf_home" >> $GITHUB_ENV
# build
# MAX_JOBS comes from the repo helper script, given the Gi_per_thread input.
NUM_THREADS=$(./.github/scripts/determine-threading -G ${{ inputs.Gi_per_thread }})
echo "MAX_JOBS=${NUM_THREADS}" >> $GITHUB_ENV
echo "NVCC_THREADS=${{ inputs.nvcc_threads }}" >> $GITHUB_ENV
echo "VLLM_INSTALL_PUNICA_KERNELS=1" >> $GITHUB_ENV
echo "NCCL_IGNORE_DISABLED_P2P=1" >> $GITHUB_ENV
# pyenv
echo "PYENV_ROOT=/usr/local/apps/pyenv" >> $GITHUB_ENV
# testmo
echo "XDG_CONFIG_HOME=/usr/local/apps" >> $GITHUB_ENV
echo "PROJECT_ID=12" >> $GITHUB_ENV
env:
HF_TOKEN_SECRET: ${{ inputs.hf_token }}
Expand Down
43 changes: 0 additions & 43 deletions .github/actions/nm-test-vllm/action.yml

This file was deleted.

13 changes: 1 addition & 12 deletions .github/scripts/build
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ usage() {
echo
echo "usage: ${0} <options>"
echo
echo " -a - pypi server address"
echo " -p - python version"
echo " -v - name for virtualenv"
echo " -h - this list of options"
Expand All @@ -17,15 +16,12 @@ PYPI_IP=
PYTHON=
VENV=

while getopts "ha:p:v:" OPT; do
while getopts "hp:v:" OPT; do
case "${OPT}" in
h)
usage
exit 1
;;
a)
PYPI_IP="${OPTARG}"
;;
p)
PYTHON="${OPTARG}"
;;
Expand All @@ -36,12 +32,6 @@ while getopts "ha:p:v:" OPT; do
done

# check if variables are valid
if [ -z "${PYPI_IP}" ]; then
echo "please provide 'pypi' server address"
usage
exit 1
fi

if [ -z "${PYTHON}" ]; then
echo "please provide python version, e.g. 3.10.12"
usage
Expand All @@ -55,6 +45,5 @@ if [ -z "${VENV}" ]; then
fi

source $(pyenv root)/versions/${PYTHON}/envs/${VENV}/bin/activate
pip3 install --index-url http://${PYPI_IP}:8080/ --trusted-host ${PYPI_IP} nm-magic-wand-nightly
pip3 install -r requirements-cuda.txt -r requirements-build.txt
pip3 install -e .
1 change: 1 addition & 0 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ jobs:
BUILD:
uses: ./.github/workflows/build.yml
with:
wf_category: ${{ inputs.wf_category }}
build_label: ${{ inputs.build_label }}
timeout: ${{ inputs.build_timeout }}
gitref: ${{ github.ref }}
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ on:
# makes workflow reusable
workflow_call:
inputs:
wf_category:
description: "categories: REMOTE, NIGHTLY, RELEASE"
type: string
default: "REMOTE"
build_label:
description: "requested runner label (specifies instance)"
type: string
Expand Down Expand Up @@ -31,6 +35,10 @@ on:
# makes workflow manually callable
workflow_dispatch:
inputs:
wf_category:
description: "categories: REMOTE, NIGHTLY, RELEASE"
type: string
default: "REMOTE"
build_label:
description: "requested runner label (specifies instance)"
type: string
Expand Down Expand Up @@ -83,6 +91,7 @@ jobs:
id: setenv
uses: ./.github/actions/nm-set-env/
with:
wf_category: ${{ inputs.wf_category }}
hf_token: ${{ secrets.NM_HF_TOKEN }}
Gi_per_thread: ${{ inputs.Gi_per_thread }}
nvcc_threads: ${{ inputs.nvcc_threads }}
Expand Down
25 changes: 0 additions & 25 deletions .github/workflows/gen-whl.yml

This file was deleted.

56 changes: 55 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,43 @@ concurrency:

jobs:

BUILD-TEST:
PYTHON-3-8:
uses: ./.github/workflows/build-test.yml
with:
wf_category: 'RELEASE'
python: 3.8.17
gitref: ${{ github.ref }}

test_label_solo: aws-avx2-32G-a10g-24G
test_label_multi: aws-avx2-192G-4-a10g-96G
test_timeout: 480
test_skip_list: neuralmagic/tests/skip-for-release.txt

benchmark_label: aws-avx2-32G-a10g-24G
benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
benchmark_timeout: 180
push_benchmark_results_to_gh_pages: ${{ inputs.push_benchmark_results_to_gh_pages }}
secrets: inherit

PYTHON-3-9:
uses: ./.github/workflows/build-test.yml
with:
wf_category: 'RELEASE'
python: 3.9.17
gitref: ${{ github.ref }}

test_label_solo: aws-avx2-32G-a10g-24G
test_label_multi: aws-avx2-192G-4-a10g-96G
test_timeout: 480
test_skip_list: neuralmagic/tests/skip-for-release.txt

benchmark_label: aws-avx2-32G-a10g-24G
benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
benchmark_timeout: 180
push_benchmark_results_to_gh_pages: ${{ inputs.push_benchmark_results_to_gh_pages }}
secrets: inherit

PYTHON-3-10:
uses: ./.github/workflows/build-test.yml
with:
wf_category: 'RELEASE'
Expand All @@ -34,3 +70,21 @@ jobs:
benchmark_timeout: 180
push_benchmark_results_to_gh_pages: ${{ inputs.push_benchmark_results_to_gh_pages }}
secrets: inherit

PYTHON-3-11:
uses: ./.github/workflows/build-test.yml
with:
wf_category: 'RELEASE'
python: 3.11.4
gitref: ${{ github.ref }}

test_label_solo: aws-avx2-32G-a10g-24G
test_label_multi: aws-avx2-192G-4-a10g-96G
test_timeout: 480
test_skip_list: neuralmagic/tests/skip-for-release.txt

benchmark_label: aws-avx2-32G-a10g-24G
benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
benchmark_timeout: 180
push_benchmark_results_to_gh_pages: ${{ inputs.push_benchmark_results_to_gh_pages }}
secrets: inherit
25 changes: 24 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# UPSTREAM SYNC: noqa is required for passing ruff.
# This file has been modified by Neural Magic

import datetime
import importlib.util
import io
import logging
Expand Down Expand Up @@ -306,9 +307,28 @@ def find_version(filepath: str) -> str:
raise RuntimeError("Unable to find version string.")


# Neuralmagic packaging ENV's
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could these be added to nm-vllm/vllm/envs.py?

That is the central location for env vars. Perhaps not, because this is setup.py, but I thought I would ask.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can take a look.

# Name of the environment variable that selects the packaging flavor.
NM_RELEASE_TYPE = 'NM_RELEASE_TYPE'


def get_nm_vllm_package_name() -> str:
    """Return the distribution name to build this package under.

    Reads the environment variable named by ``NM_RELEASE_TYPE``. The value
    ``'RELEASE'`` selects ``'nm-vllm'``; any other value, or an unset
    variable, selects ``'nm-vllm-nightly'``.
    """
    if os.getenv(NM_RELEASE_TYPE) == 'RELEASE':
        return 'nm-vllm'
    return 'nm-vllm-nightly'


def get_vllm_version() -> str:
version = find_version(get_path("vllm", "__init__.py"))

nm_release_type = os.getenv(NM_RELEASE_TYPE)
if nm_release_type != 'RELEASE':
date = datetime.date.today().strftime("%Y%m%d")
version += f'.{date}'

if _is_cuda():
cuda_version = str(get_nvcc_cuda_version())
if cuda_version != MAIN_CUDA_VERSION:
Expand Down Expand Up @@ -393,6 +413,9 @@ def _read_requirements(filename: str) -> List[str]:

# UPSTREAM SYNC: needed for sparsity
# 'RELEASE' builds depend on "nm-magic-wand"; every other value of the
# NM_RELEASE_TYPE environment variable (or none) uses the nightly package.
nm_release_type = os.getenv(NM_RELEASE_TYPE)
_sparsity_deps = (["nm-magic-wand"] if nm_release_type == 'RELEASE' else
                  ["nm-magic-wand-nightly"])

package_data = {
"vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"]
Expand All @@ -402,7 +425,7 @@ def _read_requirements(filename: str) -> List[str]:
package_data["vllm"].append("*.so")

setup(
name="nm-vllm",
name=get_nm_vllm_package_name(),
version=get_vllm_version(),
author="vLLM Team, Neural Magic",
author_email="support@neuralmagic.com",
Expand Down
Loading