-
Notifications
You must be signed in to change notification settings - Fork 178
Optimum amd support #464
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimum amd support #464
Changes from all commits
6e47aef
be7cd61
8ceeab0
987d8c5
8f88be1
129fd4a
0e8b81b
16bb74f
93d7c27
83ec2f1
30da7fb
ae76a0c
829c8a4
d204b8a
018cbb0
e6d3fa4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,7 +33,42 @@ amd: | |
| # "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all" | ||
| COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh | ||
| RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/rocm6.2" | ||
| poetry_extras: "all onnxruntime-gpu" | ||
| extra_installs_main: | | ||
| ARG GPU_ARCH | ||
| ENV GPU_ARCH=${GPU_ARCH} | ||
| # GPU architecture specific installations | ||
| RUN cd /opt/rocm/share/amd_smi && python -m pip wheel . --wheel-dir=/install | ||
| RUN apt update -y && apt install migraphx -y | ||
| RUN if [ "$GPU_ARCH" = "gfx90a" ] || [ "$GPU_ARCH" = "gfx942" ]; then \ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. logic: Consider adding error handling and logging for failed installations. The script continues silently if any of the installation steps fail. |
||
| # OPTION1: Follow the steps here to install onnxruntime-rocm | ||
| # https://huggingface.co/docs/optimum/onnxruntime/usage_guides/amdgpu | ||
| . .venv/bin/activate && python -m pip uninstall onnxruntime -y \ | ||
| && python -m pip install /install/*.whl \ | ||
| && python -m pip install cmake onnx \ | ||
| && (curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y) \ | ||
| && (. $HOME/.cargo/env) \ | ||
| && git clone --single-branch --branch main --recursive https://github.com/Microsoft/onnxruntime onnxruntime \ | ||
| && cd onnxruntime \ | ||
| && (./build.sh --config Release --build_wheel --allow_running_as_root --update --build --parallel --cmake_extra_defines CMAKE_HIP_ARCHITECTURES=${GPU_ARCH} ONNXRUNTIME_VERSION=$(cat ./VERSION_NUMBER) --use_rocm --use_migraphx --rocm_home=/opt/rocm) \ | ||
michaelfeil marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| && python -m pip uninstall onnxruntime -y \ | ||
| && python -m pip install build/Linux/Release/dist/* \ | ||
| && cp -r /app/onnxruntime/build/Linux/Release/*.so /usr/local/lib/ \ | ||
| && cp -r /app/onnxruntime/build/Linux/Release/*.so.* /usr/local/lib/ \ | ||
michaelfeil marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| && git clone https://github.com/huggingface/optimum-amd.git \ | ||
| && cd optimum-amd \ | ||
| && python -m pip install -e .; \ | ||
| elif [ "$GPU_ARCH" = "gfx1100" ]; then \ | ||
| # OPTION2: Install onnxruntime-rocm from the wheel | ||
| . .venv/bin/activate && python -m pip uninstall onnxruntime onnxruntime-rocm -y && python -m pip install "numpy<2" https://repo.radeon.com/rocm/manylinux/rocm-rel-6.2.3/onnxruntime_rocm-1.18.0-cp310-cp310-linux_x86_64.whl \ | ||
| && python -m pip install /install/*.whl \ | ||
| && git clone https://github.com/huggingface/optimum-amd.git /tmp-optimum \ | ||
| && cd /tmp-optimum \ | ||
| && python -m pip install .; \ | ||
| else \ | ||
| echo "Unsupported GPU_ARCH: ${GPU_ARCH}"; \ | ||
| exit 1; \ | ||
| fi | ||
| poetry_extras: "all" | ||
| python_version: python3.10 | ||
| extra_env_variables: | | ||
| # RUN conda init --reverse --all | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,8 @@ | |
| from huggingface_hub import HfApi, HfFolder # type: ignore | ||
| from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE # type: ignore | ||
|
|
||
| from infinity_emb._optional_imports import CHECK_ONNXRUNTIME | ||
| from infinity_emb._optional_imports import CHECK_ONNXRUNTIME, CHECK_OPTIMUM_AMD | ||
|
|
||
| from infinity_emb.log_handler import logger | ||
| from infinity_emb.primitives import Device | ||
|
|
||
|
|
@@ -57,6 +58,8 @@ def device_to_onnx(device: Device) -> str: | |
| elif device == Device.cuda: | ||
| if "ROCMExecutionProvider" in available: | ||
| return "ROCMExecutionProvider" | ||
| elif "MIGraphXExecutionProvider" in available: | ||
| return "MIGraphXExecutionProvider" | ||
| return "CUDAExecutionProvider" | ||
| elif device == Device.mps: | ||
| return "CoreMLExecutionProvider" | ||
|
|
@@ -67,6 +70,8 @@ def device_to_onnx(device: Device) -> str: | |
| return "TensorrtExecutionProvider" | ||
| elif "CUDAExecutionProvider" in available: | ||
| return "CUDAExecutionProvider" | ||
| elif "MIGraphXExecutionProvider" in available: | ||
| return "MIGraphXExecutionProvider" # swapped order of ROCM and MIGraphX | ||
| elif "ROCMExecutionProvider" in available: | ||
|
Comment on lines
+73
to
75
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. logic: MIGraphX is prioritized over ROCM here but reversed in the CUDA section above. Consider using consistent ordering. |
||
| return "ROCMExecutionProvider" | ||
| elif "CoreMLExecutionProvider" in available: | ||
|
|
@@ -100,12 +105,8 @@ def optimize_model( | |
| revision (Optional[str], optional): The revision to use. Defaults to None. | ||
| trust_remote_code (bool, optional): Whether to trust the remote code. Defaults to True. | ||
| """ | ||
| CHECK_ONNXRUNTIME.mark_required() | ||
| path_folder = ( | ||
| Path(HUGGINGFACE_HUB_CACHE) / "infinity_onnx" / execution_provider / model_name_or_path | ||
| ) | ||
| OPTIMIZED_SUFFIX = "_optimized.onnx" | ||
| files_optimized = list(path_folder.glob(f"**/*{OPTIMIZED_SUFFIX}")) | ||
|
|
||
| ## If there is no need for optimization | ||
| if execution_provider == "TensorrtExecutionProvider": | ||
| return model_class.from_pretrained( | ||
| model_name_or_path, | ||
|
|
@@ -123,8 +124,28 @@ def optimize_model( | |
| # "trt_int8_enable": "quantize" in file_name, | ||
| }, | ||
| ) | ||
|
|
||
| elif execution_provider in ["ROCMExecutionProvider", "MIGraphXExecutionProvider"]: | ||
| CHECK_OPTIMUM_AMD.mark_required() | ||
| return model_class.from_pretrained( | ||
| model_name_or_path, | ||
| revision=revision, | ||
| trust_remote_code=trust_remote_code, | ||
| provider=execution_provider, | ||
| file_name=file_name, | ||
| ) | ||
|
|
||
| ## path to find if model has been optimized | ||
| CHECK_ONNXRUNTIME.mark_required() | ||
| path_folder = ( | ||
| Path(HUGGINGFACE_HUB_CACHE) / "infinity_onnx" / execution_provider / model_name_or_path | ||
| ) | ||
| OPTIMIZED_SUFFIX = "_optimized.onnx" | ||
| files_optimized = list(path_folder.glob(f"**/*{OPTIMIZED_SUFFIX}")) | ||
|
|
||
| logger.info(f"files_optimized: {files_optimized}") | ||
| if files_optimized: | ||
| file_optimized = files_optimized[0] | ||
| file_optimized = files_optimized[-1] | ||
|
Comment on lines
147
to
+148
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. style: Using files_optimized[-1] could be unstable if multiple optimized versions exist. Consider using version sorting or timestamps. |
||
| logger.info(f"Optimized model found at {file_optimized}, skipping optimization") | ||
| return model_class.from_pretrained( | ||
| file_optimized.parent.as_posix(), | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.