Add compatible HuggingFace models to benchmark workflow
Guang Yang committed Sep 13, 2024
1 parent c707e4c commit e81766a
Showing 4 changed files with 121 additions and 56 deletions.
109 changes: 109 additions & 0 deletions .ci/scripts/test_hf_model.sh
@@ -0,0 +1,109 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

# Input parameters:
#   $1: Hugging Face model repo (e.g., 'google/gemma-2b')
#   $2: optional directory to upload the generated artifacts to
HF_MODEL_REPO=$1
UPLOAD_DIR=${2:-}
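# Example invocation (the artifacts directory here is hypothetical):
#   PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh google/gemma-2b /tmp/hf_artifacts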

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python
fi
which "${PYTHON_EXECUTABLE}"

# Extract the model name from the HF_MODEL_REPO by splitting on '/' and replacing '_' with '-'
ET_MODEL_NAME=$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g')
# Add the suffix "_xnnpack_fp32" to the model name (currently supported delegate and dtype)
OUT_ET_MODEL_NAME="${ET_MODEL_NAME}_xnnpack_fp32"
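# e.g., 'google/gemma-2b' -> ET_MODEL_NAME='gemma-2b' -> OUT_ET_MODEL_NAME='gemma-2b_xnnpack_fp32'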

# Files to be handled
TOKENIZER_FILE="tokenizer.model"
OUT_TOKENIZER_BIN_FILE="tokenizer.bin"

# Download the tokenizer model using Hugging Face hub
DOWNLOADED_TOKENIZER_FILE_PATH=$("${PYTHON_EXECUTABLE}" << EOF
from huggingface_hub import hf_hub_download
# Download the tokenizer file from the Hugging Face Hub
try:
    downloaded_path = hf_hub_download(
        repo_id='${HF_MODEL_REPO}',
        filename='${TOKENIZER_FILE}'
    )
    print(downloaded_path)
except Exception as e:
    print(f"Error: {str(e)}")
EOF
)
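# Note: on a failed download the heredoc prints an error message instead of a
# file path, so the existence check below fails and the script exits.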

# Check if the tokenizer file was successfully downloaded
if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
  echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"

  # Convert the tokenizer to binary using the Python module
  echo "Converting the tokenizer to binary format"
  "${PYTHON_EXECUTABLE}" -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH" -o "./${OUT_TOKENIZER_BIN_FILE}"
  ls "./${OUT_TOKENIZER_BIN_FILE}"
else
  echo "Failed to download ${TOKENIZER_FILE} from ${HF_MODEL_REPO}."
  exit 1
fi

# Export the Hugging Face model
echo "Export the Hugging Face model ${HF_MODEL_REPO} to ExecuTorch"
"${PYTHON_EXECUTABLE}" -m extension.export_util.export_hf_model -hfm="$HF_MODEL_REPO" -o "$OUT_ET_MODEL_NAME"
ls -All "./${OUT_ET_MODEL_NAME}.pte"

if [ -n "$UPLOAD_DIR" ]; then
  echo "Preparing to upload the generated artifacts"
  zip -j model.zip "${OUT_ET_MODEL_NAME}.pte" "${OUT_TOKENIZER_BIN_FILE}"
  mkdir -p "${UPLOAD_DIR}"
  mv model.zip "${UPLOAD_DIR}"
fi
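# The zipped artifacts are consumed by the benchmark workflows, which pass
# their artifacts directory as the second argument (see android-perf.yml and
# apple-perf.yml below).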

cmake_install_executorch_libraries() {
  echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
  rm -rf cmake-out
  retry cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Release \
    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
    -Bcmake-out .
  cmake --build cmake-out -j9 --target install --config Release
}

cmake_build_llama_runner() {
  echo "Building llama runner"
  dir="examples/models/llama2"
  retry cmake \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Release \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
    -Bcmake-out/${dir} \
    ${dir}
  cmake --build cmake-out/${dir} -j9 --config Release
}

cmake_install_executorch_libraries
cmake_build_llama_runner

./cmake-out/examples/models/llama2/llama_main --model_path="${OUT_ET_MODEL_NAME}.pte" --tokenizer_path="${OUT_TOKENIZER_BIN_FILE}" --prompt="My name is"
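
A minimal standalone sketch of the "<org>/<repo>" detection that the workflow changes below rely on (not part of this commit; the model names are illustrative):

for model in google/gemma-2b stories110M; do
  if [[ $model =~ ^[^/]+/[^/]+$ ]]; then
    echo "$model: HuggingFace repo, routed to .ci/scripts/test_hf_model.sh"
  else
    echo "$model: handled by the existing model-specific flow"
  fi
done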
6 changes: 5 additions & 1 deletion .github/workflows/android-perf.yml
@@ -157,7 +157,11 @@ jobs:
           BUILD_MODE="cmake"
           DTYPE="fp32"
-          if [[ ${{ matrix.model }} =~ ^stories* ]]; then
+          if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]]; then
+            # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
+            HF_MODEL_REPO=${{ matrix.model }}
+            PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.model }} ${ARTIFACTS_DIR_NAME}
+          elif [[ ${{ matrix.model }} =~ ^stories* ]]; then
             # Install requirements for export_llama
             PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
             # Test llama2
5 changes: 5 additions & 0 deletions .github/workflows/apple-perf.yml
@@ -157,6 +157,11 @@ jobs:
           BUILD_MODE="cmake"
           DTYPE="fp32"
+          if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]]; then
+            # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
+            HF_MODEL_REPO=${{ matrix.model }}
+            PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.model }} ${ARTIFACTS_DIR_NAME}
+          elif [[ ${{ matrix.model }} =~ ^stories* ]]; then
           if [[ ${{ matrix.model }} =~ ^stories* ]]; then
             # Install requirements for export_llama
             PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
57 changes: 2 additions & 55 deletions .github/workflows/trunk.yml
@@ -373,36 +373,6 @@ jobs:
           CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
           conda activate "${CONDA_ENV}"
           PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
-          echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
-          rm -rf cmake-out
-          cmake \
-            -DCMAKE_INSTALL_PREFIX=cmake-out \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-            -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-            -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-            -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-            -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-            -DEXECUTORCH_BUILD_XNNPACK=ON \
-            -DPYTHON_EXECUTABLE=python \
-            -Bcmake-out .
-          cmake --build cmake-out -j9 --target install --config Release
-          echo "Build llama runner"
-          dir="examples/models/llama2"
-          cmake \
-            -DCMAKE_INSTALL_PREFIX=cmake-out \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-            -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-            -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-            -DEXECUTORCH_BUILD_XNNPACK=ON \
-            -DPYTHON_EXECUTABLE=python \
-            -Bcmake-out/${dir} \
-            ${dir}
-          cmake --build cmake-out/${dir} -j9 --config Release
           echo "::endgroup::"
           echo "::group::Set up HuggingFace Dependencies"
@@ -415,29 +385,6 @@
           echo "::endgroup::"
           echo "::group::Export to ExecuTorch"
-          TOKENIZER_FILE=tokenizer.model
-          TOKENIZER_BIN_FILE=tokenizer.bin
-          ET_MODEL_NAME=et_model
-          # Fetch the file using a Python one-liner
-          DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c "
-          from huggingface_hub import hf_hub_download
-          # Download the file from the Hugging Face Hub
-          downloaded_path = hf_hub_download(
-              repo_id='${{ matrix.hf_model_repo }}',
-              filename='${TOKENIZER_FILE}'
-          )
-          print(downloaded_path)
-          ")
-          if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
-            echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
-            python -m extension.llm.tokenizer.tokenizer -t $DOWNLOADED_TOKENIZER_FILE_PATH -o ./${TOKENIZER_BIN_FILE}
-            ls ./tokenizer.bin
-          else
-            echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
-            exit 1
-          fi
-          python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
-          cmake-out/examples/models/llama2/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
+          # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.hf_model_repo }}
           echo "::endgroup::"
