diff --git a/.ci/scripts/test_hf_model.sh b/.ci/scripts/test_hf_model.sh
new file mode 100644
index 00000000000..cab7d88f920
--- /dev/null
+++ b/.ci/scripts/test_hf_model.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+# Input parameter: Hugging Face model repo (e.g., 'google/gemma-2b')
+HF_MODEL_REPO=$1
+UPLOAD_DIR=${2:-}
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python
+fi
+which "${PYTHON_EXECUTABLE}"
+
+# Extract the model name from the HF_MODEL_REPO by splitting on '/' and replacing '_' with '-'
+ET_MODEL_NAME=$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g')
+# Add the suffix "_xnnpack_fp32" to the model name (the currently supported delegate and dtype)
+OUT_ET_MODEL_NAME="${ET_MODEL_NAME}_xnnpack_fp32"
+
+# Files to be handled
+TOKENIZER_FILE="tokenizer.model"
+OUT_TOKENIZER_BIN_FILE="tokenizer.bin"
+
+# Download the tokenizer model using the Hugging Face Hub
+DOWNLOADED_TOKENIZER_FILE_PATH=$("${PYTHON_EXECUTABLE}" << EOF
+from huggingface_hub import hf_hub_download
+
+# Download the tokenizer file from the Hugging Face Hub
+try:
+    downloaded_path = hf_hub_download(
+        repo_id='${HF_MODEL_REPO}',
+        filename='${TOKENIZER_FILE}'
+    )
+    print(downloaded_path)
+except Exception as e:
+    print(f"Error: {str(e)}")
+EOF
+)
+
+# Check if the tokenizer file was successfully downloaded
+if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
+  echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
+
+  # Convert the tokenizer to binary using the Python module
+  echo "Convert the tokenizer to binary format"
+  "${PYTHON_EXECUTABLE}" -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH" -o "./${OUT_TOKENIZER_BIN_FILE}"
+  ls "./${OUT_TOKENIZER_BIN_FILE}"
+else
+  echo "Failed to download ${TOKENIZER_FILE} from ${HF_MODEL_REPO}."
+  exit 1
+fi
+
+# Export the Hugging Face model
+echo "Export the Hugging Face model ${HF_MODEL_REPO} to ExecuTorch"
+"${PYTHON_EXECUTABLE}" -m extension.export_util.export_hf_model -hfm="$HF_MODEL_REPO" -o "$OUT_ET_MODEL_NAME"
+ls -All "./${OUT_ET_MODEL_NAME}.pte"
+
+if [ -n "$UPLOAD_DIR" ]; then
+  echo "Preparing for uploading generated artifacts"
+  zip -j model.zip "${OUT_ET_MODEL_NAME}.pte" "${OUT_TOKENIZER_BIN_FILE}"
+  mkdir -p "${UPLOAD_DIR}"
+  mv model.zip "${UPLOAD_DIR}"
+fi
+
+cmake_install_executorch_libraries() {
+  echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
+  rm -rf cmake-out
+  retry cmake \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out .
+  cmake --build cmake-out -j9 --target install --config Release
+}
+
+cmake_build_llama_runner() {
+  echo "Building llama runner"
+  dir="examples/models/llama2"
+  retry cmake \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out/${dir} \
+    ${dir}
+  cmake --build cmake-out/${dir} -j9 --config Release
+}
+
+cmake_install_executorch_libraries
+cmake_build_llama_runner
+
+./cmake-out/examples/models/llama2/llama_main --model_path="${OUT_ET_MODEL_NAME}.pte" --tokenizer_path="${OUT_TOKENIZER_BIN_FILE}" --prompt="My name is"
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
index 6769c117af6..9ecfe6883e9 100644
--- a/.github/workflows/android-perf.yml
+++ b/.github/workflows/android-perf.yml
@@ -157,7 +157,11 @@
         BUILD_MODE="cmake"
         DTYPE="fp32"

-        if [[ ${{ matrix.model }} =~ ^stories* ]]; then
+        if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]]; then
+          # HuggingFace model. Assume the pattern is always like "<org>/<model>"
+          HF_MODEL_REPO=${{ matrix.model }}
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.model }} ${ARTIFACTS_DIR_NAME}
+        elif [[ ${{ matrix.model }} =~ ^stories* ]]; then
           # Install requirements for export_llama
           PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
           # Test llama2
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
index e214e33ac1c..ae754e1b1e4 100644
--- a/.github/workflows/apple-perf.yml
+++ b/.github/workflows/apple-perf.yml
@@ -157,6 +157,10 @@
         BUILD_MODE="cmake"
         DTYPE="fp32"

-        if [[ ${{ matrix.model }} =~ ^stories* ]]; then
+        if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]]; then
+          # HuggingFace model. Assume the pattern is always like "<org>/<model>"
+          HF_MODEL_REPO=${{ matrix.model }}
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.model }} ${ARTIFACTS_DIR_NAME}
+        elif [[ ${{ matrix.model }} =~ ^stories* ]]; then
           # Install requirements for export_llama
           PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 9d50420e9f9..d781dbc6d17 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -373,36 +373,6 @@
          CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
          conda activate "${CONDA_ENV}"
          PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
-
-          echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
-          rm -rf cmake-out
-          cmake \
-            -DCMAKE_INSTALL_PREFIX=cmake-out \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-            -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-            -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-            -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-            -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-            -DEXECUTORCH_BUILD_XNNPACK=ON \
-            -DPYTHON_EXECUTABLE=python \
-            -Bcmake-out .
-          cmake --build cmake-out -j9 --target install --config Release
-
-          echo "Build llama runner"
-          dir="examples/models/llama2"
-          cmake \
-            -DCMAKE_INSTALL_PREFIX=cmake-out \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-            -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-            -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-            -DEXECUTORCH_BUILD_XNNPACK=ON \
-            -DPYTHON_EXECUTABLE=python \
-            -Bcmake-out/${dir} \
-            ${dir}
-          cmake --build cmake-out/${dir} -j9 --config Release
          echo "::endgroup::"

          echo "::group::Set up HuggingFace Dependencies"
@@ -415,29 +385,6 @@
          echo "::endgroup::"

          echo "::group::Export to ExecuTorch"
-          TOKENIZER_FILE=tokenizer.model
-          TOKENIZER_BIN_FILE=tokenizer.bin
-          ET_MODEL_NAME=et_model
-          # Fetch the file using a Python one-liner
-          DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c "
-          from huggingface_hub import hf_hub_download
-          # Download the file from the Hugging Face Hub
-          downloaded_path = hf_hub_download(
-            repo_id='${{ matrix.hf_model_repo }}',
-            filename='${TOKENIZER_FILE}'
-          )
-          print(downloaded_path)
-          ")
-          if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
-            echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
-            python -m extension.llm.tokenizer.tokenizer -t $DOWNLOADED_TOKENIZER_FILE_PATH -o ./${TOKENIZER_BIN_FILE}
-            ls ./tokenizer.bin
-          else
-            echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
-            exit 1
-          fi
-
-          python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
-
-          cmake-out/examples/models/llama2/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
+          # HuggingFace model. Assume the pattern is always like "<org>/<model>"
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh ${{ matrix.hf_model_repo }}
          echo "::endgroup::"
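
For reference, the new script can also be exercised outside CI with an invocation along these lines. This is only a sketch: it assumes an ExecuTorch development checkout with the Python package and huggingface_hub already installed in the active environment, it reuses the 'google/gemma-2b' repo named in the script's own example comment, and the HF_TOKEN export and /tmp/hf-artifacts path are illustrative placeholders rather than anything the patch requires.

# Run from the repository root; HF_TOKEN is only needed for gated Hugging Face repos.
# export HF_TOKEN=<your token>
PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh google/gemma-2b

# The optional second argument is an upload directory: the script zips the exported
# .pte file and tokenizer.bin into model.zip and moves the archive there.
PYTHON_EXECUTABLE=python bash .ci/scripts/test_hf_model.sh google/gemma-2b /tmp/hf-artifacts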