
Commit dd5df65

larryliu0820 authored and malfet committed

Add build_native.sh and add README.md (#481)

* Add build_native.sh and add README.md

  Summary: Added a script to build the C++ runner for ET and AOTI, and updated README.md to ask users to run it. Also improved build speed by reducing duplicate build commands; we can now rely on `install_requirements.sh` to install all of the C++ libraries needed by the runner.

* Revert custom ops change

* Add build_native.sh to CI job

* Add README for building native runner for aoti
1 parent e7cc602 commit dd5df65

File tree

7 files changed: +165 −50 lines changed

.github/workflows/pull.yml

Lines changed: 5 additions & 7 deletions

@@ -246,7 +246,7 @@ jobs:
          export REPO_NAME=${{ matrix.repo_name }}
          bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
          echo "::endgroup::"
-
+
          echo "::group::Convert checkpoint"
          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          echo "::endgroup::"
@@ -838,13 +838,11 @@ jobs:
          pip install -r requirements.txt

          export TORCHCHAT_ROOT=${PWD}
-         export ENABLE_ET_PYBIND=false
-         ./scripts/install_et.sh $ENABLE_ET_PYBIND
+         bash scripts/build_native.sh et
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          python3 -c 'import torchvision;print(f"torchvision: {torchvision.__version__, torchvision.version.git_version}")'
          python3 -c 'import torchaudio;print(f"torchaudio: {torchaudio.__version__, torchaudio.version.git_version}")'
-         cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
-         cmake --build ./cmake-out --target et_run
+
      - name: Download checkpoints
        run: |
@@ -891,8 +889,8 @@ jobs:
          pip install -r requirements.txt
          pip list

-         cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
-         cmake --build ./cmake-out --target aoti_run
+         bash scripts/build_native.sh aoti
+
      - name: Download checkpoint
        run: |
          mkdir -p checkpoints/stories15M
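
The net effect of these workflow changes: both the ExecuTorch and AOTI jobs now delegate their configure-and-build steps to the new script. A minimal local reproduction of the ExecuTorch job's build step (a sketch, assuming a torchchat checkout with requirements.txt already installed):

```bash
# Sketch: reproduce the CI build step locally.
export TORCHCHAT_ROOT=${PWD}
bash scripts/build_native.sh et   # replaces install_et.sh plus the manual cmake calls
```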

README.md

Lines changed: 31 additions & 4 deletions

@@ -73,11 +73,10 @@ with `python3 torchchat.py remove llama3`.
 * [Run exported .so file via your own C++ application](#run-server)
   * in Chat mode
   * in Generate mode
-* [Export for mobile via ExecuTorch](#export-executorch)
+* [Export for mobile via ExecuTorch](#exporting-for-mobile-via-executorch)
+* [Run exported ExecuTorch file on iOS or Android](#mobile-execution)
   * in Chat mode
   * in Generate mode
-* [Run exported ExecuTorch file on iOS or Android](#run-mobile)
-

 ## Running via PyTorch / Python

@@ -235,6 +234,20 @@ python3 torchchat.py generate --dso-path stories15M.so --prompt "Hello my name i

 NOTE: The exported model will be large. We suggest you quantize the model, explained further down, before deploying the model on device.

+**Build Native Runner Binary**
+
+We provide an end-to-end C++ [runner](runner/run.cpp) that runs the `*.so` file exported by following the previous [AOT Inductor](#aoti-aot-inductor) section. To build the runner binary on your Mac or Linux:
+
+```bash
+scripts/build_native.sh aoti
+```
+
+Run:
+
+```bash
+cmake-out/aoti_run model.so -z tokenizer.model -i "Once upon a time"
+```
+
 ### ExecuTorch

 ExecuTorch enables you to optimize your model for execution on a mobile or embedded device, but can also be used on desktop for testing.
@@ -250,7 +263,7 @@ python3 torchchat.py export stories15M --output-pte-path stories15M.pte
 python3 torchchat.py generate --device cpu --pte-path stories15M.pte --prompt "Hello my name is"
 ```

-See below under [Mobile Execution](#run-mobile) if you want to deploy and execute a model in your iOS or Android app.
+See below under [Mobile Execution](#mobile-execution) if you want to deploy and execute a model in your iOS or Android app.

@@ -265,6 +278,20 @@ Read the [iOS documentation](docs/iOS.md) for more details on iOS.

 Read the [Android documentation](docs/Android.md) for more details on Android.

+**Build Native Runner Binary**
+
+We provide an end-to-end C++ [runner](runner/run.cpp) that runs the `*.pte` file exported by following the previous [ExecuTorch](#executorch) section. Note that this binary is for demo purposes; see the respective iOS and Android documentation for how to build a similar application. To build the runner binary on your Mac or Linux:
+
+```bash
+scripts/build_native.sh et
+```
+
+Run:
+
+```bash
+cmake-out/et_run model.pte -z tokenizer.model -i "Once upon a time"
+```
+
 ## Fine-tuned models from torchtune

 torchchat supports running inference with models fine-tuned using [torchtune](https://github.com/pytorch/torchtune). To do so, we first need to convert the checkpoints into a format supported by torchchat.
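
Taken together, the new README sections describe an export-build-run loop. A condensed sketch of the AOTI path; note the `--output-dso-path` flag is an assumption by analogy with the `--output-pte-path` flag shown above, since it does not appear verbatim in this diff:

```bash
# Hypothetical end-to-end AOTI flow (export flag assumed, not shown in this diff).
python3 torchchat.py export stories15M --output-dso-path stories15M.so
scripts/build_native.sh aoti
cmake-out/aoti_run stories15M.so -z tokenizer.model -i "Once upon a time"
```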

runner/aoti.cmake

Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 IF(DEFINED ENV{TORCHCHAT_ROOT})
   set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
 ELSE()
-  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
+  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 ENDIF()

 find_package(CUDA)
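
This one-line change reflects that aoti.cmake is now included from a top-level build, so `CMAKE_CURRENT_SOURCE_DIR` already points at the repository root. The environment override still takes precedence; a sketch with a hypothetical path:

```bash
# Build against a checkout other than the current directory (path is hypothetical).
TORCHCHAT_ROOT=/path/to/torchchat cmake -S . -B ./cmake-out -G Ninja
cmake --build ./cmake-out --target aoti_run
```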

runner/et.cmake

Lines changed: 25 additions & 23 deletions

@@ -15,24 +15,27 @@ ELSE()
   set(CMAKE_OUT_DIR "cmake-out")
 ENDIF()

-MESSAGE(STATUS "Using ET BUILD DIR: --[${ET_BUILD_DIR}]--")
-
 IF(DEFINED ENV{TORCHCHAT_ROOT})
   set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
 ELSE()
-  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
+  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 ENDIF()

 project(Torchchat)

+IF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+  SET(CMAKE_INSTALL_PREFIX ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install CACHE PATH "Setting it to a default value" FORCE)
+ENDIF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+
 include(CMakePrintHelpers)
 include(runner/Utils.cmake)

 cmake_print_variables(TORCHCHAT_ROOT)

-MESSAGE(STATUS "Looking for excutorch in ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch")
-set(executorch_DIR ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch)
-find_package(executorch CONFIG PATHS ${executorch_DIR})
+MESSAGE(STATUS "Looking for executorch in ${CMAKE_INSTALL_PREFIX}")
+
+find_package(executorch CONFIG HINTS ${CMAKE_INSTALL_PREFIX})
+
 if(executorch_FOUND)
   set(_common_include_directories ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src)

@@ -46,21 +49,21 @@ if(executorch_FOUND)
   # Link ET runtime + extensions
   target_link_libraries(
     et_run PRIVATE
-    executorch
-    extension_module
-    ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/extension/data_loader/libextension_data_loader.a # This one does not get installed by ExecuTorch
-    optimized_kernels
-    quantized_kernels
-    portable_kernels
-    cpublas
-    eigen_blas
-    # The libraries below need to be whole-archived linked
-    optimized_native_cpu_ops_lib
-    quantized_ops_lib
-    xnnpack_backend
-    XNNPACK
-    pthreadpool
-    cpuinfo
+    executorch
+    extension_module
+    extension_data_loader
+    optimized_kernels
+    quantized_kernels
+    portable_kernels
+    cpublas
+    eigen_blas
+    # The libraries below need to be whole-archive linked
+    optimized_native_cpu_ops_lib
+    quantized_ops_lib
+    xnnpack_backend
+    XNNPACK
+    pthreadpool
+    cpuinfo
   )
   target_link_options_shared_lib(optimized_native_cpu_ops_lib)
   target_link_options_shared_lib(quantized_ops_lib)
@@ -73,8 +76,7 @@ if(executorch_FOUND)
   endif()

   target_link_libraries(et_run PRIVATE
-    "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a>")
-
+    "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/libcustom_ops.a>")
   # This one is needed for cpuinfo where it uses android specific log lib
   if(ANDROID)
     target_link_libraries(et_run PRIVATE log)
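
With the new default, `CMAKE_INSTALL_PREFIX` resolves to `${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install`, the same prefix `install_utils.sh` installs ExecuTorch into, so `find_package(executorch CONFIG HINTS ...)` can find the package without a hard-coded `executorch_DIR`. A quick sanity check (a sketch, assuming the default `et-build` directory and that the install step has run):

```bash
# The HINTS prefix should contain the installed ExecuTorch cmake package.
ls et-build/install/lib/cmake/ExecuTorch
```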

scripts/build_native.sh

Lines changed: 79 additions & 0 deletions

@@ -0,0 +1,79 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Simple script to build the native aoti and et runners.
+
+set -ex
+
+# Function to display a help message
+show_help() {
+cat << EOF
+Usage: ${0##*/} [-h|--help] aoti|et
+This script builds the native aoti and et runners for LLM.
+    -h|--help  Display this help and exit
+    aoti       Build the native runner for aoti
+    et         Build the native runner for et
+EOF
+}
+
+# Check if no arguments were passed
+if [ $# -eq 0 ]; then
+  echo "No arguments provided"
+  show_help
+  exit 1
+fi
+
+while (( "$#" )); do
+  case "$1" in
+    -h|--help)
+      show_help
+      exit 0
+      ;;
+    aoti)
+      echo "Building aoti native runner..."
+      TARGET="aoti"
+      shift
+      ;;
+    et)
+      echo "Building et native runner..."
+      TARGET="et"
+      shift
+      ;;
+    *)
+      echo "Invalid option: $1"
+      show_help
+      exit 1
+      ;;
+  esac
+done
+
+if [ -z "${TORCHCHAT_ROOT}" ]; then
+  # Get the absolute path of the current script
+  SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+  # Get the absolute path of the parent directory
+  TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")"
+fi
+
+if [ -z "${ET_BUILD_DIR}" ]; then
+  ET_BUILD_DIR="et-build"
+fi
+
+source "$TORCHCHAT_ROOT/scripts/install_utils.sh"
+
+if [[ "$TARGET" == "et" ]]; then
+  pushd ${TORCHCHAT_ROOT}
+  git submodule update --init
+  find_cmake_prefix_path
+  install_pip_dependencies
+  clone_executorch
+  install_executorch_libs false
+  popd
+fi
+
+# CMake commands
+cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
+cmake --build ./cmake-out --target "${TARGET}_run"
+
+printf "Build finished. Please run:\n./cmake-out/${TARGET}_run model.<pte|so> -z tokenizer.model -i <prompt>\n"
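
Typical invocations, following the help text above (run from the repository root):

```bash
bash scripts/build_native.sh aoti  # configures and builds cmake-out/aoti_run
bash scripts/build_native.sh et    # clones/builds ExecuTorch first, then cmake-out/et_run
bash scripts/build_native.sh -h    # prints the usage block and exits
```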

scripts/install_et.sh

Lines changed: 1 addition & 2 deletions

@@ -19,6 +19,5 @@ pushd ${TORCHCHAT_ROOT}
 find_cmake_prefix_path
 install_pip_dependencies
 clone_executorch
-install_executorch_python_libs $ENABLE_ET_PYBIND
-install_executorch
+install_executorch_libs $ENABLE_ET_PYBIND
 popd
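
The pybind toggle is unchanged; only the tail of the script is consolidated into the new `install_executorch_libs` helper. The call pattern stays as before (a sketch mirroring the old CI usage):

```bash
# ENABLE_ET_PYBIND still selects whether the ExecuTorch Python bindings are built.
export ENABLE_ET_PYBIND=true
./scripts/install_et.sh $ENABLE_ET_PYBIND
```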

scripts/install_utils.sh

Lines changed: 23 additions & 13 deletions

@@ -9,11 +9,7 @@ set -ex pipefail

 install_pip_dependencies() {
   echo "Installing common pip packages"
-
-  pip3 install wheel
-  pip3 install "cmake>=3.19"
-  pip3 install ninja
-  pip3 install zstd
+  pip3 install wheel "cmake>=3.19" ninja zstd
   pushd ${TORCHCHAT_ROOT}
   pip3 install -r ./requirements.txt
   popd
@@ -60,6 +56,15 @@ install_executorch_python_libs() {
   popd
 }

+COMMON_CMAKE_ARGS="\
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
+    -DEXECUTORCH_LOG_LEVEL=Info \
+    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_QUANTIZED=ON"
+
 install_executorch() {
   # AOT lib has to be built for model export,
   # so by default it is built, and you can explicitly opt out
@@ -96,20 +101,25 @@ install_executorch() {
   echo "Inside: ${PWD}"
   rm -rf ${CMAKE_OUT_DIR}
   mkdir ${CMAKE_OUT_DIR}
-  cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
-    -DCMAKE_BUILD_TYPE=Release \
-    -DEXECUTORCH_ENABLE_LOGGING=ON \
-    -DEXECUTORCH_LOG_LEVEL=Info \
+  cmake ${COMMON_CMAKE_ARGS} \
+    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
     -DEXECUTORCH_BUILD_CUSTOM_OPS_AOT=${EXECUTORCH_BUILD_CUSTOM_OPS_AOT_VAR} \
     -DEXECUTORCH_BUILD_CUSTOM=${EXECUTORCH_BUILD_CUSTOM_VAR} \
-    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
-    -DEXECUTORCH_BUILD_QUANTIZED=ON \
     ${CROSS_COMPILE_ARGS} \
     -S . -B ${CMAKE_OUT_DIR} -G Ninja
   cmake --build ${CMAKE_OUT_DIR}
   cmake --install ${CMAKE_OUT_DIR} --prefix ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install
   popd
 }
+
+install_executorch_libs() {
+  # Install executorch python and C++ libs
+  export CMAKE_ARGS="\
+      ${COMMON_CMAKE_ARGS} \
+      -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+      -DCMAKE_INSTALL_PREFIX=${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install"
+  export CMAKE_BUILD_ARGS="--target install"
+
+  install_executorch_python_libs $1
+}
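
`install_executorch_libs` drives the C++ install through the Python build: it exports `CMAKE_ARGS` and `CMAKE_BUILD_ARGS` before delegating to `install_executorch_python_libs`, which assumes ExecuTorch's Python build honors those variables. A sketch of exercising the refactored helpers directly (assumes the repo root as the working directory; `find_cmake_prefix_path` and `clone_executorch` are elided):

```bash
# Sketch: calling the shared helpers by hand.
export TORCHCHAT_ROOT=${PWD}
source scripts/install_utils.sh
install_pip_dependencies       # now a single consolidated pip3 install
install_executorch_libs true   # exports CMAKE_ARGS/CMAKE_BUILD_ARGS, then builds the libs
```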
