-
Notifications
You must be signed in to change notification settings - Fork 607
Add quantized op support to llama runner #3062
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f6b2fa6
a0e1193
6670a16
020dc4e
38cfb8d
3291cb7
41abbb5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" | |
MODEL_NAME=$1 # stories110M.pt | ||
BUILD_TOOL=$2 # buck2 or cmake | ||
DTYPE=$3 # fp16 or fp32 | ||
MODE=${4:-"xnnpack"} # portable or xnnpack | ||
MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe | ||
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args | ||
echo "Expecting atleast 4 positional arguments" | ||
echo "Usage: [...]" | ||
|
@@ -37,7 +37,7 @@ if [[ -z "${MODE:-}" ]]; then | |
exit 1 | ||
fi | ||
|
||
if [[ "${MODE}" =~ xnnpack.* ]]; then | ||
if [[ "${MODE}" =~ .*xnnpack.* ]]; then | ||
XNNPACK=ON | ||
else | ||
XNNPACK=OFF | ||
|
@@ -49,6 +49,12 @@ else | |
CUSTOM=OFF | ||
fi | ||
|
||
if [[ "${MODE}" =~ .*qe.* ]]; then | ||
QE=ON | ||
else | ||
QE=OFF | ||
fi | ||
|
||
if [[ -z "${BUCK:-}" ]]; then | ||
BUCK=buck2 | ||
fi | ||
|
@@ -84,7 +90,6 @@ cmake_build_llama_runner() { | |
-DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \ | ||
-DEXECUTORCH_BUILD_OPTIMIZED=ON \ | ||
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \ | ||
-DEXECUTORCH_BUILD_OPTIMIZED=ON \ | ||
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ | ||
-Bcmake-out/${dir} \ | ||
${dir} | ||
|
@@ -126,9 +131,15 @@ fi | |
# Export model. | ||
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte" | ||
echo "Exporting ${EXPORTED_MODEL_NAME}" | ||
EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}" | ||
if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then | ||
EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128" | ||
EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv" | ||
if [[ "${XNNPACK}" == "ON" ]]; then | ||
EXPORT_ARGS="${EXPORT_ARGS} -X -qmode 8da4w -G 128" | ||
fi | ||
if [[ "${CUSTOM}" == "ON" ]]; then | ||
EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache" | ||
fi | ||
if [[ "${QE}" == "ON" ]]; then | ||
EXPORT_ARGS="${EXPORT_ARGS} --embedding-quantize 8,1024" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. thanks for adding tests! |
||
fi | ||
# Add dynamically linked library location | ||
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,7 @@ set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..) | |
set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch) | ||
|
||
include(${EXECUTORCH_ROOT}/build/Utils.cmake) | ||
include(${EXECUTORCH_ROOT}/build/Codegen.cmake) | ||
|
||
if(NOT PYTHON_EXECUTABLE) | ||
resolve_python_executable() | ||
|
@@ -91,6 +92,7 @@ add_subdirectory(runner) | |
if(EXECUTORCH_USE_TIKTOKEN) | ||
# find RE2 for tokenizer | ||
set(ABSL_ENABLE_INSTALL ON) | ||
set(ABSL_PROPAGATE_CXX_STD ON) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. oh we depend on abseil for tiktoken? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, tiktoken -> re2 -> abseil There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. no tests using this path yet right? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not yet |
||
set(_pic_flag | ||
${CMAKE_POSITION_INDEPENDENT_CODE}) | ||
set(CMAKE_POSITION_INDEPENDENT_CODE ON) | ||
|
@@ -118,6 +120,26 @@ else() | |
target_link_options_shared_lib(portable_ops_lib) | ||
endif() | ||
|
||
# quantized ops yaml file operation | ||
merge_yaml( | ||
FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/ops/quantized.yaml | ||
FALLBACK_YAML ${EXECUTORCH_ROOT}/kernels/quantized/quantized.yaml | ||
OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) | ||
|
||
gen_selected_ops("${CMAKE_CURRENT_BINARY_DIR}/merged.yaml" "" "") | ||
generate_bindings_for_kernels( | ||
FUNCTIONS_YAML ${CMAKE_CURRENT_BINARY_DIR}/merged.yaml) | ||
message("Generated files ${gen_command_sources}") | ||
|
||
# quantized_merge_ops_lib: Register quantized op kernels into the runtime | ||
gen_operators_lib( | ||
"quantized_merge_ops_lib" | ||
KERNEL_LIBS quantized_kernels | ||
DEPS executorch) | ||
target_include_directories(quantized_merge_ops_lib PUBLIC ${_common_include_directories}) | ||
target_link_options_shared_lib(quantized_merge_ops_lib) | ||
list(APPEND link_libraries quantized_kernels quantized_merge_ops_lib) | ||
|
||
if(EXECUTORCH_BUILD_CUSTOM) | ||
target_link_options_shared_lib(custom_ops) | ||
list(APPEND link_libraries custom_ops) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: does += operator work?