
Commit 781f1ae

Update on "[Executorch] parallelize op_choose_qparams"
When doing prefill for a quantized KV cache with a large prefill length, parallelizing this op helps.

Differential Revision: [D84962234](https://our.internmc.facebook.com/intern/diff/D84962234/)

**NOTE FOR REVIEWERS**: This PR has internal Meta-specific changes or comments; please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D84962234/)!

[ghstack-poisoned]
2 parents c69dd4b + b370f31 commit 781f1ae
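The file listing below covers only CI and tooling files that came along with the rebase; the parallelized kernel itself is not shown. For orientation, here is a minimal, hedged sketch of the technique the title describes, assuming the usual structure of choose_qparams (a min/max scan over the input followed by a scale/zero-point computation) and using plain std::thread in place of ExecuTorch's threadpool. All names in it (choose_qparams_parallel, minmax_chunk) are invented for illustration; this is not the actual ExecuTorch implementation.

// A minimal sketch, NOT the ExecuTorch kernel: choose_qparams-style affine
// quantization parameter selection, with the min/max scan split across threads.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

struct QParams {
  double scale;
  int32_t zero_point;
};

// Each worker scans one contiguous chunk and records its local min/max.
static void minmax_chunk(const float* data, size_t begin, size_t end,
                         float* out_min, float* out_max) {
  float lo = data[begin];
  float hi = data[begin];
  for (size_t i = begin + 1; i < end; ++i) {
    lo = std::min(lo, data[i]);
    hi = std::max(hi, data[i]);
  }
  *out_min = lo;
  *out_max = hi;
}

QParams choose_qparams_parallel(const std::vector<float>& x,
                                int32_t qmin, int32_t qmax,
                                unsigned num_threads) {
  if (x.empty()) {
    return {1.0, 0};
  }
  const size_t n = x.size();
  num_threads = std::max(1u, num_threads);
  const size_t chunk = (n + num_threads - 1) / num_threads;
  // Seed every slot with a real data value so unused slots stay harmless.
  std::vector<float> mins(num_threads, x[0]);
  std::vector<float> maxs(num_threads, x[0]);
  std::vector<std::thread> workers;
  for (unsigned t = 0; t < num_threads; ++t) {
    const size_t begin = t * chunk;
    const size_t end = std::min(n, begin + chunk);
    if (begin < end) {
      workers.emplace_back(minmax_chunk, x.data(), begin, end,
                           &mins[t], &maxs[t]);
    }
  }
  for (auto& w : workers) {
    w.join();
  }
  // Reduce the per-thread results; include 0 so that 0.0f maps exactly onto
  // an integer value (the usual affine-quantization convention).
  const float min_v =
      std::min(0.0f, *std::min_element(mins.begin(), mins.end()));
  const float max_v =
      std::max(0.0f, *std::max_element(maxs.begin(), maxs.end()));
  double scale = (static_cast<double>(max_v) - min_v) / (qmax - qmin);
  if (scale == 0.0) {
    scale = 1.0;  // all-zero input: avoid division by zero below
  }
  int32_t zero_point =
      static_cast<int32_t>(std::lround(qmin - min_v / scale));
  zero_point = std::clamp(zero_point, qmin, qmax);
  return {scale, zero_point};
}

int main() {
  // Stand-in for a long prefill's worth of activations.
  std::vector<float> activations(1 << 20);
  for (size_t i = 0; i < activations.size(); ++i) {
    activations[i] = std::sin(0.001f * static_cast<float>(i));
  }
  const QParams qp = choose_qparams_parallel(
      activations, -128, 127,
      std::max(1u, std::thread::hardware_concurrency()));
  std::printf("scale=%f zero_point=%d\n", qp.scale, qp.zero_point);
  return 0;
}

The scan is embarrassingly parallel, which is consistent with the commit message: the reduction cost grows with the number of prefill tokens, while the final scale/zero-point computation stays constant.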

File tree: 191 files changed, +5529 -2527 lines

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-4361747abfc55e40e929396ed986efe775d745f9
+d03e90c2cd9048e6d9a75285c0355f033cd016fc
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-556fc09a9f67f24ca5591ec049c5d0c347c5f62a
+b31bad1b8f1331bf43d47f46602cf6141db56844

.ci/docker/requirements-ci.txt

Lines changed: 1 addition & 2 deletions

@@ -30,7 +30,6 @@ sphinx-reredirects==0.1.4
 matplotlib>=3.9.4
 sphinx-copybutton==0.5.2
 # PyTorch Theme
--e git+https://github.com/pytorch/pytorch_sphinx_theme.git@pytorch_sphinx_theme2#egg=pytorch_sphinx_theme2
-
+pytorch_sphinx_theme2==0.2.0
 # script unit test requirements
 yaspin==3.1.0

.ci/scripts/export_model_cuda_artifact.sh

Lines changed: 8 additions & 4 deletions

@@ -17,7 +17,7 @@ Arguments:
   hf_model      HuggingFace model ID (required)
                 Supported models:
                   - mistralai/Voxtral-Mini-3B-2507
-                  - openai/whisper-small
+                  - openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo})
                   - google/gemma-3-4b-it

   quant_name    Quantization type (optional, default: non-quantized)

@@ -62,13 +62,17 @@ case "$HF_MODEL" in
     PREPROCESSOR_FEATURE_SIZE="128"
     PREPROCESSOR_OUTPUT="voxtral_preprocessor.pte"
     ;;
-  openai/whisper-small)
+  openai/whisper-*)
     MODEL_NAME="whisper"
     TASK="automatic-speech-recognition"
     MAX_SEQ_LEN=""
     EXTRA_PIP="librosa"
-    PREPROCESSOR_FEATURE_SIZE="80"
     PREPROCESSOR_OUTPUT="whisper_preprocessor.pte"
+    if [[ "$HF_MODEL" == *"large-v3"* ]]; then
+      PREPROCESSOR_FEATURE_SIZE="128"
+    else
+      PREPROCESSOR_FEATURE_SIZE="80"
+    fi
     ;;
   google/gemma-3-4b-it)
     MODEL_NAME="gemma3"

@@ -80,7 +84,7 @@ case "$HF_MODEL" in
     ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-small, google/gemma-3-4b-it"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it"
     exit 1
     ;;
 esac

.ci/scripts/test_model_cuda_e2e.sh

Lines changed: 8 additions & 8 deletions

@@ -17,7 +17,7 @@ Arguments:
   hf_model      HuggingFace model ID (required)
                 Supported models:
                   - mistralai/Voxtral-Mini-3B-2507
-                  - openai/whisper-small
+                  - openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo})
                   - google/gemma-3-4b-it

   quant_name    Quantization type (required)

@@ -91,13 +91,13 @@ case "$HF_MODEL" in
     AUDIO_FILE="poem.wav"
     IMAGE_PATH=""
     ;;
-  openai/whisper-small)
-    MODEL_NAME="whisper"
+  openai/whisper-*)
+    MODEL_NAME="${HF_MODEL#openai/}"
     RUNNER_TARGET="whisper_runner"
     RUNNER_PATH="whisper"
     EXPECTED_OUTPUT="Mr. Quilter is the apostle of the middle classes"
     PREPROCESSOR="whisper_preprocessor.pte"
-    TOKENIZER_URL="https://huggingface.co/openai/whisper-small/resolve/main" # @lint-ignore
+    TOKENIZER_URL="https://huggingface.co/${HF_MODEL}/resolve/main" # @lint-ignore
     TOKENIZER_FILE=""
     AUDIO_URL=""
     AUDIO_FILE="output.wav"

@@ -117,7 +117,7 @@ case "$HF_MODEL" in
     ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-small, google/gemma-3-4b-it"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it"
     exit 1
     ;;
 esac

@@ -142,7 +142,7 @@ fi
 # Download test files
 if [ "$AUDIO_URL" != "" ]; then
   curl -L $AUDIO_URL -o ${MODEL_DIR}/$AUDIO_FILE
-elif [ "$MODEL_NAME" = "whisper" ]; then
+elif [[ "$MODEL_NAME" == *whisper* ]]; then
   conda install -y -c conda-forge "ffmpeg<8"
   pip install datasets soundfile torchcodec
   python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"

@@ -179,8 +179,8 @@ case "$MODEL_NAME" in
   voxtral)
     RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR"
     ;;
-  whisper)
-    RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR"
+  whisper-*)
+    RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR --model_name ${MODEL_NAME}"
     ;;
   gemma3)
     RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --image_path $IMAGE_PATH"

.ci/scripts/test_qnn_static_llm.sh

Lines changed: 1 addition & 1 deletion

@@ -81,7 +81,7 @@ elif [[ "${TASK_NAME}" == "stories_260k_bc" ]]; then
   fi

 elif [[ "${TASK_NAME}" == "smollm2_135m" ]]; then
-  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_static_smollm2 --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./static_smollm2 --enable_x86_64
+  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_static_llm_model --model_name smollm2_135m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./static_smollm2 --enable_x86_64
   exit_code1=$?
   if [ $exit_code1 -ne 0 ]; then
     exit 1

.github/workflows/cuda.yml

Lines changed: 4 additions & 0 deletions

@@ -104,6 +104,8 @@ jobs:
             name: "Voxtral-Mini-3B-2507"
           - repo: "openai"
             name: "whisper-small"
+          - repo: "openai"
+            name: "whisper-large-v3-turbo"
           - repo: "google"
             name: "gemma-3-4b-it"
         quant:

@@ -223,6 +225,8 @@ jobs:
             name: "Voxtral-Mini-3B-2507"
           - repo: "openai"
             name: "whisper-small"
+          - repo: "openai"
+            name: "whisper-large-v3-turbo"
           - repo: "google"
             name: "gemma-3-4b-it"
         quant:

.github/workflows/doc-build.yml

Lines changed: 8 additions & 7 deletions

@@ -46,14 +46,15 @@ jobs:
           export CHANNEL=nightly
         fi

-        # Get the version of ExecuTorch from REF_NAME and save as ET_VERSION_DOCS
-        # ET_VERSION_DOCS will be pulled during the doc build to add to the version dropdown
-        # on the website. See docs/source/conf.py for details
-
+        # Set RELEASE environment variable for tagged releases
         GITHUB_REF=${{ github.ref }}
-        echo "$GITHUB_REF"
-        export ET_VERSION_DOCS="${GITHUB_REF}"
-        echo "$ET_VERSION_DOCS"
+        if [[ "${GITHUB_REF}" =~ ^refs/tags/v[0-9]+\.[0-9]+ ]]; then
+          export RELEASE=true
+          echo "Building release docs (RELEASE=true)"
+        else
+          export RELEASE=false
+          echo "Building main docs (RELEASE=false)"
+        fi

         set -eux

.lintrunner.toml

Lines changed: 21 additions & 0 deletions

@@ -449,3 +449,24 @@ command = [
     "--",
     "@{{PATHSFILE}}",
 ]
+
+[[linter]]
+code = 'ETVKNODEBUG'
+include_patterns = [
+    "backends/vulkan/**/*.glsl",
+]
+command = [
+    'python',
+    '-m',
+    'lintrunner_adapters',
+    'run',
+    'grep_linter',
+    '--pattern=((DEBUG_MODE)|(GL_EXT_debug_printf))',
+    '--linter-name=ETVKNODEBUG',
+    '--error-name=Using DEBUG_MODE or GL_EXT_debug_printf in Vulkan shader',
+    """--error-description=\
+#define DEBUG_MODE or #extension GL_EXT_debug_printf should only be used during development!
+""",
+    '--',
+    '@{{PATHSFILE}}',
+]

.mypy.ini

Lines changed: 6 additions & 0 deletions

@@ -74,6 +74,12 @@ ignore_missing_imports = True
 [mypy-pytorch_sphinx_theme]
 ignore_missing_imports = True

+[mypy-pytorch_sphinx_theme2]
+ignore_missing_imports = True
+
+[mypy-executorch.version]
+ignore_missing_imports = True
+
 [mypy-ruamel]
 ignore_missing_imports = True
