Skip to content

Commit 1b15dbc

Browse files
authored
Merge branch 'main' into patch-40
2 parents 0cb73e5 + 07768cf commit 1b15dbc

File tree

9 files changed: 78 additions (+78) and 7 deletions (-7).

.github/workflows/more-tests.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,9 @@ on:
99

1010
jobs:
1111
test-cuda:
12+
permissions:
13+
id-token: write
14+
contents: read
1215
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1316
with:
1417
runner: linux.g5.4xlarge.nvidia.gpu

.github/workflows/periodic.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,9 @@ jobs:
108108
set -eux
109109
PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "periodic" --backend "gpu"
110110
test-gpu:
111+
permissions:
112+
id-token: write
113+
contents: read
111114
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
112115
name: test-gpu (${{ matrix.platform }}, ${{ matrix.model_name }})
113116
needs: gather-models-gpu

.github/workflows/pull.yml

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,9 @@ jobs:
215215
set -eux
216216
PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "pull_request" --backend "gpu"
217217
test-gpu-compile:
218+
permissions:
219+
id-token: write
220+
contents: read
218221
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
219222
name: test-gpu-compile (${{ matrix.platform }}, ${{ matrix.model_name }})
220223
needs: gather-models-gpu
@@ -250,6 +253,9 @@ jobs:
250253
echo "::endgroup::"
251254
252255
test-gpu-aoti-bfloat16:
256+
permissions:
257+
id-token: write
258+
contents: read
253259
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
254260
name: test-gpu-aoti-bfloat16 (${{ matrix.platform }}, ${{ matrix.model_name }})
255261
needs: gather-models-gpu
@@ -286,6 +292,9 @@ jobs:
286292
echo "::endgroup::"
287293
288294
test-gpu-aoti-float32:
295+
permissions:
296+
id-token: write
297+
contents: read
289298
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
290299
name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
291300
needs: gather-models-gpu
@@ -327,6 +336,9 @@ jobs:
327336
echo "::endgroup::"
328337
329338
test-gpu-aoti-float16:
339+
permissions:
340+
id-token: write
341+
contents: read
330342
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
331343
name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
332344
needs: gather-models-gpu
@@ -369,6 +381,9 @@ jobs:
369381
echo "::endgroup::"
370382
371383
test-gpu-eval-sanity-check:
384+
permissions:
385+
id-token: write
386+
contents: read
372387
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
373388
name: test-gpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
374389
needs: gather-models-gpu
@@ -1011,6 +1026,9 @@ jobs:
10111026
echo "Tests complete."
10121027
10131028
test-build-runner-et-android:
1029+
permissions:
1030+
id-token: write
1031+
contents: read
10141032
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
10151033
with:
10161034
runner: linux.4xlarge

.github/workflows/run-readme-periodic.yml

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,9 @@ on:
1010

1111
jobs:
1212
test-readme:
13+
permissions:
14+
id-token: write
15+
contents: read
1316
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1417
secrets: inherit
1518
with:
@@ -39,6 +42,9 @@ jobs:
3942
4043
4144
test-quantization-any:
45+
permissions:
46+
id-token: write
47+
contents: read
4248
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
4349
with:
4450
runner: linux.g5.4xlarge.nvidia.gpu
@@ -66,6 +72,9 @@ jobs:
6672
echo "::endgroup::"
6773
6874
test-gguf-any:
75+
permissions:
76+
id-token: write
77+
contents: read
6978
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
7079
secrets: inherit
7180
with:

.github/workflows/run-readme-pr-mps.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ jobs:
1010
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
1111
with:
1212
runner: macos-m1-14
13-
timeout-minutes: 50
13+
timeout: 50
1414
script: |
1515
conda create -y -n test-readme-mps-macos python=3.10.11 llvm-openmp
1616
conda activate test-readme-mps-macos
@@ -36,7 +36,7 @@ jobs:
3636
test-quantization-mps-macos:
3737
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
3838
with:
39-
runner: macos-m1-14
39+
runner: macos-m1-14
4040
script: |
4141
set -x
4242
conda create -y -n test-quantization-mps-macos python=3.10.11

.github/workflows/run-readme-pr.yml

Lines changed: 37 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,9 @@ on:
99

1010
jobs:
1111
test-readme-any:
12+
permissions:
13+
id-token: write
14+
contents: read
1215
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1316
with:
1417
runner: linux.g5.4xlarge.nvidia.gpu
@@ -28,6 +31,9 @@ jobs:
2831
echo "::endgroup::"
2932
3033
test-readme-cpu:
34+
permissions:
35+
id-token: write
36+
contents: read
3137
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
3238
with:
3339
runner: linux.g5.4xlarge.nvidia.gpu
@@ -47,6 +53,9 @@ jobs:
4753
echo "::endgroup::"
4854
4955
test-quantization-any:
56+
permissions:
57+
id-token: write
58+
contents: read
5059
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
5160
with:
5261
runner: linux.g5.4xlarge.nvidia.gpu
@@ -66,6 +75,9 @@ jobs:
6675
echo "::endgroup::"
6776
6877
test-quantization-cpu:
78+
permissions:
79+
id-token: write
80+
contents: read
6981
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
7082
with:
7183
runner: linux.g5.4xlarge.nvidia.gpu
@@ -80,6 +92,9 @@ jobs:
8092
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs quantization
8193
8294
test-gguf-any:
95+
permissions:
96+
id-token: write
97+
contents: read
8398
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
8499
with:
85100
runner: linux.g5.4xlarge.nvidia.gpu
@@ -99,6 +114,9 @@ jobs:
99114
echo "::endgroup::"
100115
101116
test-gguf-cpu:
117+
permissions:
118+
id-token: write
119+
contents: read
102120
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
103121
with:
104122
runner: linux.g5.4xlarge.nvidia.gpu
@@ -119,6 +137,9 @@ jobs:
119137
120138
121139
test-advanced-any:
140+
permissions:
141+
id-token: write
142+
contents: read
122143
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
123144
with:
124145
runner: linux.g5.4xlarge.nvidia.gpu
@@ -139,6 +160,9 @@ jobs:
139160
140161
141162
test-advanced-cpu:
163+
permissions:
164+
id-token: write
165+
contents: read
142166
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
143167
with:
144168
runner: linux.g5.4xlarge.nvidia.gpu
@@ -158,6 +182,9 @@ jobs:
158182
echo "::endgroup::"
159183
160184
test-evaluation-any:
185+
permissions:
186+
id-token: write
187+
contents: read
161188
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
162189
with:
163190
runner: linux.g5.4xlarge.nvidia.gpu
@@ -177,6 +204,9 @@ jobs:
177204
echo "::endgroup::"
178205
179206
test-evaluation-cpu:
207+
permissions:
208+
id-token: write
209+
contents: read
180210
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
181211
with:
182212
runner: linux.g5.4xlarge.nvidia.gpu
@@ -196,6 +226,9 @@ jobs:
196226
echo "::endgroup::"
197227
198228
test-multimodal-any:
229+
permissions:
230+
id-token: write
231+
contents: read
199232
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
200233
with:
201234
runner: linux.g5.4xlarge.nvidia.gpu
@@ -215,6 +248,9 @@ jobs:
215248
echo "::endgroup::"
216249
217250
test-multimodal-cpu:
251+
permissions:
252+
id-token: write
253+
contents: read
218254
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
219255
with:
220256
runner: linux.g5.4xlarge.nvidia.gpu
@@ -269,4 +305,4 @@ jobs:
269305
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
270306
echo "::endgroup::"
271307
272-
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs native
308+
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs native

.github/workflows/runner-cuda-dtype.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,9 @@ on:
99

1010
jobs:
1111
test-runner-aot-cuda:
12+
permissions:
13+
id-token: write
14+
contents: read
1215
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1316
with:
1417
runner: linux.g5.4xlarge.nvidia.gpu

install/.pins/et-pin.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
98e4dd524f2cb08414ee015b27616229cabc06ba
1+
9c043290ad3944268290e015c3063bc411e6ef6b

torchchat/export.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,6 @@ def export_for_server(
125125
)
126126
from executorch.exir.tracer import Value
127127

128-
from torch._export import capture_pre_autograd_graph
129128
from torch.export import export, export_for_training, ExportedProgram
130129

131130
from torchchat.model import apply_rotary_emb, Attention
@@ -223,7 +222,7 @@ def forward(self, x, freqs_cis, mask, input_pos=None, cache_lane: int = 0):
223222
return self.wo(output)
224223

225224
def replace_attention_with_custom_sdpa_attention(module: nn.Module):
226-
from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # noqa
225+
from executorch.extension.llm.custom_ops import custom_ops # noqa
227226

228227
for name, child in module.named_children():
229228
if isinstance(child, Attention):
@@ -316,7 +315,7 @@ def export_for_et(model, device, output_path) -> str:
316315
with torch.nn.attention.sdpa_kernel(
317316
[torch.nn.attention.SDPBackend.MATH]
318317
), torch.no_grad():
319-
m = capture_pre_autograd_graph(model, input, dynamic_shapes=dynamic_shapes)
318+
m = export_for_training(model, input, dynamic_shapes=dynamic_shapes).module()
320319

321320
edge_manager = export_to_edge(
322321
m,

0 commit comments

Comments
 (0)