Skip to content

Commit 4da1fed

Browse files
authored
Merge branch 'main' into bump-experimental-lowbit-test
2 parents 1b74bcf + ffb4350 commit 4da1fed

File tree

133 files changed

+2006
-815
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

133 files changed

+2006
-815
lines changed

.github/workflows/build-wheels_m1.yml

Lines changed: 0 additions & 74 deletions
This file was deleted.

.github/workflows/build_wheels_aarch64_linux.yml

Lines changed: 0 additions & 87 deletions
This file was deleted.

.github/workflows/build_wheels_linux.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ jobs:
3030
with-cuda: enable
3131
with-rocm: enable
3232
with-xpu: enable
33-
# please note: excluding 3.13t for aarch64 builds for now
34-
python-versions: '["3.9", "3.10", "3.11", "3.12", "3.13"]'
33+
# Note: if free-threaded python is required add py3.13t here
34+
python-versions: '["3.9"]'
3535

3636
build:
3737
needs: generate-matrix

.github/workflows/build_wheels_windows.yml

Lines changed: 0 additions & 97 deletions
This file was deleted.

.github/workflows/dashboard_perf_test.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,19 +42,19 @@ jobs:
4242
4343
mkdir -p ${{ runner.temp }}/benchmark-results
4444
# llama3 - compile baseline
45-
${CONDA_RUN} python benchmarks/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
45+
${CONDA_RUN} python torchao/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
4646
4747
# llama3 - autoquant
48-
${CONDA_RUN} python benchmarks/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --quantization autoquant --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
48+
${CONDA_RUN} python torchao/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --quantization autoquant --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
4949
5050
# skipping SAM because of https://hud.pytorch.org/pr/pytorch/ao/1407
5151
# # SAM
5252
# ${CONDA_RUN} pip install git+https://github.com/pytorch-labs/segment-anything-fast.git@main
5353
# # SAM compile baseline
54-
# ${CONDA_RUN} sh benchmarks/_models/sam/setup.sh
55-
# ${CONDA_RUN} python benchmarks/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
54+
# ${CONDA_RUN} sh torchao/_models/sam/setup.sh
55+
# ${CONDA_RUN} python torchao/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
5656
57-
# ${CONDA_RUN} python benchmarks/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --compression autoquant --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
57+
# ${CONDA_RUN} python torchao/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --compression autoquant --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
5858
5959
# SAM 2.1
6060
# ${CONDA_RUN} sh scripts/download_sam2_ckpts.sh ${CHECKPOINT_PATH}/sam2

.github/workflows/float8nocompile_test.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,12 @@ on:
77
- 'gh/**'
88
paths:
99
- 'torchao/prototype/float8nocompile/**'
10-
- '!torchao/prototype/float8nocompile/**'
1110
pull_request:
1211
branches:
1312
- main
1413
- 'gh/**'
1514
paths:
1615
- 'torchao/prototype/float8nocompile/**'
17-
- '!torchao/prototype/float8nocompile/**'
1816

1917
concurrency:
2018
group: floatnocompile_test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}

.github/workflows/torchao_experimental_test.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
- name: Install requirements
3434
run: |
3535
conda activate venv
36-
pip install --extra-index-url "https://download.pytorch.org/whl/nightly/cpu" torch=="2.7.0.dev20250131"
36+
pip install torch --index-url "https://download.pytorch.org/whl/nightly/cpu"
3737
pip install numpy
3838
pip install pytest
3939
USE_CPP=1 pip install .
@@ -53,8 +53,8 @@ jobs:
5353
run: |
5454
conda activate venv
5555
pushd torchao/experimental/ops/tests
56-
# sh build_and_run_tests.sh
57-
# rm -rf /tmp/cmake-out
56+
sh build_and_run_tests.sh
57+
rm -rf /tmp/cmake-out
5858
popd
5959
6060
test-mps-ops:

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ torchao just works with `torch.compile()` and `FSDP2` over most PyTorch models o
1919

2020
### Post Training Quantization
2121

22-
Quantizing and Sparsifying your models is a 1 liner that should work on any model with an `nn.Linear` including your favorite HuggingFace model. You can find a more comprehensive usage instructions [here](torchao/quantization/), sparsity [here](/benchmarks/_models/sam/README.md) and a HuggingFace inference example [here](scripts/hf_eval.py)
22+
Quantizing and Sparsifying your models is a 1 liner that should work on any model with an `nn.Linear` including your favorite HuggingFace model. You can find more comprehensive usage instructions [here](torchao/quantization/), sparsity [here](/torchao/_models/sam/README.md) and a HuggingFace inference example [here](scripts/hf_eval.py)
2323

2424
For inference, we have the option of
2525
1. Quantize only the weights: works best for memory bound models
@@ -52,7 +52,7 @@ We also provide a developer facing API so you can implement your own quantizatio
5252

5353
We've added kv cache quantization and other features in order to enable long context length (and necessarily memory efficient) inference.
5454

55-
In practice these features alongside int4 weight only quantization allow us to **reduce peak memory by ~55%**, meaning we can Llama3.1-8B inference with a **130k context length with only 18.9 GB of peak memory.** More details can be found [here](benchmarks/_models/llama/README.md)
55+
In practice these features alongside int4 weight only quantization allow us to **reduce peak memory by ~55%**, meaning we can run Llama3.1-8B inference with a **130k context length with only 18.9 GB of peak memory.** More details can be found [here](torchao/_models/llama/README.md)
5656

5757
## Training
5858

benchmarks/_models/llama/__init__.py

Whitespace-only changes.

benchmarks/_models/sam/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)