2 changes: 1 addition & 1 deletion .github/workflows/torchao_experimental_test.yml
@@ -40,7 +40,7 @@ jobs:
           pip install numpy
           pip install pytest
           pip install parameterized
-          USE_CPP=1 pip install .
+          USE_CPP=1 TORCHAO_BUILD_KLEIDIAI=1 pip install .
       - name: Run python tests
         run: |
           conda activate venv
6 changes: 6 additions & 0 deletions torchao/dtypes/__init__.py
@@ -20,9 +20,12 @@
     MarlinQQQLayout,
     MarlinQQQTensor,
     MarlinSparseLayout,
+    PackedLinearInt8DynamicActivationIntxWeightLayout,
+    QDQLayout,
     SemiSparseLayout,
     TensorCoreTiledLayout,
     UintxLayout,
+    to_affine_quantized_packed_linear_int8_dynamic_activation_intx_weight,
     to_marlinqqq_quantized_intx,
 )
 from .utils import (
@@ -54,4 +57,7 @@
"Int4CPULayout",
"CutlassInt4PackedLayout",
"CutlassSemiSparseLayout",
"QDQLayout",
"PackedLinearInt8DynamicActivationIntxWeightLayout",
"to_affine_quantized_packed_linear_int8_dynamic_activation_intx_weight",
]
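For orientation, here is a minimal usage sketch of the names this file now re-exports from torchao.dtypes. Only the import names are taken from the diff; the no-argument constructors are an assumption, and the snippet is an editor's illustration rather than code from the PR.

# Editor's sketch, not part of the PR: exercising the symbols newly exported
# from torchao.dtypes by this change. Constructor arguments (if any) are an
# assumption; the diff only shows that the names are re-exported.
from torchao.dtypes import (
    PackedLinearInt8DynamicActivationIntxWeightLayout,
    QDQLayout,
    to_affine_quantized_packed_linear_int8_dynamic_activation_intx_weight,
)

# A layout instance selects the kernel path for an affine-quantized tensor:
# the packed layout targets the int8-dynamic-activation / intx-weight kernels,
# while QDQLayout keeps a plain quantize-dequantize reference representation.
packed_layout = PackedLinearInt8DynamicActivationIntxWeightLayout()
qdq_layout = QDQLayout()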
20 changes: 20 additions & 0 deletions torchao/dtypes/affine_quantized_tensor_ops.py
@@ -51,13 +51,25 @@
     _linear_fp_act_int4_weight_sparse_marlin_check,
     _linear_fp_act_int4_weight_sparse_marlin_impl,
 )
+from torchao.dtypes.uintx.packed_linear_int8_dynamic_activation_intx_weight_layout import (
+    _linear_check as _linear_int8_act_intx_weight_packed_check,
+)
+from torchao.dtypes.uintx.packed_linear_int8_dynamic_activation_intx_weight_layout import (
+    _linear_impl as _linear_int8_act_intx_weight_packed_impl,
+)
 from torchao.dtypes.uintx.plain_layout import (
     PlainAQTTensorImpl,
     _linear_fp_act_int8_weight_check,
     _linear_fp_act_int8_weight_impl,
     _linear_int8_act_int8_weight_check,
     _linear_int8_act_int8_weight_impl,
 )
+from torchao.dtypes.uintx.q_dq_layout import (
+    _linear_check as _linear_q_dq_check,
+)
+from torchao.dtypes.uintx.q_dq_layout import (
+    _linear_impl as _linear_q_dq_impl,
+)
 from torchao.dtypes.uintx.semi_sparse_layout import (
     _linear_int8_act_int8_weight_semi_structured_sparse_check,
     _linear_int8_act_int8_weight_semi_structured_sparse_impl,
@@ -199,6 +211,14 @@ def _register_aqt_quantized_linear_dispatches():
             _linear_fp_act_uint4_weight_cpu_check,
             _linear_fp_act_uint4_weight_cpu_impl,
         ),
+        (
+            _linear_int8_act_intx_weight_packed_check,
+            _linear_int8_act_intx_weight_packed_impl,
+        ),
+        (
+            _linear_q_dq_check,
+            _linear_q_dq_impl,
+        ),
     ]:
         register_aqt_quantized_linear_dispatch(dispatch_condition, impl)

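As registered above, each layout contributes a (dispatch_condition, impl) pair to register_aqt_quantized_linear_dispatch. The sketch below shows the general shape of such a pair, assuming the dispatcher passes the (input, weight, bias) triple to both functions; the bodies are illustrative placeholders, not the PR's actual _linear_check/_linear_impl.

# Editor's sketch of the (dispatch_condition, impl) contract consumed by
# register_aqt_quantized_linear_dispatch; names and bodies are illustrative
# assumptions, not torchao's actual packed-layout implementation.
import torch

def _example_check(input_tensor, weight_tensor, bias) -> bool:
    # Illustrative condition: claim the dispatch only when the quantized
    # weight carries the packed int8-activation / intx-weight layout.
    layout = getattr(weight_tensor, "_layout", None)
    return type(layout).__name__ == "PackedLinearInt8DynamicActivationIntxWeightLayout"

def _example_impl(input_tensor, weight_tensor, bias):
    # Illustrative fallback: dequantize the weight and run a plain linear.
    return torch.nn.functional.linear(input_tensor, weight_tensor.dequantize(), bias)

# Registration mirrors the loop shown in the hunk above:
#     register_aqt_quantized_linear_dispatch(_example_check, _example_impl)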
10 changes: 10 additions & 0 deletions torchao/dtypes/uintx/__init__.py
@@ -15,6 +15,13 @@
 from .marlin_sparse_layout import (
     MarlinSparseLayout,
 )
+from .packed_linear_int8_dynamic_activation_intx_weight_layout import (
+    PackedLinearInt8DynamicActivationIntxWeightLayout,
+    to_affine_quantized_packed_linear_int8_dynamic_activation_intx_weight,
+)
+from .q_dq_layout import (
+    QDQLayout,
+)
 from .semi_sparse_layout import (
     SemiSparseLayout,
 )
@@ -36,4 +43,7 @@
"MarlinQQQTensor",
"to_marlinqqq_quantized_intx",
"CutlassInt4PackedLayout",
"to_affine_quantized_packed_linear_int8_dynamic_activation_intx_weight",
"PackedLinearInt8DynamicActivationIntxWeightLayout",
"QDQLayout",
]