pytorch · xiaowangintel · May 13, 2025 · May 15, 2025 · May 15, 2025 · May 20, 2025
diff --git a/test/quantization/test_quant_primitives.py b/test/quantization/test_quant_primitives.py
@@ -139,6 +139,8 @@ def _groupwise_affine_quantize_tensor_from_qparams(
     if TORCH_VERSION_AT_LEAST_2_5:
         if (not (check_cpu_version(w.device))) and (not (check_xpu_version(w.device))):
             w_int4x8 = (w_int4x8[::, ::2] << 4 | w_int4x8[::, 1::2]).to(torch.uint8)
+        if (check_xpu_version(w.device)):
+            w_int4x8 = (w_int4x8[::, 1::2] << 4 | w_int4x8[::, ::2]).to(torch.uint8)
 
     return w_int4x8
 
@@ -752,6 +754,8 @@ def test_groupwise_affine_dequantize_tensor_from_qparams(self):
                     not (check_xpu_version(input.device))
                 ):
                     input_tmp = (input[::, ::2] << 4 | input[::, 1::2]).to(torch.uint8)
+                if (check_xpu_version(input.device)):
+                    input_tmp = (input[::, 1::2] << 4 | input[::, ::2]).to(torch.uint8)
                 w_bf16 = groupwise_affine_dequantize_tensor_from_qparams(
                     input_tmp, scales, zeros, n_bit, groupsize, zero_point_domain
                 )

diff --git a/torchao/dtypes/uintx/int4_xpu_layout.py b/torchao/dtypes/uintx/int4_xpu_layout.py
@@ -246,14 +246,15 @@ def from_plain(
     ):
         assert isinstance(_layout, Int4XPULayout)
 
-        from torchao.quantization.utils import convert_weight_to_int4pack_xpu
-
         if TORCH_VERSION_AT_LEAST_2_8:
             assert int_data.dtype == torch.int32, (
                 "torch.ops.aten._convert_weight_to_int4pack_for_cpu expects `int32` dtype"
             )
-            packed_weight = convert_weight_to_int4pack_xpu(
-                int_data, zero_point.dtype != scale.dtype
+            packed_weight = (int_data[::, 1::2] << 4 | int_data[::, ::2]).to(
+                torch.uint8
+            )
+            packed_weight = torch.ops.aten._convert_weight_to_int4pack(
+                packed_weight.contiguous(), 8
             )
         else:
             assert False, "INT4 not supported on XPU until 2.8"