Description
Describe the bug
self.ipex_linear = ipex.llm.quantization.IPEXWeightOnlyQuantizedLinear.from_weight(self.qweight,
../../anaconda3/envs/autoround/lib/python3.10/site-packages/intel_extension_for_pytorch/llm/quantization/woq_linear.py:63: in from_weight
woq_linear_impl = woq_linear_impl_cls.from_weight(
../../anaconda3/envs/autoround/lib/python3.10/site-packages/intel_extension_for_pytorch/nn/modules/weight_only_quantization.py:452: in from_weight
return cls.from_int4_weight(
../../anaconda3/envs/autoround/lib/python3.10/site-packages/intel_extension_for_pytorch/nn/modules/weight_only_quantization.py:387: in from_int4_weight
qlinear._op_context = torch.ops.ipex_prepack.weight_only_qlinear_prepack_int4(
self = <OpOverloadPacket(op='ipex_prepack.weight_only_qlinear_prepack_int4')>
args = (tensor([[ 1785551018, -1972918597, -1182219673, ..., -1736939623,
2023065163, -1705592454],
[ 1484...-9.489e-05, -4.463e-04, -2.480e-04, -7.629e-04, 1.228e-03, -8.774e-04],
dtype=torch.bfloat16), None, None, ...)
kwargs = {}
def __call__(self, /, *args, **kwargs):
# overloading __call__ to ensure torch.ops.foo.bar()
# is still callable from JIT
# We save the function ptr as the `op` attribute on
# OpOverloadPacket to access it here.
# Directly calling OverloadPacket goes into C++, which will check
# the schema and cause an error for torchbind op when inputs consist of FakeScriptObject so we
# intercept it here and call TorchBindOpverload instead.
if self._has_torchbind_op_overload and _must_dispatch_in_python(args, kwargs):
return _call_overload_packet_from_python(self, args, kwargs)
return self._op(*args, **(kwargs or {}))
E IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
../../anaconda3/envs/autoround/lib/python3.10/site-packages/torch/_ops.py:1158: IndexError
Note: `group_size` values of 32 and 128 work fine; the `IndexError` above is raised only for other group sizes.
Versions
torch 2.7 (CPU build)