Commit eb5cdcd

fix device bug (#873)
Signed-off-by: He, Xin3 <xin3.he@intel.com>
1 parent 7210791

1 file changed (+2, -1)

auto_round/compressors/base.py

Lines changed: 2 additions & 1 deletion

@@ -1420,7 +1420,7 @@ def _quantize_layer_via_rtn(self, name: str) -> None:
                 m.zp = None
             else:
                 try:
-                    m.to(self.device)
+                    m = m.to(self.device)
                     m = WrapperLinear(
                         m,
                         enable_minmax_tuning=False,
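
Why the assignment matters: `torch.Tensor.to` is not in-place; it returns a new object on the target device, so calling it without binding the result leaves the local name pointing at the original. (For a plain `nn.Module`, `to()` moves parameters in place and returns `self`, so the assignment form is safe either way, including for objects whose `to` returns a new instance.) A minimal standalone sketch of the pitfall, using a dtype move so it runs anywhere; this is illustrative and not part of the commit:

import torch

t = torch.zeros(2)           # float32 tensor
t.to(torch.float64)          # returns a converted copy that is discarded
print(t.dtype)               # torch.float32 -- the call had no effect
t = t.to(torch.float64)      # bind the result, as the fix does with m
print(t.dtype)               # torch.float64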

@@ -1858,6 +1858,7 @@ def _quantize_layers(self, layer_names: list, layer_inputs: dict) -> None:
             from auto_round.data_type import QUANT_FUNC_WITH_DTYPE
 
             layer = get_module(self.model, layer_name)
+            layer = layer.to(self.device)
             if _is_fp8_model(self.model):
                 new_layer = convert_fp8_layer_to_linear(layer, self.amp_dtype).to(self.device)
                 set_module(self.model, layer_name, new_layer)
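
The second hunk places the layer on the compute device right after it is looked up by name, so the non-fp8 path no longer operates on a layer left on its original device (previously only the fp8 branch called `.to(self.device)`, and only on the converted layer). Below is a hedged sketch of the get-by-name / set-by-name pattern this code relies on; `get_module` and `set_module` here are illustrative re-implementations, not the actual auto_round helpers:

from torch import nn

def get_module(model: nn.Module, name: str) -> nn.Module:
    # Walk a dotted path such as "layers.0.mlp" down the module tree.
    module = model
    for attr in name.split("."):
        module = getattr(module, attr)
    return module

def set_module(model: nn.Module, name: str, new_module: nn.Module) -> None:
    # Replace the child at the dotted path with a new module.
    parent_name, _, child_name = name.rpartition(".")
    parent = get_module(model, parent_name) if parent_name else model
    setattr(parent, child_name, new_module)

# Usage mirroring the patched flow: fetch by name, move to the compute
# device, then optionally swap in a replacement layer.
model = nn.Sequential(nn.Linear(4, 4))
layer = get_module(model, "0")
layer = layer.to("cpu")              # device placement, as in the fix
set_module(model, "0", nn.Linear(4, 4))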
