Commit 3437e3a

change the method of detecting linear layers (#849)
1 parent 70233bc commit 3437e3a
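
The change swaps isinstance checks for exact type membership checks (type(m) in supported_types), so only layers whose class is exactly one of the supported types (nn.Linear, nn.Conv2d, transformers' Conv1D) are detected; subclasses and wrapped variants are now skipped instead of being treated as plain linear layers. A minimal sketch of the behavioral difference, using a hypothetical MyLinear subclass and an illustrative SUPPORTED_LAYER_TYPES tuple (neither taken from the repo):

import torch.nn as nn

# Illustrative tuple; the real one in auto_round also covers transformers.pytorch_utils.Conv1D.
SUPPORTED_LAYER_TYPES = (nn.Linear, nn.Conv2d)

class MyLinear(nn.Linear):
    """Hypothetical subclass standing in for a custom or already-wrapped layer."""
    pass

layer = MyLinear(16, 32)

# Old detection: isinstance matches subclasses too, so MyLinear is picked up as a plain Linear.
print(isinstance(layer, SUPPORTED_LAYER_TYPES))  # True

# New detection: exact type membership skips the subclass.
print(type(layer) in SUPPORTED_LAYER_TYPES)      # False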

20 files changed, +73 -75 lines


auto_round/compressors/base.py

Lines changed: 4 additions & 4 deletions
@@ -766,7 +766,7 @@ def _check_compatibility(self) -> None:
            and any(key in fmt for fmt in self.formats for key in ("auto_round", "auto_gptq", "auto_awq"))
        ):
            for n, m in self.model.named_modules():
-               if isinstance(m, self.supported_types):
+               if type(m) in self.supported_types:
                    if m.weight.shape[0] % 32 != 0 or m.weight.shape[1] % 32 != 0:
                        self.layer_config[n] = {"bits": 16}
                        logger.info(
@@ -1991,7 +1991,7 @@ def _set_layerwise_config(self, layer_config: dict) -> bool:
        is_gguf = hasattr(self, "formats") and any("gguf" in format_ for format_ in self.formats)
        for n, m in self.model.named_modules():
            # Skip unsupported types
-           if not isinstance(m, supported_types) and m.__class__.__name__ not in self.inner_supported_types:
+           if type(m) not in supported_types and m.__class__.__name__ not in self.inner_supported_types:
                if n in self.layer_config:
                    if not isinstance(m, torch.nn.Embedding):
                        logger.warning(f"{n} is not supported, layer_config {n}: {layer_config[n]} will be ignored.")
@@ -2495,7 +2495,7 @@ def _replace_forward(self):
        from functools import partial

        for n, m in self.model.named_modules():
-           if n in self.to_cached_layers and not isinstance(m, tuple(self.supported_types)): ##block
+           if n in self.to_cached_layers and type(m) not in self.supported_types: ##block
                m.orig_forward = m.forward
                m.forward = partial(self._get_block_forward_func(n), m)
            elif n in self.to_cached_layers: ##linear layer or conv1d layer
@@ -3219,7 +3219,7 @@ def _get_quantized_layer_names_outside_blocks(self) -> list:
            if layer is None:
                logger.error(f"could not find layer {key} in the model, exit...")
                exit(-1)
-           if isinstance(layer, tuple(self.supported_types)) and check_to_quantized(self.layer_config[key]):
+           if type(layer) in self.supported_types and check_to_quantized(self.layer_config[key]):
                layer_names.append(key)

        return layer_names

auto_round/export/export_to_autogptq/export.py

Lines changed: 4 additions & 4 deletions
@@ -73,7 +73,7 @@ def pack_layer(name, model, backend, device=None):
        return
    layer = get_module(model, name)

-   if not isinstance(layer, SUPPORTED_LAYER_TYPES): # already packed
+   if type(layer) not in SUPPORTED_LAYER_TYPES: # already packed
        return

    orig_device = layer.weight.device # must place after 74
@@ -86,13 +86,13 @@ def pack_layer(name, model, backend, device=None):

    QuantLinear = get_autogptq_packing_qlinear(backend, bits, group_size, sym)

-   if isinstance(layer, nn.Linear):
+   if type(layer) == nn.Linear:
        in_features = layer.in_features
        out_features = layer.out_features
-   elif isinstance(layer, nn.Conv2d):
+   elif type(layer) == nn.Conv2d:
        in_features = layer.in_channels
        out_features = layer.out_channels
-   elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+   elif type(layer) == transformers.pytorch_utils.Conv1D:
        in_features = layer.weight.shape[0]
        out_features = layer.weight.shape[1]

auto_round/export/export_to_autogptq/qlinear_triton.py

Lines changed: 2 additions & 2 deletions
@@ -85,9 +85,9 @@ def pack(self, linear, scales, zeros, g_idx=None, device=None):
        self.scales = scales_t.clone().half()

        W = linear.weight.data.to(device).clone()
-       if isinstance(linear, nn.Conv2d):
+       if type(linear) == nn.Conv2d:
            W = W.flatten(1)
-       if isinstance(linear, transformers.pytorch_utils.Conv1D):
+       if type(linear) == transformers.pytorch_utils.Conv1D:
            W = W.t()

        repeat_scales = scales.to(device).repeat_interleave(self.group_size, 1)

auto_round/export/export_to_autoround/export.py

Lines changed: 7 additions & 7 deletions
@@ -118,13 +118,13 @@ def pack_qact_layer(name, model):

    QuantLinear = auto_round.export.export_to_autoround.qlinear_triton_act.QuantLinear

-   if isinstance(layer, nn.Linear):
+   if type(layer) == nn.Linear:
        in_features = layer.in_features
        out_features = layer.out_features
-   elif isinstance(layer, nn.Conv2d):
+   elif type(layer) == nn.Conv2d:
        in_features = layer.in_channels
        out_features = layer.out_channels
-   elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+   elif type(layer) == transformers.pytorch_utils.Conv1D:
        in_features = layer.weight.shape[0]
        out_features = layer.weight.shape[1]
    bias = layer.bias is not None
@@ -181,7 +181,7 @@ def pack_layer(layer_name, model, backend, device=None):
    if hasattr(layer, "orig_layer"):
        layer = layer.orig_layer

-   if not isinstance(layer, SUPPORTED_LAYER_TYPES): ##already packed
+   if type(layer) not in SUPPORTED_LAYER_TYPES: ##already packed
        return

    if int(layer.act_bits) <= 8:
@@ -200,13 +200,13 @@ def pack_layer(layer_name, model, backend, device=None):
        zp = layer.zp
    QuantLinear = dynamic_import_quant_linear_for_packing(backend, bits, group_size, sym, act_bits)

-   if isinstance(layer, nn.Linear):
+   if type(layer) == nn.Linear:
        in_features = layer.in_features
        out_features = layer.out_features
-   elif isinstance(layer, nn.Conv2d):
+   elif type(layer) == nn.Conv2d:
        in_features = layer.in_channels
        out_features = layer.out_channels
-   elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+   elif type(layer) == transformers.pytorch_utils.Conv1D:
        in_features = layer.weight.shape[0]
        out_features = layer.weight.shape[1]
    bias = layer.bias is not None

auto_round/export/export_to_autoround/export_to_fp8.py

Lines changed: 3 additions & 3 deletions
@@ -92,7 +92,7 @@ def pack_layer(layer_name, model, data_type, device=None):
    if hasattr(layer, "orig_layer"):
        layer = layer.orig_layer

-   if not isinstance(layer, SUPPORTED_LAYER_TYPES): ##already packed
+   if type(layer) not in SUPPORTED_LAYER_TYPES: ##already packed
        return

    if not check_to_quantized(layer):
@@ -119,13 +119,13 @@ def pack_layer(layer_name, model, data_type, device=None):
    q_weight = revert_tensor_by_pad(q_weight, orig_shape=orig_shape, pad_len=pad_len)
    q_weight = torch.clamp(q_weight, info.min, info.max)
    q_weight = q_weight.to(torch_dtype)
-   if isinstance(layer, torch.nn.Linear):
+   if type(layer) == torch.nn.Linear:
        in_features = layer.in_features
        out_features = layer.out_features
    # elif isinstance(layer, nn.Conv2d):
    #     in_features = layer.in_channels
    #     out_features = layer.out_channels
-   elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+   elif type(layer) == transformers.pytorch_utils.Conv1D:
        in_features = layer.weight.shape[0]
        out_features = layer.weight.shape[1]
    bias = layer.bias

auto_round/export/export_to_autoround/export_to_nvfp_mxfp.py

Lines changed: 5 additions & 5 deletions
@@ -54,7 +54,7 @@ def pack_layer(name, model, backend, device=None):
    if name == "lm_head": # TODO: Check vLLM inference status to determine whether to enable this feature
        return
    layer = get_module(model, name)
-   if not isinstance(layer, SUPPORTED_LAYER_TYPES) and not isinstance(layer, WrapperWALayer): ##already packed
+   if type(layer) not in SUPPORTED_LAYER_TYPES and not isinstance(layer, WrapperWALayer): ##already packed
        return

    if isinstance(layer, WrapperWALayer): # revert WrapperWALayer for offline usage
@@ -83,13 +83,13 @@ def pack_layer(name, model, backend, device=None):

    # QuantLinear = get_fp_qlinear(backend, bits, group_size, sym)

-   if isinstance(layer, nn.Linear):
+   if type(layer) == nn.Linear:
        in_features = layer.in_features
        out_features = layer.out_features
-   elif isinstance(layer, nn.Conv2d):
+   elif type(layer) == nn.Conv2d:
        in_features = layer.in_channels
        out_features = layer.out_channels
-   elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+   elif type(layer) == transformers.pytorch_utils.Conv1D:
        in_features = layer.weight.shape[0]
        out_features = layer.weight.shape[1]

@@ -172,7 +172,7 @@ def save_quantized_as_fp(output_dir, inplace=True, **kwargs):
    if is_nv_fp(act_data_type) and "static_gs" in str(act_data_type).lower():
        # generate static input_global_scale
        for n, m in model.named_modules():
-           if isinstance(m, SUPPORTED_LAYER_TYPES):
+           if type(m) in SUPPORTED_LAYER_TYPES:
                layer = m
                if layer.act_bits < 8 and not getattr(layer, "input_global_scale", None):
                    assert hasattr(layer, "act_max")

auto_round/export/export_to_autoround/qlinear_fp.py

Lines changed: 2 additions & 2 deletions
@@ -136,9 +136,9 @@ def pack(self, linear, scales, zeros=None, g_idx=None, global_scale=None, input_
            self.bias = linear.bias.detach().to(torch.float16)

        W = linear.weight.data.detach().to(device)
-       if isinstance(linear, nn.Conv2d):
+       if type(linear) == nn.Conv2d:
            W = W.flatten(1)
-       if isinstance(linear, transformers.pytorch_utils.Conv1D):
+       if type(linear) == transformers.pytorch_utils.Conv1D:
            W = W.t()

        tensor, orig_shape, pad_len = reshape_pad_tensor_by_group_size(W, self.group_size)

auto_round/export/export_to_autoround/qlinear_triton_act.py

Lines changed: 2 additions & 2 deletions
@@ -129,9 +129,9 @@ def pack(self, linear, scales, zeros, act_scales, w_bf16_to_fp8_scale, g_idx=Non
        self.scales = scales_t.clone().half()

        W = linear.weight.data.to(device).clone()
-       if isinstance(linear, nn.Conv2d):
+       if type(linear) == nn.Conv2d:
            W = W.flatten(1)
-       if isinstance(linear, transformers.pytorch_utils.Conv1D):
+       if type(linear) == transformers.pytorch_utils.Conv1D:
            W = W.t()

        repeat_scales = scales.to(device).repeat_interleave(self.group_size, 1)

auto_round/export/export_to_awq/export.py

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ def pack_layer(name, model, backend, device=None):
        return
    layer = get_module(model, name)

-   if not isinstance(layer, SUPPORTED_LAYER_TYPES): ##already packed
+   if type(layer) not in SUPPORTED_LAYER_TYPES: ##already packed
        return

    bits = layer.bits

auto_round/export/export_to_itrex/export.py

Lines changed: 3 additions & 3 deletions
@@ -227,13 +227,13 @@ def pack_model(
            else:
                scale = scale.to(dtype=convert_dtype)
            zp = zp.to(dtype=torch.int32) if isinstance(zp, torch.Tensor) else zp
-           if isinstance(m, transformers.pytorch_utils.Conv1D):
+           if type(m) == transformers.pytorch_utils.Conv1D:
                fp_weight = fp_weight.t_().contiguous()
            int_weight = quant_weight_w_scale(fp_weight, scale, zp, group_size, fp_weight.device)
-           if isinstance(m, torch.nn.Linear):
+           if type(m) == torch.nn.Linear:
                in_features = m.in_features
                out_features = m.out_features
-           elif isinstance(m, transformers.pytorch_utils.Conv1D):
+           elif type(m) == transformers.pytorch_utils.Conv1D:
                in_features = m.weight.shape[0]
                out_features = m.weight.shape[1]
            int_weight = int_weight.type(torch.int32)
