8 changes: 4 additions & 4 deletions auto_round/compressors/base.py
@@ -766,7 +766,7 @@ def _check_compatibility(self) -> None:
and any(key in fmt for fmt in self.formats for key in ("auto_round", "auto_gptq", "auto_awq"))
):
for n, m in self.model.named_modules():
-if isinstance(m, self.supported_types):
+if type(m) in self.supported_types:
if m.weight.shape[0] % 32 != 0 or m.weight.shape[1] % 32 != 0:
self.layer_config[n] = {"bits": 16}
logger.info(
@@ -1991,7 +1991,7 @@ def _set_layerwise_config(self, layer_config: dict) -> bool:
is_gguf = hasattr(self, "formats") and any("gguf" in format_ for format_ in self.formats)
for n, m in self.model.named_modules():
# Skip unsupported types
-if not isinstance(m, supported_types) and m.__class__.__name__ not in self.inner_supported_types:
+if type(m) not in supported_types and m.__class__.__name__ not in self.inner_supported_types:
if n in self.layer_config:
if not isinstance(m, torch.nn.Embedding):
logger.warning(f"{n} is not supported, layer_config {n}: {layer_config[n]} will be ignored.")
@@ -2495,7 +2495,7 @@ def _replace_forward(self):
from functools import partial

for n, m in self.model.named_modules():
-if n in self.to_cached_layers and not isinstance(m, tuple(self.supported_types)): ##block
+if n in self.to_cached_layers and type(m) not in self.supported_types: ##block
m.orig_forward = m.forward
m.forward = partial(self._get_block_forward_func(n), m)
elif n in self.to_cached_layers: ##linear layer or conv1d layer
@@ -3219,7 +3219,7 @@ def _get_quantized_layer_names_outside_blocks(self) -> list:
if layer is None:
logger.error(f"could not find layer {key} in the model, exit...")
exit(-1)
-if isinstance(layer, tuple(self.supported_types)) and check_to_quantized(self.layer_config[key]):
+if type(layer) in self.supported_types and check_to_quantized(self.layer_config[key]):
layer_names.append(key)

return layer_names
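
The change repeated throughout this PR replaces isinstance checks with exact type membership. The practical difference is that isinstance also accepts subclasses, whereas type(m) in supported_types accepts only the listed classes themselves, so wrapper or subclassed layers no longer match. A minimal sketch of that behavior (the FP8Linear subclass below is illustrative only, not auto_round's class):

import torch.nn as nn

class FP8Linear(nn.Linear):  # hypothetical stand-in for an already-converted layer
    pass

supported_types = (nn.Linear, nn.Conv2d)
layer = FP8Linear(8, 8)

print(isinstance(layer, supported_types))  # True: subclasses match
print(type(layer) in supported_types)      # False: only the exact types match
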
8 changes: 4 additions & 4 deletions auto_round/export/export_to_autogptq/export.py
@@ -73,7 +73,7 @@ def pack_layer(name, model, backend, device=None):
return
layer = get_module(model, name)

-if not isinstance(layer, SUPPORTED_LAYER_TYPES): # already packed
+if type(layer) not in SUPPORTED_LAYER_TYPES: # already packed
return

orig_device = layer.weight.device # must place after 74
@@ -86,13 +86,13 @@ def pack_layer(name, model, backend, device=None):

QuantLinear = get_autogptq_packing_qlinear(backend, bits, group_size, sym)

-if isinstance(layer, nn.Linear):
+if type(layer) == nn.Linear:
in_features = layer.in_features
out_features = layer.out_features
-elif isinstance(layer, nn.Conv2d):
+elif type(layer) == nn.Conv2d:
in_features = layer.in_channels
out_features = layer.out_channels
-elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+elif type(layer) == transformers.pytorch_utils.Conv1D:
in_features = layer.weight.shape[0]
out_features = layer.weight.shape[1]

4 changes: 2 additions & 2 deletions auto_round/export/export_to_autogptq/qlinear_triton.py
@@ -85,9 +85,9 @@ def pack(self, linear, scales, zeros, g_idx=None, device=None):
self.scales = scales_t.clone().half()

W = linear.weight.data.to(device).clone()
-if isinstance(linear, nn.Conv2d):
+if type(linear) == nn.Conv2d:
W = W.flatten(1)
-if isinstance(linear, transformers.pytorch_utils.Conv1D):
+if type(linear) == transformers.pytorch_utils.Conv1D:
W = W.t()

repeat_scales = scales.to(device).repeat_interleave(self.group_size, 1)
14 changes: 7 additions & 7 deletions auto_round/export/export_to_autoround/export.py
@@ -118,13 +118,13 @@ def pack_qact_layer(name, model):

QuantLinear = auto_round.export.export_to_autoround.qlinear_triton_act.QuantLinear

-if isinstance(layer, nn.Linear):
+if type(layer) == nn.Linear:
in_features = layer.in_features
out_features = layer.out_features
-elif isinstance(layer, nn.Conv2d):
+elif type(layer) == nn.Conv2d:
in_features = layer.in_channels
out_features = layer.out_channels
-elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+elif type(layer) == transformers.pytorch_utils.Conv1D:
in_features = layer.weight.shape[0]
out_features = layer.weight.shape[1]
bias = layer.bias is not None
@@ -181,7 +181,7 @@ def pack_layer(layer_name, model, backend, device=None):
if hasattr(layer, "orig_layer"):
layer = layer.orig_layer

-if not isinstance(layer, SUPPORTED_LAYER_TYPES): ##already packed
+if type(layer) not in SUPPORTED_LAYER_TYPES: ##already packed
return

if int(layer.act_bits) <= 8:
@@ -200,13 +200,13 @@ def pack_layer(layer_name, model, backend, device=None):
zp = layer.zp
QuantLinear = dynamic_import_quant_linear_for_packing(backend, bits, group_size, sym, act_bits)

-if isinstance(layer, nn.Linear):
+if type(layer) == nn.Linear:
in_features = layer.in_features
out_features = layer.out_features
-elif isinstance(layer, nn.Conv2d):
+elif type(layer) == nn.Conv2d:
in_features = layer.in_channels
out_features = layer.out_channels
-elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+elif type(layer) == transformers.pytorch_utils.Conv1D:
in_features = layer.weight.shape[0]
out_features = layer.weight.shape[1]
bias = layer.bias is not None
6 changes: 3 additions & 3 deletions auto_round/export/export_to_autoround/export_to_fp8.py
@@ -92,7 +92,7 @@ def pack_layer(layer_name, model, data_type, device=None):
if hasattr(layer, "orig_layer"):
layer = layer.orig_layer

-if not isinstance(layer, SUPPORTED_LAYER_TYPES): ##already packed
+if type(layer) not in SUPPORTED_LAYER_TYPES: ##already packed
return

if not check_to_quantized(layer):
@@ -119,13 +119,13 @@ def pack_layer(layer_name, model, data_type, device=None):
q_weight = revert_tensor_by_pad(q_weight, orig_shape=orig_shape, pad_len=pad_len)
q_weight = torch.clamp(q_weight, info.min, info.max)
q_weight = q_weight.to(torch_dtype)
-if isinstance(layer, torch.nn.Linear):
+if type(layer) == torch.nn.Linear:
in_features = layer.in_features
out_features = layer.out_features
# elif isinstance(layer, nn.Conv2d):
# in_features = layer.in_channels
# out_features = layer.out_channels
-elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+elif type(layer) == transformers.pytorch_utils.Conv1D:
in_features = layer.weight.shape[0]
out_features = layer.weight.shape[1]
bias = layer.bias
10 changes: 5 additions & 5 deletions auto_round/export/export_to_autoround/export_to_nvfp_mxfp.py
@@ -54,7 +54,7 @@ def pack_layer(name, model, backend, device=None):
if name == "lm_head": # TODO: Check vLLM inference status to determine whether to enable this feature
return
layer = get_module(model, name)
-if not isinstance(layer, SUPPORTED_LAYER_TYPES) and not isinstance(layer, WrapperWALayer): ##already packed
+if type(layer) not in SUPPORTED_LAYER_TYPES and not isinstance(layer, WrapperWALayer): ##already packed
return

if isinstance(layer, WrapperWALayer): # revert WrapperWALayer for offline usage
@@ -83,13 +83,13 @@ def pack_layer(name, model, backend, device=None):

# QuantLinear = get_fp_qlinear(backend, bits, group_size, sym)

-if isinstance(layer, nn.Linear):
+if type(layer) == nn.Linear:
in_features = layer.in_features
out_features = layer.out_features
-elif isinstance(layer, nn.Conv2d):
+elif type(layer) == nn.Conv2d:
in_features = layer.in_channels
out_features = layer.out_channels
-elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+elif type(layer) == transformers.pytorch_utils.Conv1D:
in_features = layer.weight.shape[0]
out_features = layer.weight.shape[1]

@@ -172,7 +172,7 @@ def save_quantized_as_fp(output_dir, inplace=True, **kwargs):
if is_nv_fp(act_data_type) and "static_gs" in str(act_data_type).lower():
# generate static input_global_scale
for n, m in model.named_modules():
-if isinstance(m, SUPPORTED_LAYER_TYPES):
+if type(m) in SUPPORTED_LAYER_TYPES:
layer = m
if layer.act_bits < 8 and not getattr(layer, "input_global_scale", None):
assert hasattr(layer, "act_max")
4 changes: 2 additions & 2 deletions auto_round/export/export_to_autoround/qlinear_fp.py
@@ -136,9 +136,9 @@ def pack(self, linear, scales, zeros=None, g_idx=None, global_scale=None, input_
self.bias = linear.bias.detach().to(torch.float16)

W = linear.weight.data.detach().to(device)
-if isinstance(linear, nn.Conv2d):
+if type(linear) == nn.Conv2d:
W = W.flatten(1)
-if isinstance(linear, transformers.pytorch_utils.Conv1D):
+if type(linear) == transformers.pytorch_utils.Conv1D:
W = W.t()

tensor, orig_shape, pad_len = reshape_pad_tensor_by_group_size(W, self.group_size)
4 changes: 2 additions & 2 deletions auto_round/export/export_to_autoround/qlinear_triton_act.py
@@ -129,9 +129,9 @@ def pack(self, linear, scales, zeros, act_scales, w_bf16_to_fp8_scale, g_idx=Non
self.scales = scales_t.clone().half()

W = linear.weight.data.to(device).clone()
-if isinstance(linear, nn.Conv2d):
+if type(linear) == nn.Conv2d:
W = W.flatten(1)
-if isinstance(linear, transformers.pytorch_utils.Conv1D):
+if type(linear) == transformers.pytorch_utils.Conv1D:
W = W.t()

repeat_scales = scales.to(device).repeat_interleave(self.group_size, 1)
2 changes: 1 addition & 1 deletion auto_round/export/export_to_awq/export.py
@@ -50,7 +50,7 @@ def pack_layer(name, model, backend, device=None):
return
layer = get_module(model, name)

-if not isinstance(layer, SUPPORTED_LAYER_TYPES): ##already packed
+if type(layer) not in SUPPORTED_LAYER_TYPES: ##already packed
return

bits = layer.bits
6 changes: 3 additions & 3 deletions auto_round/export/export_to_itrex/export.py
@@ -227,13 +227,13 @@ def pack_model(
else:
scale = scale.to(dtype=convert_dtype)
zp = zp.to(dtype=torch.int32) if isinstance(zp, torch.Tensor) else zp
-if isinstance(m, transformers.pytorch_utils.Conv1D):
+if type(m) == transformers.pytorch_utils.Conv1D:
fp_weight = fp_weight.t_().contiguous()
int_weight = quant_weight_w_scale(fp_weight, scale, zp, group_size, fp_weight.device)
-if isinstance(m, torch.nn.Linear):
+if type(m) == torch.nn.Linear:
in_features = m.in_features
out_features = m.out_features
-elif isinstance(m, transformers.pytorch_utils.Conv1D):
+elif type(m) == transformers.pytorch_utils.Conv1D:
in_features = m.weight.shape[0]
out_features = m.weight.shape[1]
int_weight = int_weight.type(torch.int32)
10 changes: 5 additions & 5 deletions auto_round/export/export_to_llmcompressor/export_to_fp.py
@@ -54,7 +54,7 @@ def pack_layer(name, model, backend, device=None):
if name == "lm_head": # TODO: Check vLLM inference status to determine whether to enable this feature
return
layer = get_module(model, name)
-if not isinstance(layer, SUPPORTED_LAYER_TYPES) and not isinstance(layer, WrapperWALayer): ##already packed
+if type(layer) not in SUPPORTED_LAYER_TYPES and not isinstance(layer, WrapperWALayer): ##already packed
return

if isinstance(layer, WrapperWALayer): # revert WrapperWALayer for offline usage
@@ -83,13 +83,13 @@ def pack_layer(name, model, backend, device=None):

# QuantLinear = get_fp_qlinear(backend, bits, group_size, sym)

-if isinstance(layer, nn.Linear):
+if type(layer) == nn.Linear:
in_features = layer.in_features
out_features = layer.out_features
-elif isinstance(layer, nn.Conv2d):
+elif type(layer) == nn.Conv2d:
in_features = layer.in_channels
out_features = layer.out_channels
-elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+elif type(layer) == transformers.pytorch_utils.Conv1D:
in_features = layer.weight.shape[0]
out_features = layer.weight.shape[1]

@@ -167,7 +167,7 @@ def save_quantized_as_fp(output_dir, inplace=True, **kwargs):
if is_nv_fp(act_data_type) and "static_gs" in str(act_data_type).lower():
# generate static input_global_scale
for n, m in model.named_modules():
-if isinstance(m, SUPPORTED_LAYER_TYPES):
+if type(m) in SUPPORTED_LAYER_TYPES:
layer = m
if layer.act_bits < 8 and not getattr(layer, "input_global_scale", None):
assert hasattr(layer, "act_max")
@@ -62,7 +62,7 @@ def pack_layer(layer_name: str, model: torch.nn.Module, data_type: str, device:
if hasattr(layer, "orig_layer"):
layer = layer.orig_layer

-if not isinstance(layer, SUPPORTED_LAYER_TYPES): ##already packed
+if type(layer) not in SUPPORTED_LAYER_TYPES: ##already packed
return

if not check_to_quantized(layer):
@@ -89,10 +89,10 @@ def pack_layer(layer_name: str, model: torch.nn.Module, data_type: str, device:
q_weight = revert_tensor_by_pad(q_weight, orig_shape=orig_shape, pad_len=pad_len)
q_weight = torch.clamp(q_weight, info.min, info.max)
q_weight = q_weight.to(torch_dtype)
-if isinstance(layer, torch.nn.Linear):
+if type(layer) == torch.nn.Linear:
in_features = layer.in_features
out_features = layer.out_features
-elif isinstance(layer, transformers.pytorch_utils.Conv1D):
+elif type(layer) == transformers.pytorch_utils.Conv1D:
in_features = layer.weight.shape[0]
out_features = layer.weight.shape[1]
bias = layer.bias
6 changes: 3 additions & 3 deletions auto_round/inference/convert_model.py
@@ -241,7 +241,7 @@ def get_layer_config(model, quantization_config):
# Get layer names that will be quantized
layer_names = []
for n, m in model.named_modules():
-if not isinstance(m, SUPPORTED_LAYER_TYPES):
+if type(m) not in SUPPORTED_LAYER_TYPES:
continue
if check_start_with_block_name(n, quant_block_list):
layer_names.append(n)
@@ -350,9 +350,9 @@ def _replace_by_quant_layers(

def _get_layer_features(layer):
"""Extracts input and output feature dimensions for supported layers."""
-if isinstance(layer, nn.Linear):
+if type(layer) == nn.Linear:
return layer.in_features, layer.out_features
-elif isinstance(layer, Conv1D): # TODO: Verify correctness
+elif type(layer) == Conv1D: # TODO: Verify correctness
return layer.weight.shape[0], layer.weight.shape[1]
return None, None # Unsupported layer type

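
The _get_layer_features helper above (and get_layer_features in utils.py below) reads Conv1D dimensions from weight.shape[0] and weight.shape[1] because transformers' Conv1D stores its weight transposed relative to nn.Linear. A short illustration with arbitrary layer sizes:

import torch.nn as nn
from transformers.pytorch_utils import Conv1D

linear = nn.Linear(in_features=16, out_features=32)
conv1d = Conv1D(nf=32, nx=16)  # nf = output size, nx = input size

print(tuple(linear.weight.shape))  # (32, 16) -> (out_features, in_features)
print(tuple(conv1d.weight.shape))  # (16, 32) -> (in_features, out_features)
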
16 changes: 8 additions & 8 deletions auto_round/utils.py
@@ -776,7 +776,7 @@ def get_layer_names_in_block(
if class_names is None:
class_names = []
for n, m in model.named_modules():
-if isinstance(m, supported_types) or (class_names is not None and m.__class__.__name__ in class_names):
+if type(m) in supported_types or (class_names is not None and m.__class__.__name__ in class_names):
m.tmp_name = n
layers_in_block = []
if bool(quant_block_list):
@@ -1066,7 +1066,7 @@ def get_fp_layer_names(model, fp_layers):
fp_layers = fp_layers.replace(" ", "").split(",")
all_layer_names = []
for n, m in model.named_modules():
-if isinstance(m, (torch.nn.Linear, transformers.pytorch_utils.Conv1D)):
+if type(m) in SUPPORTED_LAYER_TYPES:
all_layer_names.append(n)
not_to_quantized_layers = []

@@ -1104,7 +1104,7 @@ def check_awq_gemm_compatibility(model, bits, group_size, sym, layer_configs=Non
if bits != 4:
return False, "AutoAWQ GEMM kernel only supports 4 bits"
for n, m in model.named_modules():
-if isinstance(m, transformers.pytorch_utils.Conv1D):
+if type(m) == transformers.pytorch_utils.Conv1D:
return False, "AutoAWQ GEMM kernel does not support conv1d"

layer_names = get_layer_names_in_block(model)
@@ -1180,13 +1180,13 @@ def is_debug_mode():

def get_layer_features(layer):
"""Extracts input and output feature dimensions for supported layers."""
-if isinstance(layer, torch.nn.Linear):
+if type(layer) == torch.nn.Linear:
return layer.in_features, layer.out_features
-elif isinstance(layer, transformers.pytorch_utils.Conv1D): # TODO: Verify correctness
+elif type(layer) == transformers.pytorch_utils.Conv1D: # TODO: Verify correctness
return layer.weight.shape[0], layer.weight.shape[1]
elif isinstance(layer, torch.nn.Embedding):
return layer.num_embeddings, layer.embedding_dim
-elif deepspeed_exists and isinstance(layer, (LinearLayer, LinearAllreduce)):
+elif deepspeed_exists and type(layer) in (LinearLayer, LinearAllreduce):
return layer.weight.shape[1], layer.weight.shape[0] # (input_dim, output_dim)
return None, None # Unsupported layer type

@@ -1367,7 +1367,7 @@ def _is_fp8_model(model: torch.nn.Module) -> bool:
def _is_fp8_linear(module: torch.nn.Module) -> bool:
if hasattr(module, "is_fp8_linear"):
return module.is_fp8_linear
-if not (isinstance(module, torch.nn.Linear) or module.__class__.__name__ == "FP8Linear"):
+if not (type(module) == torch.nn.Linear or module.__class__.__name__ == "FP8Linear"):
return False
if module.weight is None:
return False
@@ -1912,7 +1912,7 @@ def _set_config(config, target_config):
continue
new_type = GGUF_CONFIG[target_gguf_format]["mostly"]
layer = get_module(model, layer_name)
-if isinstance(layer, transformers.pytorch_utils.Conv1D):
+if type(layer) == transformers.pytorch_utils.Conv1D:
input_features = layer.weight.shape[0]
else:
input_features = layer.weight.shape[-1]
Expand Down