Commit 9d2ee69

Add typo checker (#846)
* Add typo checker

Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 4bb944f commit 9d2ee69

14 files changed: 48 additions and 33 deletions

.pre-commit-config.yaml

Lines changed: 5 additions & 0 deletions

```diff
@@ -51,6 +51,11 @@ repos:
         additional_dependencies:
           - tomli
 
+  - repo: https://github.com/crate-ci/typos
+    rev: v1.36.2
+    hooks:
+      - id: typos
+
   - repo: https://github.com/pycqa/isort
     rev: 6.0.1
     hooks:
```

README.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -6,7 +6,7 @@ AutoRound
 <h3> Advanced Quantization Algorithm for LLMs</h3>
 
 [![python](https://img.shields.io/badge/python-3.10%2B-blue)](https://github.com/intel/auto-round)
-[![version](https://img.shields.io/badge/release-0.7.0-green)](https://github.com/intel/auto-round)
+[![version](https://img.shields.io/badge/release-0.7.1-green)](https://github.com/intel/auto-round)
 [![license](https://img.shields.io/badge/license-Apache%202-9C27B0)](https://github.com/intel/auto-round/blob/main/LICENSE)
 <a href="https://huggingface.co/Intel">
 <img alt="Model Checkpoints" src="https://img.shields.io/badge/%F0%9F%A4%97%20HF-Models-F57C00">
```

auto_round/compressors/base.py

Lines changed: 8 additions & 8 deletions

```diff
@@ -94,7 +94,7 @@
     set_module,
     to_device,
     to_dtype,
-    unsupport_meta_device,
+    unsupported_meta_device,
 )
 from auto_round.wrapper import WrapperLinear, WrapperMultiblock, unwrapper_block, unwrapper_layer, wrapper_block
 
@@ -260,7 +260,7 @@ def __init__(
         elif tokenizer is None and iters > 0:
             raise ValueError("A tokenizer must be set for non-str model input")
         self.low_cpu_mem_usage = bool(low_cpu_mem_usage)
-        if unsupport_meta_device(model):
+        if unsupported_meta_device(model):
             raise RuntimeError(
                 "AutoRound does not support parameters on meta device. "
                 "Please use more GPUs by setting `--device 0,1,2,3` or just place the model on CPU."
@@ -345,7 +345,7 @@ def __init__(
         elif tokenizer is None and iters > 0:
             raise ValueError("A tokenizer must be set for non-str model input")
         self.low_cpu_mem_usage = bool(low_cpu_mem_usage)
-        if unsupport_meta_device(model):
+        if unsupported_meta_device(model):
             raise RuntimeError(
                 "AutoRound does not support parameters on meta device. "
                 "Please use more GPUs by setting `--device_map 0,1,2,3` or just place the model on CPU."
@@ -624,20 +624,20 @@ def _set_auto_device_map_in_block(self, block: torch.nn.Module, input_ids: list[
         device_0_memory = get_device_memory(
             self.device_list[0] if hasattr(self, "device_list") and self.device_list else 0
         )
-        block_memory, input_ouput_memory = estimate_tuning_block_mem(block, input_ids)
+        block_memory, input_output_memory = estimate_tuning_block_mem(block, input_ids)
         if self.low_gpu_mem_usage:
-            input_ouput_memory = 0
+            input_output_memory = 0
 
         mem_per_param_scale = 13 if self.mem_per_param_scale is None else self.mem_per_param_scale
         if self.iters == 0:
             mem_per_param_scale = 1  # for rtn
 
-        if (block_memory * mem_per_param_scale + input_ouput_memory) < device_0_memory:
+        if (block_memory * mem_per_param_scale + input_output_memory) < device_0_memory:
             return  # fit in one GPU
 
         device_map = {}
         device_memory = {device: get_device_memory(int(device.split(":")[1])) for device in cuda_devices}
-        device_memory[device_0] = device_0_memory - input_ouput_memory
+        device_memory[device_0] = device_0_memory - input_output_memory
 
         device_idx = 0
         # First, fill device 0 to its maximum capacity, then distribute the remaining layers evenly across other devices
@@ -864,7 +864,7 @@ def remove_duplicates(lst):
             format = "auto_round:auto_awq"
         elif is_nv_fp(self.data_type) or is_mx_fp(self.data_type):
             format = f"auto_round:{self.data_type}"
-        elif is_static_wfp8afp8(self):  # staic wfp8afp8
+        elif is_static_wfp8afp8(self):  # static wfp8afp8
             format = f"auto_round:{AutoRoundFormat.FP8_STATIC.value}"
         elif self.data_type == "fp" and self.bits == 8 and self.act_bits >= 16:  # woq fp8
             format = f"auto_round:{AutoRoundFormat.FP8.value}"
```
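For context, the fit check touched above keeps a block on the first device only when its tuning footprint plus cached activations fit in that device's free memory. A minimal sketch with made-up GB figures; only the scale factors (13 by default, 1 for RTN when iters == 0, and zero cached activation memory under low_gpu_mem_usage) come from the diff:

```python
# Illustrative numbers only (GB); the scale factors mirror the defaults in the diff.
block_memory = 1.5            # estimated memory of one block's parameters
mem_per_param_scale = 13      # tuning overhead multiplier (1 for RTN, i.e. iters == 0)
input_output_memory = 6.0     # cached block inputs/outputs (0 when low_gpu_mem_usage is set)
device_0_memory = 24.0        # free memory reported for the first device

fits_on_one_gpu = (block_memory * mem_per_param_scale + input_output_memory) < device_0_memory
print(fits_on_one_gpu)        # False: 1.5 * 13 + 6.0 = 25.5 GB, which exceeds 24 GB
```

When the check fails, the remaining code builds a per-device memory map and spreads the block's layers across the other CUDA devices.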

auto_round/export/export_to_gguf/convert_hf_to_gguf.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -7458,7 +7458,7 @@ def set_gguf_parameters(self):
         layer_norm_eps = hparams["layer_norm_epsilon"]
         intermediate_size = hparams["intermediate_size"] if "intermediate_size" in hparams else 4 * embed_dim
         num_layers = hparams["num_layers"]
-        # ignore for now as EXAONE-3.0-7.8B-Instruct attentino_dropout is 0.0
+        # ignore for now as EXAONE-3.0-7.8B-Instruct attention_dropout is 0.0
         # attention_dropout_rate = hparams["attention_dropout"]
         # ignore for now as EXAONE-3.0-7.8B-Instruct embed_dropout is 0.0
         # embed_dropout_rate = hparams["embed_dropout"]
@@ -7707,7 +7707,7 @@ def __init__(self, *args, **kwargs):
     def get_attn_layers(self):
         # Explicit list of layer type names
         if layer_types := self.hparams.get("layer_types"):
-            return [i for i, typ in enumerate(layer_types) if typ == "attention"]
+            return [i for i, layer_type in enumerate(layer_types) if layer_type == "attention"]
 
         # Layer types indicated by index or period
         attn_layers = self.hparams.get("attn_layer_indices", [])
```
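A tiny standalone illustration of the renamed comprehension (the layer_types list below is made up, not a real model config): it returns the indices of entries labelled "attention".

```python
# Hypothetical layer_types list, just to show what the comprehension produces.
layer_types = ["attention", "recurrent", "attention", "recurrent", "attention"]

attn_layers = [i for i, layer_type in enumerate(layer_types) if layer_type == "attention"]
print(attn_layers)  # [0, 2, 4]
```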

auto_round/export/export_to_gguf/packing.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -751,11 +751,11 @@ def q6_k_quant_block(blocks: np.array, scale=None, d_scale=None, original=False,
 
     tmp_L = all_L.reshape(nb, 4, 64) & 0xF
     output_ql = (tmp_L[:, ::2] | (tmp_L[:, 1::2] << 4)).reshape(nb, QK_K // 2).cpu().numpy().astype(np.uint8)
-    ouptut_qh = (all_L >> 4).reshape(nb, 2, 4, 32) << torch.tensor([0, 2, 4, 6], device=all_L.device).reshape(
+    output_qh = (all_L >> 4).reshape(nb, 2, 4, 32) << torch.tensor([0, 2, 4, 6], device=all_L.device).reshape(
         1, 1, 4, 1
     )
     output_qh = (
-        np.bitwise_or.reduce(ouptut_qh.cpu().numpy(), axis=2, dtype=np.uint8)  # pylint: disable=E1121
+        np.bitwise_or.reduce(output_qh.cpu().numpy(), axis=2, dtype=np.uint8)  # pylint: disable=E1121
         .reshape(nb, QK_K // 4)
         .astype(np.uint8)
     )  # pylint: disable=E1121
```
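The renamed tensors implement the usual 6-bit split: the low 4 bits of each quantized value go into ql (two per byte, QK_K // 2 bytes) and the high 2 bits into qh (four per byte, QK_K // 4 bytes). A standalone NumPy sketch of that round trip on a toy array, not the real q6_k block layout:

```python
import numpy as np

# Eight toy 6-bit values (0..63); the real code works on QK_K-sized blocks.
values = np.array([0, 7, 14, 21, 28, 35, 42, 49], dtype=np.uint8)
low = values & 0xF      # low 4 bits -> ql
high = values >> 4      # high 2 bits -> qh

# Pack two low nibbles per byte and four 2-bit high parts per byte.
ql = (low[0::2] | (low[1::2] << 4)).astype(np.uint8)
qh = (high[0::4] | (high[1::4] << 2) | (high[2::4] << 4) | (high[3::4] << 6)).astype(np.uint8)

# Unpack and verify the round trip.
low_back = np.stack([ql & 0xF, ql >> 4], axis=1).reshape(-1)
high_back = np.stack([qh & 0x3, (qh >> 2) & 0x3, (qh >> 4) & 0x3, qh >> 6], axis=1).reshape(-1)
assert np.array_equal(values, low_back | (high_back << 4))
```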

auto_round/inference/auto_quantizer.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -270,8 +270,8 @@ def post_init(self):
             raise ValueError("group_size must be greater than 0 or equal to -1")
 
     def get_loading_attributes(self):
-        loading_attibutes_dict = {"backend": self.backend}
-        return loading_attibutes_dict
+        loading_attributes_dict = {"backend": self.backend}
+        return loading_attributes_dict
 
     def to_dict(self):
         config_dict = super().to_dict()
```

auto_round/inference/backend.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -788,7 +788,7 @@ def find_backend(backend: str, orig_backend: str = None):
         target_info = BackendInfos[key]
         if (
             target_info.packing_format == orig_info.packing_format
-            or orig_info.packing_format in target_info.convertable_format
+            or orig_info.packing_format in target_info.convertible_format
         ):
             return key
 
```

auto_round/low_cpu_mem/utils.py

Lines changed: 3 additions & 3 deletions

```diff
@@ -423,16 +423,16 @@ def _layer_wise_to(module, name, device_or_dtype):
         module.get_bias = partial(_get_value, name, "bias")
         module.update = partial(_update, name, module)
 
-    def _repalce_to(module, name):
+    def _replace_to(module, name):
         if len(module._modules) > 0:
             for n, m in module.named_children():
                 if len(name) > 0:
                     n = name + "." + n
-                _repalce_to(m, n)
+                _replace_to(m, n)
         module.ori_to = module.to
         module.to = partial(_layer_wise_to, module, name)
 
-    _repalce_to(empty_model, "")
+    _replace_to(empty_model, "")
 
 
 def load_model_with_hooks(
```
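The renamed helper follows a common low-CPU-memory pattern: walk the module tree and monkey-patch every submodule's .to so weights can be handled layer by layer through a custom hook. A minimal standalone sketch of that pattern; the names and the pass-through hook below are illustrative, not the project's exact implementation:

```python
from functools import partial

import torch.nn as nn


def _layer_wise_to(module: nn.Module, name: str, device_or_dtype):
    # Placeholder hook: a real implementation would load this layer's weights
    # on demand here before delegating to the saved original .to method.
    return module.ori_to(device_or_dtype)


def _replace_to(module: nn.Module, name: str = "") -> None:
    # Depth-first walk, mirroring the recursion in the diff above.
    for child_name, child in module.named_children():
        qualified = f"{name}.{child_name}" if name else child_name
        _replace_to(child, qualified)
    module.ori_to = module.to                        # keep the original bound method
    module.to = partial(_layer_wise_to, module, name)


model = nn.Sequential(nn.Linear(4, 4), nn.ReLU())
_replace_to(model)
model.to("cpu")  # now routes through the patched hook on the root module
```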

auto_round/testing_utils.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -47,7 +47,7 @@ def is_itrex_available():
     return importlib.util.find_spec("intel_extension_for_transformers") is not None
 
 
-def is_flash_attn_avaliable():
+def is_flash_attn_available():
     return importlib.util.find_spec("flash_attn") is not None
 
 
@@ -203,7 +203,7 @@ def require_vlm_env(test_case):
 
     env_check = True
    # pip install flash-attn --no-build-isolation
-    env_check &= is_flash_attn_avaliable()
+    env_check &= is_flash_attn_available()
 
     # pip install git+https://github.com/haotian-liu/LLaVA.git@v1.2.2
     env_check &= importlib.util.find_spec("llava") is not None
```
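The availability checks above rely on importlib.util.find_spec, which returns None when a package cannot be located, so optional dependencies can be probed without importing them. A small standalone sketch of the same pattern (the package names are just examples):

```python
import importlib.util


def is_available(package_name: str) -> bool:
    # find_spec returns None if the package is not importable,
    # so nothing is actually imported by this check.
    return importlib.util.find_spec(package_name) is not None


env_check = True
env_check &= is_available("flash_attn")  # pip install flash-attn --no-build-isolation
env_check &= is_available("llava")
print("VLM test environment ready:", env_check)
```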

auto_round/utils.py

Lines changed: 9 additions & 9 deletions

```diff
@@ -220,7 +220,7 @@ def get_scale_shape(weight, group_size):
     return shape
 
 
-def unsupport_meta_device(model):
+def unsupported_meta_device(model):
     """Checks if the model is a valid model for auto_round.
 
     Args:
@@ -810,8 +810,8 @@ def is_autoround_exllamav2_available():
 def get_library_version(library_name):
     from packaging.version import Version
 
-    python_vesion = Version(sys.version.split()[0])
-    if python_vesion < Version("3.8"):
+    python_version = Version(sys.version.split()[0])
+    if python_version < Version("3.8"):
         import warnings
 
         warnings.filterwarnings("ignore", category=DeprecationWarning)
@@ -1290,7 +1290,7 @@ def _gguf_args_check(args_or_ar, formats: list[str] = None, model_type=ModelType
 
     pattern = re.compile(r"q\d_k")
     pre_dq_format = ""
-    unsupport_list, reset_list = [], []
+    unsupported_list, reset_list = [], []
     for format in GGUF_CONFIG:
         if format in formats:
             if format == "q6_k_s":
@@ -1303,7 +1303,7 @@ def _gguf_args_check(args_or_ar, formats: list[str] = None, model_type=ModelType
                 else:
                     pre_dq_format = format
 
-            unsupport_list, reset_list = [], []
+            unsupported_list, reset_list = [], []
             gguf_config = GGUF_CONFIG[format]
             for k, v in gguf_config.items():
                 if not hasattr(args_or_ar, k):
@@ -1315,12 +1315,12 @@ def _gguf_args_check(args_or_ar, formats: list[str] = None, model_type=ModelType
                     k = "asym"
                     v = not v
                 if getattr(args_or_ar, k) != v:
-                    unsupport_list.append(f"{k}={getattr(args_or_ar, k)}")
+                    unsupported_list.append(f"{k}={getattr(args_or_ar, k)}")
                     reset_list.append(f"{k}={v}")
                     setattr(args_or_ar, k, v)
-            if len(unsupport_list) > 0:
+            if len(unsupported_list) > 0:
                 logger.info(
-                    f"format {format} does not support for {', '.join(unsupport_list)},"
+                    f"format {format} does not support for {', '.join(unsupported_list)},"
                     f" reset to {', '.join(reset_list)}."
                 )
             # Removed obsolete commented-out block for improved readability and maintainability.
@@ -2415,7 +2415,7 @@ def module_match_name_list(module, name_list):
     elif module_match_name_list(module, ["DBRXMoeSparseMoeBlock"]):
        return ["w1_linear", "w2_linear", "v1_linear"]
     else:
-        # assuing w1, w2, w3 by default
+        # assuming w1, w2, w3 by default
         return ["w1", "w2", "w3"]
 
 
```
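The renamed lists in _gguf_args_check implement a reconcile-and-report loop: settings a GGUF format cannot honour are collected, reset to the format's required values, and logged. A self-contained sketch of that pattern; the argument object and the config dict below are made up, not the real GGUF_CONFIG:

```python
from types import SimpleNamespace


def reconcile(args, format_name: str, format_config: dict) -> None:
    # Collect user settings the target format does not support, then reset them.
    unsupported_list, reset_list = [], []
    for key, value in format_config.items():
        if getattr(args, key) != value:
            unsupported_list.append(f"{key}={getattr(args, key)}")
            reset_list.append(f"{key}={value}")
            setattr(args, key, value)
    if unsupported_list:
        print(
            f"format {format_name} does not support {', '.join(unsupported_list)},"
            f" reset to {', '.join(reset_list)}."
        )


args = SimpleNamespace(bits=8, group_size=128, sym=True)
reconcile(args, "q4_k_m", {"bits": 4, "group_size": 32, "sym": True})
# -> format q4_k_m does not support bits=8, group_size=128, reset to bits=4, group_size=32.
```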
