Commit 3754b30

remove is_hpu_supported and refactor HPU UT
Signed-off-by: xinhe3 <xinhe3@habana.ai>
1 parent 885b892

7 files changed: +16 -25 lines changed

.azure-pipelines/scripts/ut/run_ut_hpu.sh

Lines changed: 3 additions & 3 deletions
@@ -7,7 +7,7 @@ export TQDM_MININTERVAL=60
 pip install pytest-cov pytest-html
 pip list
 
-cd /auto-round/test/test_cpu || exit 1
+cd /auto-round/test/test_hpu || exit 1
 find . -type f -exec sed -i '/sys\.path\.insert(0, "\.\.")/d' {} +
 
 export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
@@ -19,8 +19,8 @@ LOG_DIR=/auto-round/log_dir
 mkdir -p ${LOG_DIR}
 ut_log_name=${LOG_DIR}/ut.log
 
-find . -name "test*hpu_only.py" | sed "s,\.\/,python -m pytest --cov=\"${auto_round_path}\" --cov-report term --html=report.html --self-contained-html --cov-report xml:coverage.xml --cov-append -vs --disable-warnings ,g" > run_lazy.sh
-find . -name "test*hpu_only.py" | sed "s,\.\/,python -m pytest --mode compile --cov=\"${auto_round_path}\" --cov-report term --html=report.html --self-contained-html --cov-report xml:coverage.xml --cov-append -vs --disable-warnings ,g" > run_compile.sh
+find . -name "test*.py" | sed "s,\.\/,python -m pytest --cov=\"${auto_round_path}\" --cov-report term --html=report.html --self-contained-html --cov-report xml:coverage.xml --cov-append -vs --disable-warnings ,g" > run_lazy.sh
+find . -name "test*.py" | sed "s,\.\/,python -m pytest --mode compile --cov=\"${auto_round_path}\" --cov-report term --html=report.html --self-contained-html --cov-report xml:coverage.xml --cov-append -vs --disable-warnings ,g" > run_compile.sh
 
 cat run_lazy.sh
 bash run_lazy.sh 2>&1 | tee ${ut_log_name}

auto_round/data_type/utils.py

Lines changed: 2 additions & 2 deletions
@@ -224,9 +224,9 @@ def float8_e4m3fn_hpu_ste(x: torch.Tensor):
 
 @lru_cache(None)
 def get_gaudi_fp8_ste_func():
-    from auto_round.utils import is_hpu_supported
+    from auto_round.utils import is_hpex_available
 
-    if is_hpu_supported():
+    if is_hpex_available():
         fn = float8_e4m3fn_hpu_ste
         logger.warning_once("Using HPU STE for FP8")
     else:

auto_round/inference/auto_quantizer.py

Lines changed: 2 additions & 2 deletions
@@ -42,7 +42,7 @@
 from transformers.utils.quantization_config import AwqConfig, GPTQConfig, QuantizationConfigMixin, QuantizationMethod
 
 from auto_round.inference.convert_model import convert_hf_model, infer_target_device, post_init
-from auto_round.utils import is_hpu_supported
+from auto_round.utils import is_hpex_available
 
 logger = getLogger(__name__)
 import sys
@@ -126,7 +126,7 @@ def from_config(cls, quantization_config: Union[QuantizationConfigMixin, Dict],
                 f"Unknown quantization type, got {quant_method} - supported types are:"
                 f" {list(AUTO_QUANTIZER_MAPPING.keys())}"
             )
-        if "auto-round" in quant_method or is_hpu_supported():  # pragma: no cover
+        if "auto-round" in quant_method or is_hpex_available():  # pragma: no cover
             target_cls = AutoRoundQuantizer
         else:
             target_cls = AUTO_QUANTIZER_MAPPING[quant_method]

auto_round/inference/convert_model.py

Lines changed: 2 additions & 2 deletions
@@ -39,7 +39,7 @@
     get_block_names,
     get_layer_names_in_block,
     get_module,
-    is_hpu_supported,
+    is_hpex_available,
     set_module,
 )
 
@@ -165,7 +165,7 @@ def get_available_devices():
     if torch.cuda.is_available():
         devices.append("cuda")
 
-    if is_hpu_supported():
+    if is_hpex_available():
         devices.append("hpu")
 
     if hasattr(torch, "xpu") and torch.xpu.is_available():
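
Pieced together from the hunk above, the updated get_available_devices helper now reads roughly as follows. Only the changed lines and their context appear in the diff, so the docstring and the trailing CPU fallback are assumptions:

    def get_available_devices():
        """Return the accelerator backends visible to this process."""
        devices = []

        if torch.cuda.is_available():
            devices.append("cuda")

        if is_hpex_available():
            devices.append("hpu")

        if hasattr(torch, "xpu") and torch.xpu.is_available():
            devices.append("xpu")

        # Assumption: CPU is appended last as the universal fallback.
        devices.append("cpu")

        return devices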

auto_round/utils.py

Lines changed: 1 addition & 10 deletions
@@ -798,15 +798,6 @@ def is_autoround_exllamav2_available():
     return res
 
 
-@lru_cache(None)
-def is_hpu_supported():  # pragma: no cover
-    try:
-        import habana_frameworks.torch.core as htcore  # pylint: disable=E0401
-    except ImportError as e:
-        return False
-    return True
-
-
 def get_library_version(library_name):
     from packaging.version import Version
 
@@ -924,7 +915,7 @@ def _clear_memory_for_cpu_and_cuda(tensor=None):
 
 @torch._dynamo.disable()
 def clear_memory(tensor=None):
-    if is_hpu_supported():
+    if is_hpex_available():
         # hpu does not have empty_cache
         return
     else:
test/test_cpu/test_auto_round_hpu_only.py renamed to test/test_hpu/test_auto_round.py

Lines changed: 3 additions & 3 deletions
@@ -2,7 +2,7 @@
 import torch
 from _test_helpers import is_pytest_mode_compile, is_pytest_mode_lazy
 
-from auto_round.utils import is_hpu_supported
+from auto_round.utils import is_hpex_available
 
 
 def run_opt_125m_on_hpu():
@@ -28,13 +28,13 @@ def run_opt_125m_on_hpu():
     assert q_model is not None, "Expected q_model to be not None"
 
 
-@pytest.mark.skipif(not is_hpu_supported(), reason="HPU is not supported")
+@pytest.mark.skipif(not is_hpex_available(), reason="HPU is not supported")
 @pytest.mark.skipif(not is_pytest_mode_lazy(), reason="Only for lazy mode")
 def test_opt_125m_lazy_mode():
     run_opt_125m_on_hpu()
 
 
-@pytest.mark.skipif(not is_hpu_supported(), reason="HPU is not supported")
+@pytest.mark.skipif(not is_hpex_available(), reason="HPU is not supported")
 @pytest.mark.skipif(not is_pytest_mode_compile(), reason="Only for compile mode")
 def test_opt_125m_compile_mode():
     torch._dynamo.reset()
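
The is_pytest_mode_lazy / is_pytest_mode_compile helpers come from _test_helpers, which this commit does not show, and the CI script above passes pytest a custom --mode flag. A hedged sketch of how the two pieces could fit together; everything here except the two helper names and the --mode option is an assumption:

    # conftest.py (hypothetical): register the custom --mode option and
    # hand its value to the helper module before tests run.
    def pytest_addoption(parser):
        parser.addoption("--mode", action="store", default="lazy",
                         help="HPU execution mode: lazy or compile")


    def pytest_configure(config):
        import _test_helpers

        _test_helpers.set_pytest_mode(config.getoption("--mode"))


    # _test_helpers.py (hypothetical): remember the mode and expose the
    # two predicates the tests import.
    _PYTEST_MODE = "lazy"


    def set_pytest_mode(mode):
        global _PYTEST_MODE
        _PYTEST_MODE = mode


    def is_pytest_mode_lazy():
        return _PYTEST_MODE == "lazy"


    def is_pytest_mode_compile():
        return _PYTEST_MODE == "compile"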

test/test_cpu/test_hpu.py renamed to test/test_hpu/test_inference.py

Lines changed: 3 additions & 3 deletions
@@ -18,7 +18,7 @@ def __iter__(self):
         yield torch.ones([1, 10], dtype=torch.long)
 
 
-def is_hpu_supported():
+def is_hpex_available():
     try:
         import habana_frameworks.torch.core as htcore  # pylint: disable=E0401
     except ImportError as e:
@@ -40,7 +40,7 @@ def tearDownClass(self):
         shutil.rmtree("runs", ignore_errors=True)
 
     def test_autogptq_format_hpu_inference(self):
-        if not is_hpu_supported():
+        if not is_hpex_available():
             return
         try:
             import auto_gptq
@@ -73,7 +73,7 @@ def test_autogptq_format_hpu_inference(self):
         shutil.rmtree("./saved", ignore_errors=True)
 
     def test_autoround_format_hpu_inference(self):
-        if not is_hpu_supported():
+        if not is_hpex_available():
             return
         bits, group_size, sym = 4, 128, False
         autoround = AutoRound(