Refine device_module: resolve it inside the tests that use it instead of at module level

zxd1997066 · zxd1997066 · commit 38ece90fd68f · 2025-11-10T14:05:34.000+08:00
diff --git a/test/quantization/test_quant_api.py b/test/quantization/test_quant_api.py
@@ -74,7 +74,6 @@
 )
 
 _DEVICE = auto_detect_device()
-device_module = torch.get_device_module(_DEVICE)
 
 try:
     import gemlite  # noqa: F401
@@ -499,6 +498,7 @@ def test_quantized_tensor_subclass_save_load_map_location(self):
 
     @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
     def test_quantized_model_streaming(self):
+        device_module = torch.get_device_module(_DEVICE)
         def reset_memory():
             gc.collect()
             device_module.empty_cache()
@@ -1109,6 +1109,7 @@ def test_non_fqn_config_filter_fn_none(self):
 
     @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
     def test_quantized_model_streaming_fqn_config(self):
+        device_module = torch.get_device_module(_DEVICE)
         def reset_memory():
             gc.collect()
             device_module.empty_cache()
diff --git a/test/quantization/test_quant_primitives.py b/test/quantization/test_quant_primitives.py
@@ -42,8 +42,6 @@
 torch.manual_seed(_SEED)
 
 _DEVICE = auto_detect_device()
-device_module = torch.get_device_module(_DEVICE)
-
 
 # Helper function to run a function twice
 # and verify that the result is the same.
@@ -599,6 +597,7 @@ def test_choose_qparams_tensor_asym_eps(self):
     def test_get_group_qparams_symmetric_memory(self):
         """Check the memory usage of the op"""
         weight = torch.randn(1024, 1024).to(device=_DEVICE)
+        device_module = torch.get_device_module(_DEVICE)
         original_mem_use = device_module.memory_allocated()
         n_bit = 4
         groupsize = 128