Skip to content

add INC_TARGET_DEVICE introduction #1988

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion docs/source/3x/PyTorch.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ Deep Learning</a></td>
</table>

2. How to set different configuration for specific op_name or op_type?
> INC extends a `set_local` method based on the global configuration object to set custom configuration.
> Neural Compressor extends a `set_local` method based on the global configuration object to set custom configuration.

```python
def set_local(self, operator_name_or_list: Union[List, str, Callable], config: BaseConfig) -> BaseConfig:
Expand All @@ -264,3 +264,15 @@ Deep Learning</a></td>
quant_config.set_local(".*mlp.*", RTNConfig(bits=8)) # For layers with "mlp" in their names, set bits=8
quant_config.set_local("Conv1d", RTNConfig(dtype="fp32")) # For Conv1d layers, do not quantize them.
```

3. How to specify an accelerator?

> Neural Compressor provides automatic accelerator detection, including HPU, XPU, CUDA, and CPU.

> The automatically detected accelerator may not be suitable for some special cases, such as poor performance or memory limitations. In such situations, users can override the detected accelerator by setting the environment variable `INC_TARGET_DEVICE`.

> Usage:

```bash
export INC_TARGET_DEVICE=cpu
```
26 changes: 13 additions & 13 deletions neural_compressor/torch/utils/auto_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,19 +395,19 @@ def mark_step(self):
def auto_detect_accelerator(device_name="auto") -> Auto_Accelerator:
"""Automatically detects and selects the appropriate accelerator.

Force use the cpu on node has both cpu and gpu: `FORCE_DEVICE=cpu` python main.py ...
The `FORCE_DEVICE` is case insensitive.
The environment variable `FORCE_DEVICE` has higher priority than the `device_name`.
Force use the cpu on node has both cpu and gpu: `INC_TARGET_DEVICE=cpu` python main.py ...
The `INC_TARGET_DEVICE` is case insensitive.
The environment variable `INC_TARGET_DEVICE` has higher priority than the `device_name`.
TODO: refine the docs and logic later
"""
# 1. Get the device setting from environment variable `FORCE_DEVICE`.
FORCE_DEVICE = os.environ.get("FORCE_DEVICE", None)
if FORCE_DEVICE:
FORCE_DEVICE = FORCE_DEVICE.lower()
# 2. If the `FORCE_DEVICE` is set and the accelerator is available, use it.
if FORCE_DEVICE and accelerator_registry.get_accelerator_cls_by_name(FORCE_DEVICE) is not None:
logger.warning("Force use %s accelerator.", FORCE_DEVICE)
return accelerator_registry.get_accelerator_cls_by_name(FORCE_DEVICE)()
# 1. Get the device setting from environment variable `INC_TARGET_DEVICE`.
INC_TARGET_DEVICE = os.environ.get("INC_TARGET_DEVICE", None)
if INC_TARGET_DEVICE:
INC_TARGET_DEVICE = INC_TARGET_DEVICE.lower()
# 2. If the `INC_TARGET_DEVICE` is set and the accelerator is available, use it.
if INC_TARGET_DEVICE and accelerator_registry.get_accelerator_cls_by_name(INC_TARGET_DEVICE) is not None:
logger.warning("Force use %s accelerator.", INC_TARGET_DEVICE)
return accelerator_registry.get_accelerator_cls_by_name(INC_TARGET_DEVICE)()
# 3. If the `device_name` is set and the accelerator is available, use it.
if device_name != "auto":
if accelerator_registry.get_accelerator_cls_by_name(device_name) is not None:
Expand All @@ -425,8 +425,8 @@ def auto_detect_accelerator(device_name="auto") -> Auto_Accelerator:


# Force use cpu accelerator even if cuda is available.
# FORCE_DEVICE = "cpu" python ...
# INC_TARGET_DEVICE=cpu python ...
# or
# FORCE_DEVICE = "CPU" python ...
# INC_TARGET_DEVICE=CPU python ...
# or
# CUDA_VISIBLE_DEVICES="" python ...
4 changes: 2 additions & 2 deletions test/3x/torch/quantization/weight_only/test_hqq.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def setup_class(cls):
@pytest.fixture
def force_use_cpu(self, monkeypatch):
# Force use CPU
monkeypatch.setenv("FORCE_DEVICE", "cpu")
monkeypatch.setenv("INC_TARGET_DEVICE", "cpu")

@pytest.fixture
def force_not_half(self, monkeypatch):
Expand Down Expand Up @@ -194,7 +194,7 @@ def test_hqq_module(
if device_name == "cuda" and not torch.cuda.is_available():
pytest.skip("Skipping CUDA test because cuda is not available")
if device_name == "cpu":
os.environ["FORCE_DEVICE"] = "cpu"
os.environ["INC_TARGET_DEVICE"] = "cpu"
hqq_global_option.use_half = False

_common_hqq_test(
Expand Down
16 changes: 9 additions & 7 deletions test/3x/torch/utils/test_auto_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
@pytest.mark.skipif(not HPU_Accelerator.is_available(), reason="HPEX is not available")
class TestHPUAccelerator:
def test_cuda_accelerator(self):
assert os.environ.get("FORCE_DEVICE", None) is None, "FORCE_DEVICE shouldn't be set. HPU is the first priority."
assert (
os.environ.get("INC_TARGET_DEVICE", None) is None
), "INC_TARGET_DEVICE shouldn't be set. HPU is the first priority."
accelerator = auto_detect_accelerator()
assert accelerator.current_device() == 0, f"{accelerator.current_device()}"
assert accelerator.current_device_name() == "hpu:0"
Expand Down Expand Up @@ -47,10 +49,10 @@ class TestXPUAccelerator:
@pytest.fixture
def force_use_xpu(self, monkeypatch):
# Force use xpu
monkeypatch.setenv("FORCE_DEVICE", "xpu")
monkeypatch.setenv("INC_TARGET_DEVICE", "xpu")

def test_xpu_accelerator(self, force_use_xpu):
print(f"FORCE_DEVICE: {os.environ.get('FORCE_DEVICE', None)}")
print(f"INC_TARGET_DEVICE: {os.environ.get('INC_TARGET_DEVICE', None)}")
accelerator = auto_detect_accelerator()
assert accelerator.current_device() == 0, f"{accelerator.current_device()}"
assert accelerator.current_device_name() == "xpu:0"
Expand Down Expand Up @@ -79,10 +81,10 @@ class TestCPUAccelerator:
@pytest.fixture
def force_use_cpu(self, monkeypatch):
# Force use CPU
monkeypatch.setenv("FORCE_DEVICE", "cpu")
monkeypatch.setenv("INC_TARGET_DEVICE", "cpu")

def test_cpu_accelerator(self, force_use_cpu):
print(f"FORCE_DEVICE: {os.environ.get('FORCE_DEVICE', None)}")
print(f"INC_TARGET_DEVICE: {os.environ.get('INC_TARGET_DEVICE', None)}")
accelerator = auto_detect_accelerator()
assert accelerator.current_device() == "cpu", f"{accelerator.current_device()}"
assert accelerator.current_device_name() == "cpu"
Expand All @@ -99,10 +101,10 @@ class TestCUDAAccelerator:
@pytest.fixture
def force_use_cuda(self, monkeypatch):
# Force use CUDA
monkeypatch.setenv("FORCE_DEVICE", "cuda")
monkeypatch.setenv("INC_TARGET_DEVICE", "cuda")

def test_cuda_accelerator(self, force_use_cuda):
print(f"FORCE_DEVICE: {os.environ.get('FORCE_DEVICE', None)}")
print(f"INC_TARGET_DEVICE: {os.environ.get('INC_TARGET_DEVICE', None)}")
accelerator = auto_detect_accelerator()
assert accelerator.current_device() == 0, f"{accelerator.current_device()}"
assert accelerator.current_device_name() == "cuda:0"
Expand Down
Loading