Skip to content

Get default config based on the auto-detect CPU type #1904

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions neural_compressor/common/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,6 @@ class Mode(Enum):
PREPARE = "prepare"
CONVERT = "convert"
QUANTIZE = "quantize"


# Brand-name keywords that identify a server-grade processor during hardware auto-detection.
# NOTE(review): "WORLD" is likely a typo for "WORD", but the name is imported elsewhere
# (e.g. detect_processor_type_based_on_hw) — renaming would break backward compatibility.
SERVER_PROCESSOR_BRAND_KEY_WORLD_LST = ["Xeon"]
132 changes: 130 additions & 2 deletions neural_compressor/common/utils/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"""The utility of common module."""

import collections
import enum
import importlib
import subprocess
import time
Expand All @@ -26,7 +27,7 @@
import psutil
from prettytable import PrettyTable

from neural_compressor.common.utils import Mode, TuningLogger, logger
from neural_compressor.common.utils import Mode, TuningLogger, constants, logger

__all__ = [
"set_workspace",
Expand All @@ -41,6 +42,9 @@
"CpuInfo",
"default_tuning_logger",
"call_counter",
"cpu_info",
"ProcessorType",
"detect_processor_type_based_on_hw",
"Statistics",
]

Expand Down Expand Up @@ -92,7 +96,7 @@ def __call__(self, *args, **kwargs):

@singleton
class CpuInfo(object):
"""CPU info collection."""
"""Get CPU Info."""

def __init__(self):
"""Get whether the cpu numerical format is bf16, the number of sockets, cores and cores per socket."""
Expand All @@ -113,6 +117,39 @@ def __init__(self):
b"\xB8\x07\x00\x00\x00" b"\x0f\xa2" b"\xC3", # mov eax, 7 # cpuid # ret
)
self._bf16 = bool(eax & (1 << 5))
self._info = info
self._brand_raw = info.get("brand_raw", "")
# detect the below info when needed
self._cores = None
self._sockets = None
self._cores_per_socket = None

@property
def brand_raw(self):
    """Get the brand name of the CPU (from cpuinfo's ``brand_raw``; empty string if undetected)."""
    return self._brand_raw

@brand_raw.setter
def brand_raw(self, brand_name):
    """Set the brand name of the CPU, overriding the auto-detected value."""
    self._brand_raw = brand_name

@staticmethod
def _detect_cores():
    """Return the number of physical (non-logical) CPU cores, as reported by psutil."""
    return psutil.cpu_count(logical=False)

@property
def cores(self):
    """Get the number of cores in platform.

    Detected lazily on first access and cached in ``self._cores``.
    """
    if self._cores is None:
        self._cores = self._detect_cores()
    return self._cores

@cores.setter
def cores(self, num_of_cores):
    """Set the number of cores in platform, overriding lazy detection."""
    self._cores = num_of_cores

@property
def bf16(self):
Expand All @@ -124,6 +161,60 @@ def vnni(self):
"""Get whether it is vnni."""
return self._vnni

@property
def cores_per_socket(self) -> int:
    """Get the cores per socket.

    Computed lazily as total physical cores floor-divided by socket count, then cached.
    """
    if self._cores_per_socket is None:
        self._cores_per_socket = self.cores // self.sockets
    return self._cores_per_socket

@property
def sockets(self):
    """Get the number of sockets in platform.

    Detected lazily on first access via ``_get_number_of_sockets`` and cached.
    """
    if self._sockets is None:
        self._sockets = self._get_number_of_sockets()
    return self._sockets

@sockets.setter
def sockets(self, num_of_sockets):
    """Set the number of sockets in platform, overriding lazy detection."""
    self._sockets = num_of_sockets

def _get_number_of_sockets(self) -> int:
    """Detect the number of CPU sockets with an OS-specific shell command.

    Returns:
        int: the detected socket count, or 1 when detection fails (best-effort).
    """
    # ARM platforms are treated as single-socket.
    if "arch" in self._info and "ARM" in self._info["arch"]:  # pragma: no cover
        return 1

    cmd = "cat /proc/cpuinfo | grep 'physical id' | sort -u | wc -l"
    if psutil.WINDOWS:
        cmd = r'wmic cpu get DeviceID | C:\Windows\System32\find.exe /C "CPU"'
    elif psutil.MACOS:  # pragma: no cover
        # NOTE(review): machdep.cpu.core_count reports *cores*, not sockets — confirm intent.
        cmd = "sysctl -n machdep.cpu.core_count"

    # Fix: the original assigned `num_sockets = None` twice; the first assignment was dead code.
    num_sockets = None
    try:
        with subprocess.Popen(
            args=cmd,
            shell=True,  # the Linux/Windows commands use pipes, so a shell is required
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=False,
        ) as proc:
            proc.wait()
            if proc.stdout:
                # The command prints a single integer; keep the last parsed line.
                for line in proc.stdout:
                    num_sockets = int(line.decode("utf-8", errors="ignore").strip())
    except Exception as e:
        # Best-effort detection: log and fall through to the default below.
        logger.error("Failed to get number of sockets: %s" % e)
    if isinstance(num_sockets, int) and num_sockets >= 1:
        return num_sockets
    logger.warning("Failed to get number of sockets, return 1 as default.")
    return 1


# Module-level shared instance (CpuInfo is @singleton-decorated, so repeated
# construction returns the same object); used by detect_processor_type_based_on_hw.
cpu_info = CpuInfo()


def dump_elapsed_time(customized_msg=""):
"""Get the elapsed time for decorated functions.
Expand Down Expand Up @@ -236,6 +327,43 @@ def wrapper(*args, **kwargs):
return wrapper


class ProcessorType(enum.Enum):
    """Processor category used to select predefined default configurations."""

    Client = "Client"
    Server = "Server"


def detect_processor_type_based_on_hw():
    """Detects the processor type based on the hardware configuration.

    Heuristics, checked in order (first match wins):
      1. more than one CPU socket            -> Server
      2. brand name contains a keyword from
         ``SERVER_PROCESSOR_BRAND_KEY_WORLD_LST`` -> Server
      3. total memory greater than 32GB      -> Server
      otherwise                              -> Client

    Returns:
        ProcessorType: The detected processor type (Server or Client).
    """
    log_msg = "Processor type detected as {processor_type} due to {reason}."
    if cpu_info.sockets > 1:
        logger.info(log_msg.format(processor_type=ProcessorType.Server.value, reason="there are more than one sockets"))
        return ProcessorType.Server
    if any(keyword in cpu_info.brand_raw for keyword in constants.SERVER_PROCESSOR_BRAND_KEY_WORLD_LST):
        logger.info(
            log_msg.format(processor_type=ProcessorType.Server.value, reason=f"the brand name is {cpu_info.brand_raw}.")
        )
        return ProcessorType.Server
    if psutil.virtual_memory().total / (1024**3) > 32:
        logger.info(
            log_msg.format(processor_type=ProcessorType.Server.value, reason="the memory size is greater than 32GB")
        )
        return ProcessorType.Server
    logger.info(
        "Processor type detected as %s, pass `processor_type='server'` to override it if needed.",
        ProcessorType.Client.value,
    )
    return ProcessorType.Client


class Statistics:
"""The statistics printer."""

Expand Down
48 changes: 30 additions & 18 deletions neural_compressor/torch/quantization/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import torch

import neural_compressor.torch.utils as torch_utils
from neural_compressor.common.base_config import (
BaseConfig,
config_registry,
Expand Down Expand Up @@ -219,14 +220,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]
dtype=["int4", "nf4"], use_sym=[True, False], group_size=[32, 128], use_mse_search=[False, True]
)

@classmethod
def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "RTNConfig"]:
    """Return the predefined RTN configs keyed by processor type.

    Client machines default to layer-wise quantization to limit memory use;
    servers use the plain default config.
    """
    return {
        torch_utils.ProcessorType.Client: cls(use_layer_wise=True),
        torch_utils.ProcessorType.Server: cls(),
    }

def get_default_rtn_config() -> RTNConfig:
"""Generate the default rtn config.

Returns:
the default rtn config.
"""
return RTNConfig()
def get_default_rtn_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> RTNConfig:
    """Generate the default rtn config.

    Args:
        processor_type: user-specified processor type ("client"/"server" or a
            ProcessorType); None triggers hardware-based auto-detection.

    Returns:
        the default rtn config for the resolved processor type.
    """
    process_type = torch_utils.get_processor_type_from_user_config(processor_type)
    return RTNConfig.get_predefined_configs()[process_type]


def get_default_double_quant_config(type="BNB_NF4"):
Expand Down Expand Up @@ -378,14 +382,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "GPTQConfig", List["GPTQConfig
# TODO fwk owner needs to update it.
return GPTQConfig(act_order=[True, False], use_sym=[False, True])

@classmethod
def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "GPTQConfig"]:
    """Return the predefined GPTQ configs keyed by processor type.

    Client machines default to layer-wise quantization to limit memory use;
    servers use the plain default config.
    """
    return {
        torch_utils.ProcessorType.Client: cls(use_layer_wise=True),
        torch_utils.ProcessorType.Server: cls(),
    }

def get_default_gptq_config() -> GPTQConfig:
"""Generate the default gptq config.

Returns:
the default gptq config.
"""
return GPTQConfig()
def get_default_gptq_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> GPTQConfig:
    """Generate the default gptq config.

    Args:
        processor_type: user-specified processor type ("client"/"server" or a
            ProcessorType); None triggers hardware-based auto-detection.

    Returns:
        the default gptq config for the resolved processor type.
    """
    # Fix: the return annotation previously said RTNConfig although a GPTQConfig is returned.
    process_type = torch_utils.get_processor_type_from_user_config(processor_type)
    return GPTQConfig.get_predefined_configs()[process_type]


######################## AWQ Config ###############################
Expand Down Expand Up @@ -725,6 +732,7 @@ def __init__(
not_use_best_mse: bool = False,
dynamic_max_gap: int = -1,
scale_dtype: str = "fp16",
use_layer_wise: bool = False,
white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,
):
"""Init AUTOROUND weight-only quantization config.
Expand Down Expand Up @@ -777,6 +785,7 @@ def __init__(
self.not_use_best_mse = not_use_best_mse
self.dynamic_max_gap = dynamic_max_gap
self.scale_dtype = scale_dtype
self.use_layer_wise = use_layer_wise
self._post_init()

@classmethod
Expand All @@ -803,14 +812,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "AutoRoundConfig", List["AutoR
# TODO fwk owner needs to update it.
return AutoRoundConfig(bits=[4, 6])

@classmethod
def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "AutoRoundConfig"]:
    """Return the predefined AutoRound configs keyed by processor type.

    Client machines default to layer-wise quantization to limit memory use;
    servers use the plain default config.
    """
    return {
        torch_utils.ProcessorType.Client: cls(use_layer_wise=True),
        torch_utils.ProcessorType.Server: cls(),
    }

def get_default_AutoRound_config() -> AutoRoundConfig:
"""Generate the default AUTOROUND config.

Returns:
the default AUTOROUND config.
"""
return AutoRoundConfig()
def get_default_AutoRound_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> AutoRoundConfig:
    """Generate the default AUTOROUND config.

    Args:
        processor_type: user-specified processor type ("client"/"server" or a
            ProcessorType); None triggers hardware-based auto-detection.

    Returns:
        the default AUTOROUND config for the resolved processor type.
    """
    # Fix: the return annotation previously said RTNConfig although an AutoRoundConfig is returned.
    process_type = torch_utils.get_processor_type_from_user_config(processor_type)
    return AutoRoundConfig.get_predefined_configs()[process_type]


######################## MX Config ###############################
Expand Down
41 changes: 39 additions & 2 deletions neural_compressor/torch/utils/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,21 @@
# limitations under the License.


from typing import Callable, Dict, List, Tuple, Union
import enum
from typing import Callable, Dict, List, Optional, Tuple, Union

import psutil
import torch
from typing_extensions import TypeAlias

from neural_compressor.common.utils import Mode, Statistics, logger
from neural_compressor.common.utils import (
Mode,
ProcessorType,
Statistics,
cpu_info,
detect_processor_type_based_on_hw,
logger,
)

OP_NAME_AND_TYPE_TUPLE_TYPE: TypeAlias = Tuple[str, Union[torch.nn.Module, Callable]]

Expand Down Expand Up @@ -235,3 +244,31 @@ def get_model_device(model: torch.nn.Module):
"""
for n, p in model.named_parameters():
return p.data.device.type # p.data.device == device(type='cpu')


def get_processor_type_from_user_config(user_processor_type: Optional[Union[str, ProcessorType]] = None):
    """Get the processor type.

    Get the processor type based on the user configuration or automatically detect
    it based on the hardware.

    Args:
        user_processor_type (Optional[Union[str, ProcessorType]]): The user-specified processor type. Defaults to None.

    Returns:
        ProcessorType: The detected or user-specified processor type.

    Raises:
        AssertionError: If the user-specified processor type is not supported.
        NotImplementedError: If the processor type is not recognized.
    """
    if user_processor_type is None:
        # No preference given: fall back to hardware-based detection.
        return detect_processor_type_based_on_hw()
    if isinstance(user_processor_type, ProcessorType):
        return user_processor_type
    if isinstance(user_processor_type, str):
        # Normalize e.g. "SERVER"/"server" to the enum member name "Server".
        normalized = user_processor_type.lower().capitalize()
        assert normalized in ProcessorType.__members__, f"Unsupported processor type: {normalized}"
        return ProcessorType(normalized)
    raise NotImplementedError(f"Unsupported processor type: {user_processor_type}")
Loading
Loading