6 changes: 3 additions & 3 deletions .azure-pipelines/scripts/ut/run_ut_hpu.sh
@@ -7,7 +7,7 @@ export TQDM_MININTERVAL=60
pip install pytest-cov pytest-html
pip list

cd /auto-round/test/test_cpu || exit 1
cd /auto-round/test/test_hpu || exit 1
find . -type f -exec sed -i '/sys\.path\.insert(0, "\.\.")/d' {} +

export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
@@ -19,8 +19,8 @@ LOG_DIR=/auto-round/log_dir
mkdir -p ${LOG_DIR}
ut_log_name=${LOG_DIR}/ut.log

find . -name "test*hpu_only.py" | sed "s,\.\/,python -m pytest --cov=\"${auto_round_path}\" --cov-report term --html=report.html --self-contained-html --cov-report xml:coverage.xml --cov-append -vs --disable-warnings ,g" > run_lazy.sh
find . -name "test*hpu_only.py" | sed "s,\.\/,python -m pytest --mode compile --cov=\"${auto_round_path}\" --cov-report term --html=report.html --self-contained-html --cov-report xml:coverage.xml --cov-append -vs --disable-warnings ,g" > run_compile.sh
find . -name "test*.py" | sed "s,\.\/,python -m pytest --cov=\"${auto_round_path}\" --cov-report term --html=report.html --self-contained-html --cov-report xml:coverage.xml --cov-append -vs --disable-warnings ,g" > run_lazy.sh
find . -name "test*.py" | sed "s,\.\/,python -m pytest --mode compile --cov=\"${auto_round_path}\" --cov-report term --html=report.html --self-contained-html --cov-report xml:coverage.xml --cov-append -vs --disable-warnings ,g" > run_compile.sh

cat run_lazy.sh
bash run_lazy.sh 2>&1 | tee ${ut_log_name}
14 changes: 7 additions & 7 deletions auto_round/compressors/base.py
@@ -81,9 +81,9 @@
infer_bits_by_data_type,
init_cache,
is_debug_mode,
is_hpex_available,
is_mx_fp,
is_nv_fp,
is_optimum_habana_available,
is_standard_fp,
is_static_wfp8afp8,
is_wfp8afp8,
@@ -380,8 +380,8 @@ def __init__(
self._check_configs()
torch.set_printoptions(precision=3, sci_mode=True)

if is_optimum_habana_available():
logger.info("optimum Habana is available, import htcore explicitly.")
if is_hpex_available():
logger.info("habana_frameworks is available, import htcore explicitly.")
import habana_frameworks.torch.core as htcore # pylint: disable=E0401
import habana_frameworks.torch.hpu as hthpu # pylint: disable=E0401]

@@ -3279,7 +3279,7 @@ def _scale_loss_and_backward(self, scaler: Any, loss: torch.Tensor) -> torch.Ten
"""
scale_loss = loss * 1000
scale_loss.backward()
if is_optimum_habana_available():
if is_hpex_available():
htcore.mark_step()
return scale_loss

@@ -3296,7 +3296,7 @@ def _step(self, scaler: Any, optimizer: Any, lr_schedule: Any):
"""
optimizer.step()
# for hpu
if is_optimum_habana_available():
if is_hpex_available():
htcore.mark_step()
optimizer.zero_grad()
lr_schedule.step()
@@ -3478,7 +3478,7 @@ def _scale_loss_and_backward(self, scaler, loss):
loss = scaler.scale(loss)

loss.backward()
if is_optimum_habana_available():
if is_hpex_available():
htcore.mark_step()
return loss

@@ -3492,5 +3492,5 @@ def _step(self, scaler, optimizer, lr_schedule):
optimizer.step()
optimizer.zero_grad()
lr_schedule.step()
if is_optimum_habana_available():
if is_hpex_available():
htcore.mark_step()
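Every call site touched in base.py follows the same shape: run the backward or optimizer step, then issue an HPU graph break only when the Habana stack is present. A condensed illustration of that pattern (illustrative only, not code from the PR; the function name is invented, and htcore here refers to the LazyImport defined in auto_round/utils.py):

from auto_round.utils import htcore, is_hpex_available

def step_with_optional_hpu_sync(optimizer, lr_schedule):
    optimizer.step()
    if is_hpex_available():
        htcore.mark_step()  # flush queued ops so the HPU graph executes
    optimizer.zero_grad()
    lr_schedule.step()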
4 changes: 2 additions & 2 deletions auto_round/data_type/utils.py
@@ -224,9 +224,9 @@ def float8_e4m3fn_hpu_ste(x: torch.Tensor):

@lru_cache(None)
def get_gaudi_fp8_ste_func():
from auto_round.utils import is_hpu_supported
from auto_round.utils import is_hpex_available

if is_hpu_supported():
if is_hpex_available():
fn = float8_e4m3fn_hpu_ste
logger.warning_once("Using HPU STE for FP8")
else:
4 changes: 2 additions & 2 deletions auto_round/inference/auto_quantizer.py
@@ -42,7 +42,7 @@
from transformers.utils.quantization_config import AwqConfig, GPTQConfig, QuantizationConfigMixin, QuantizationMethod

from auto_round.inference.convert_model import convert_hf_model, infer_target_device, post_init
from auto_round.utils import is_hpu_supported
from auto_round.utils import is_hpex_available

logger = getLogger(__name__)
import sys
@@ -126,7 +126,7 @@ def from_config(cls, quantization_config: Union[QuantizationConfigMixin, Dict],
f"Unknown quantization type, got {quant_method} - supported types are:"
f" {list(AUTO_QUANTIZER_MAPPING.keys())}"
)
if "auto-round" in quant_method or is_hpu_supported(): # pragma: no cover
if "auto-round" in quant_method or is_hpex_available(): # pragma: no cover
target_cls = AutoRoundQuantizer
else:
target_cls = AUTO_QUANTIZER_MAPPING[quant_method]
4 changes: 2 additions & 2 deletions auto_round/inference/convert_model.py
@@ -39,7 +39,7 @@
get_block_names,
get_layer_names_in_block,
get_module,
is_hpu_supported,
is_hpex_available,
set_module,
)

@@ -165,7 +165,7 @@ def get_available_devices():
if torch.cuda.is_available():
devices.append("cuda")

if is_hpu_supported():
if is_hpex_available():
devices.append("hpu")

if hasattr(torch, "xpu") and torch.xpu.is_available():
42 changes: 26 additions & 16 deletions auto_round/utils.py
@@ -116,7 +116,7 @@ def __init__(self, module_name):
"""Init LazyImport object.

Args:
module_name (string): The name of module imported later
module_name (string): The name of module imported later
"""
self.module_name = module_name
self.module = None
@@ -145,12 +145,31 @@ def __call__(self, *args, **kwargs):
htcore = LazyImport("habana_frameworks.torch.core")


################ Check available sys.module to decide behavior #################
def is_package_available(package_name: str) -> bool:
"""Check if the package exists in the environment without importing.

Args:
package_name (str): package name
"""
from importlib.util import find_spec

package_spec = find_spec(package_name)
return package_spec is not None


## check hpex
if is_package_available("habana_frameworks"):
_hpex_available = True
import habana_frameworks.torch.hpex # pylint: disable=E0401
else:
_hpex_available = False


@torch._dynamo.disable()
@lru_cache(None)
def is_optimum_habana_available():
from transformers.utils.import_utils import is_optimum_available

return is_optimum_available() and importlib.util.find_spec("optimum.habana") is not None
def is_hpex_available():
return _hpex_available


def get_module(module, key):
@@ -553,7 +572,7 @@ def is_valid_digit(s):
if torch.cuda.is_available():
device = torch.device("cuda")
# logger.info("Using GPU device")
elif is_optimum_habana_available(): # pragma: no cover
elif is_hpex_available(): # pragma: no cover
device = torch.device("hpu")
# logger.info("Using HPU device")
elif torch.xpu.is_available(): # pragma: no cover
@@ -780,15 +799,6 @@ def is_autoround_exllamav2_available():
return res


@lru_cache(None)
def is_hpu_supported(): # pragma: no cover
try:
import habana_frameworks.torch.core as htcore # pylint: disable=E0401
except ImportError as e:
return False
return True


def get_library_version(library_name):
from packaging.version import Version

@@ -906,7 +916,7 @@ def _clear_memory_for_cpu_and_cuda(tensor=None):

@torch._dynamo.disable()
def clear_memory(tensor=None):
if is_hpu_supported():
if is_hpex_available():
# hpu does not have empty_cache
return
else:
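The new availability check replaces both is_optimum_habana_available() and the try/except-based is_hpu_supported() removed above. importlib.util.find_spec only locates a package spec, so the probe itself does not execute the package; the one-time import of habana_frameworks.torch.hpex happens at module load only when the spec is found. A small standalone sketch of that detection idiom (the module names in the demo prints are arbitrary examples, not part of the PR):

import sys
from importlib.util import find_spec

def is_package_available(package_name: str) -> bool:
    # Same idiom as the helper added above: resolve the spec without importing.
    return find_spec(package_name) is not None

print(is_package_available("sqlite3"))          # True when the stdlib module is installed
print("sqlite3" in sys.modules)                 # typically still False: nothing was imported
print(is_package_available("no_such_package"))  # False: find_spec returns None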
11 changes: 0 additions & 11 deletions test/test_cpu/_test_helpers.py
@@ -1,14 +1,3 @@
import pytest


def is_pytest_mode_compile():
return pytest.mode == "compile"


def is_pytest_mode_lazy():
return pytest.mode == "lazy"


def model_infer(model, tokenizer, apply_chat_template=False):
prompts = [
"Hello,my name is",
43 changes: 43 additions & 0 deletions test/test_hpu/_test_helpers.py
@@ -0,0 +1,43 @@
import pytest


def is_pytest_mode_compile():
return pytest.mode == "compile"


def is_pytest_mode_lazy():
return pytest.mode == "lazy"


def model_infer(model, tokenizer, apply_chat_template=False):
prompts = [
"Hello,my name is",
# "The president of the United States is",
# "The capital of France is",
# "The future of AI is",
]
if apply_chat_template:
texts = []
for prompt in prompts:
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
texts.append(text)
prompts = texts

inputs = tokenizer(prompts, return_tensors="pt", padding=False, truncation=True)

outputs = model.generate(
input_ids=inputs["input_ids"].to(model.device),
attention_mask=inputs["attention_mask"].to(model.device),
do_sample=False, ## change this to follow official usage
max_new_tokens=5,
)
generated_ids = [output_ids[len(input_ids) :] for input_ids, output_ids in zip(inputs["input_ids"], outputs)]

decoded_outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

for i, prompt in enumerate(prompts):
print(f"Prompt: {prompt}")
print(f"Generated: {decoded_outputs[i]}")
print("-" * 50)
return decoded_outputs[0]
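is_pytest_mode_compile() and is_pytest_mode_lazy() read pytest.mode, and the CI script passes --mode compile, so a conftest.py under test/test_hpu presumably registers that option and stores the value. The conftest itself is not shown in this diff; the hook below is a hedged sketch whose option name, default, and attribute are inferred from the usage above, not copied from the repository:

import pytest

def pytest_addoption(parser):
    # Hypothetical hook: register the --mode flag used by run_ut_hpu.sh.
    parser.addoption("--mode", action="store", default="lazy",
                     choices=("lazy", "compile"),
                     help="HPU execution mode the tests should assume")

def pytest_configure(config):
    # Hypothetical hook: expose the selection as pytest.mode for the helpers above.
    pytest.mode = config.getoption("--mode")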
File renamed without changes.
1 change: 1 addition & 0 deletions test/test_hpu/requirements.txt
@@ -0,0 +1 @@

6 changes: 3 additions & 3 deletions (relocated HPU-only test file)
@@ -2,7 +2,7 @@
import torch
from _test_helpers import is_pytest_mode_compile, is_pytest_mode_lazy

from auto_round.utils import is_hpu_supported
from auto_round.utils import is_hpex_available


def run_opt_125m_on_hpu():
@@ -28,13 +28,13 @@ def run_opt_125m_on_hpu():
assert q_model is not None, "Expected q_model to be not None"


@pytest.mark.skipif(not is_hpu_supported(), reason="HPU is not supported")
@pytest.mark.skipif(not is_hpex_available(), reason="HPU is not supported")
@pytest.mark.skipif(not is_pytest_mode_lazy(), reason="Only for lazy mode")
def test_opt_125m_lazy_mode():
run_opt_125m_on_hpu()


@pytest.mark.skipif(not is_hpu_supported(), reason="HPU is not supported")
@pytest.mark.skipif(not is_hpex_available(), reason="HPU is not supported")
@pytest.mark.skipif(not is_pytest_mode_compile(), reason="Only for compile mode")
def test_opt_125m_compile_mode():
torch._dynamo.reset()
6 changes: 3 additions & 3 deletions test/test_cpu/test_hpu.py → test/test_hpu/test_inference.py
@@ -18,7 +18,7 @@ def __iter__(self):
yield torch.ones([1, 10], dtype=torch.long)


def is_hpu_supported():
def is_hpex_available():
try:
import habana_frameworks.torch.core as htcore # pylint: disable=E0401
except ImportError as e:
@@ -40,7 +40,7 @@ def tearDownClass(self):
shutil.rmtree("runs", ignore_errors=True)

def test_autogptq_format_hpu_inference(self):
if not is_hpu_supported():
if not is_hpex_available():
return
try:
import auto_gptq
@@ -73,7 +73,7 @@ def test_autogptq_format_hpu_inference(self):
shutil.rmtree("./saved", ignore_errors=True)

def test_autoround_format_hpu_inference(self):
if not is_hpu_supported():
if not is_hpex_available():
return
bits, group_size, sym = 4, 128, False
autoround = AutoRound(