tests/unit_tests/worker/test_hpu_input_batch.py (5 changes: 4 additions & 1 deletion)

@@ -15,6 +15,7 @@
 from vllm.v1.pool.metadata import PoolingMetadata
 from vllm.v1.sample.logits_processor import LogitsProcessors
 from vllm.v1.sample.metadata import SamplingMetadata
+from vllm.v1.utils import CpuGpuBuffer
 from vllm.v1.worker.block_table import BlockTable, MultiGroupBlockTable
 from vllm_gaudi.v1.worker.hpu_input_batch import InputBatch, CachedRequestState

@@ -37,7 +38,7 @@ def _compare_objs(obj1, obj2, skip: Sequence = ("logitsprocs", "batch_update_bui
         is_same = False
         if isinstance(a, torch.Tensor):
-            if (a.numel() == 0 or b.numel() == 0):
+            if a.numel() == 0 or b.numel() == 0:
                 is_same = (a.numel() == 0 and b.numel() == 0)
             elif torch.allclose(a, b):
                 is_same = True

@@ -53,6 +54,8 @@ def _compare_objs(obj1, obj2, skip: Sequence = ("logitsprocs", "batch_update_bui
             is_same = True  # if we make it here must be same
         elif a == b:
             is_same = True
+        elif isinstance(a, CpuGpuBuffer):
+            is_same = np.allclose(a.np, b.np) and torch.allclose(a.gpu, b.gpu)
         assert is_same, f"Attribute {attr_name} is different"\
             f" in {obj1} and {obj2}: {a} != {b}"

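For context on the new branch: a `CpuGpuBuffer` keeps a host-side array (exposed as a numpy view via `.np`) paired with a device tensor (`.gpu`), so two buffers only count as equal when both halves match. A minimal, self-contained sketch of that check, using a hypothetical stand-in class rather than the real `vllm.v1.utils.CpuGpuBuffer`:

```python
import numpy as np
import torch

class FakeCpuGpuBuffer:
    """Hypothetical stand-in for vllm.v1.utils.CpuGpuBuffer: a host numpy
    view (.np) paired with a tensor (.gpu); both kept on CPU here so the
    sketch runs without an accelerator."""

    def __init__(self, data: np.ndarray):
        self.np = data                              # host-side view
        self.gpu = torch.from_numpy(data).clone()   # "device" tensor

def buffers_match(a: FakeCpuGpuBuffer, b: FakeCpuGpuBuffer) -> bool:
    # Mirrors the new _compare_objs branch: the host views and the
    # device tensors must both agree.
    return np.allclose(a.np, b.np) and torch.allclose(a.gpu, b.gpu)

x = FakeCpuGpuBuffer(np.arange(4, dtype=np.float32))
y = FakeCpuGpuBuffer(np.arange(4, dtype=np.float32))
assert buffers_match(x, y)
```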
tests/unit_tests/worker/test_hpu_model_runner.py (2 changes: 1 addition & 1 deletion)

@@ -157,7 +157,7 @@ def _is_req_state_block_table_match(model_runner, req_id: str) -> bool:
     if block_table.num_blocks_per_row[req_index] != len(req_state.block_ids[0]):
         return False
     num_blocks = block_table.num_blocks_per_row[req_index]
-    return (block_table.block_table_np[req_index, :num_blocks] == req_state.block_ids[0]).all()
+    return (block_table.block_table.np[req_index, :num_blocks] == req_state.block_ids[0]).all()


 def test_update_states_new_request(model_runner, dist_init):
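The one-line change tracks a vLLM refactor in which `BlockTable` stores its table as a `CpuGpuBuffer`, so the numpy view moves from a dedicated `block_table_np` attribute to the buffer's `.np` field. A sketch of the row comparison the test performs, with plain numpy arrays standing in for the real block table and request state:

```python
import numpy as np

# Hypothetical stand-ins: the .block_table.np view and per-row block counts.
block_table_np = np.array([[3, 7, 9, 0],
                           [4, 1, 0, 0]])
num_blocks_per_row = np.array([3, 2])
req_block_ids = [3, 7, 9]  # req_state.block_ids[0] for request row 0

req_index = 0
num_blocks = num_blocks_per_row[req_index]
# Same comparison as the test: only the first num_blocks slots are valid;
# the remainder of the row is padding.
assert (block_table_np[req_index, :num_blocks] == req_block_ids).all()
```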
vllm_gaudi/v1/worker/hpu_model_runner.py (6 changes: 2 additions & 4 deletions)

@@ -43,7 +43,7 @@
 from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
 from vllm.multimodal.inputs import PlaceholderRange
 from vllm.sampling_params import SamplingType
-from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
+from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, LayerBlockType, cdiv, is_pin_memory_available, LazyLoader)
 from vllm_gaudi.utils import (HPUCompileConfig, is_fake_hpu, async_h2d_copy)
 from vllm_gaudi.v1.attention.backends.hpu_attn import HPUAttentionMetadataV1

@@ -728,9 +728,7 @@ def __init__(
             logger.info("Bucketing is OFF.")
         self._PAD_SLOT_ID = -1
         self._PAD_BLOCK_ID = -1
-        self._tokenizer = init_tokenizer_from_configs(model_config=vllm_config.model_config,
-                                                      scheduler_config=vllm_config.scheduler_config,
-                                                      lora_config=vllm_config.lora_config).tokenizer
+        self._tokenizer = init_tokenizer_from_configs(model_config=vllm_config.model_config)

         # TODO(madamczyk-intel): add a knob for that
         # TODO(madamczyk-intel): debug why increasing it lowers acc
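The import move and the slimmer call track upstream vLLM, where `init_tokenizer_from_configs` now lives in `vllm.transformers_utils.tokenizer`, accepts the model config alone, and returns the tokenizer directly instead of a group object that had to be unwrapped via `.tokenizer`. A before/after sketch, assuming a populated `VllmConfig` is available as `vllm_config`:

```python
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs

# New call, as in the diff: one keyword argument, tokenizer returned directly.
tokenizer = init_tokenizer_from_configs(model_config=vllm_config.model_config)

# Old call it replaces (module path and signature taken from the removed lines):
# from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
# tokenizer = init_tokenizer_from_configs(
#     model_config=vllm_config.model_config,
#     scheduler_config=vllm_config.scheduler_config,
#     lora_config=vllm_config.lora_config,
# ).tokenizer
```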