Skip to content

Commit 45272fd

Browse files
committed
fix xpu block num
1 parent 4066433 commit 45272fd

File tree

1 file changed

+4 -2 lines changed

1 file changed

+4 -2 lines changed

vllm/worker/cache_engine.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -104,12 +104,12 @@ def allocate_xpu_cache(self) -> List[KVCache]:
104104
value_block_shape = self.get_value_block_shape()
105105
for _ in range(self.num_layers):
106106
key_blocks = torch.empty(
107-
size=(self.num_gpu_blocks, *key_block_shape),
107+
size=(self.num_xpu_blocks, *key_block_shape),
108108
dtype=self.dtype,
109109
device="xpu",
110110
)
111111
value_blocks = torch.empty(
112-
size=(self.num_gpu_blocks, *value_block_shape),
112+
size=(self.num_xpu_blocks, *value_block_shape),
113113
dtype=self.dtype,
114114
device="xpu",
115115
)
@@ -147,6 +147,7 @@ def _swap(
147147
dst: List[KVCache],
148148
src_to_dst: Dict[int, int],
149149
) -> None:
150+
print("swapping cache")
150151
with torch.cuda.stream(self.cache_stream):
151152
for i in range(self.num_layers):
152153
src_key_cache, src_value_cache = src[i]
@@ -166,6 +167,7 @@ def swap_out(self, src_to_dst: Dict[int, int]) -> None:
166167
self._swap(self.gpu_cache, self.cpu_cache, src_to_dst)
167168

168169
def copy(self, src_to_dsts: Dict[int, List[int]]) -> None:
170+
print("copy cache")
169171
key_caches = [key_cache for key_cache, _ in self.xpu_cache]
170172
value_caches = [value_cache for _, value_cache in self.xpu_cache]
171173
# NOTE(woosuk): This operation implicitly synchronizes the CPU and GPU.

0 commit comments

Comments
 (0)