Skip to content

Commit e08d643

Browse files
committed
refactor code so that only free() is used
1 parent 7305f9d commit e08d643

File tree

6 files changed

+272
-199
lines changed

6 files changed

+272
-199
lines changed

vllm/core/block/block_table.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,22 @@ def ids(self) -> List[int]:
5656
return self._block_ids
5757

5858

59+
def append_token_ids_and_update_allocator(
        block: Block, token_ids: List[int],
        allocator: DeviceAwareBlockAllocator) -> Block:
    """Append token ids to a block, adopting any block the allocator swaps in.

    The allocator is first asked to copy the block if it is not appendable,
    then the tokens are appended, and finally the allocator is asked to
    promote the block to an immutable one. Whenever the allocator hands back
    a block, that block replaces the one being worked on.

    Args:
        block: The block to append to.
        token_ids: The token ids to append.
        allocator: Object providing ``cow_block_if_not_appendable`` and
            ``promote_to_immutable_block``; both return ``None`` when no
            replacement block was produced.

    Returns:
        The block that now holds the appended tokens. Callers should store
        it in place of the block they passed in.
    """
    # Compare against None explicitly instead of relying on truthiness: a
    # replacement block whose class defines __len__/__bool__ could be falsy
    # and would otherwise be dropped silently.
    new_block = allocator.cow_block_if_not_appendable(block)
    if new_block is not None:
        block = new_block

    block.append_token_ids(token_ids)

    immutable_block = allocator.promote_to_immutable_block(block)
    if immutable_block is not None:
        block = immutable_block

    return block
73+
74+
5975
class BlockTable:
6076
"""A class to manage blocks for a specific sequence.
6177
@@ -193,11 +209,14 @@ def append_token_ids(self,
193209
num_lookahead_slots)
194210

195211
# Update the blocks with the new tokens
196-
blocks = self.blocks[self._num_full_slots // self._block_size:]
212+
first_block_idx = self._num_full_slots // self._block_size
197213
token_blocks = self._chunk_token_blocks_for_append(token_ids)
198214

199-
for block, token_block in zip(blocks, token_blocks):
200-
block.append_token_ids(token_block)
215+
for i, token_block in enumerate(token_blocks):
216+
cur_block_idx = first_block_idx + i
217+
self._blocks[
218+
cur_block_idx] = append_token_ids_and_update_allocator(
219+
self._blocks[cur_block_idx], token_block, self._allocator)
201220

202221
self._num_full_slots += len(token_ids)
203222

@@ -328,7 +347,11 @@ def _allocate_blocks_for_token_ids(self, prev_block: Optional[Block],
328347

329348
block = self._allocator.allocate_mutable_block(
330349
prev_block=prev_block, device=device)
350+
351+
# Note that no copy-on-write or immutable promotion can happen
352+
# here since this block is fresh and not full
331353
block.append_token_ids(cur_token_ids)
354+
332355
blocks.append(block)
333356

334357
return blocks

vllm/core/block/common.py

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -114,15 +114,22 @@ def __init__(
114114
self._refcounter = refcounter
115115
self._allocator = allocator
116116

117-
def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
117+
def is_appendable(self, block: Block) -> bool:
    """Report whether tokens may be appended to ``block`` in place.

    A block with no ``block_id`` is always appendable. Otherwise the block
    is appendable only while the refcounter reports at most one reference
    to its id.

    Args:
        block: The block to inspect.

    Returns:
        True if the block has no id or its refcount is <= 1, else False.
    """
    physical_id = block.block_id
    has_no_id = physical_id is None
    # Short-circuit keeps the refcounter lookup from running for id-less blocks.
    return has_no_id or self._refcounter.get(physical_id) <= 1
124+
125+
def cow_block_if_not_appendable(self, block: Block) -> Optional[Block]:
118126
"""Performs a copy-on-write operation on the given block if it is not
119127
appendable.
120128
121129
This method checks the reference count of the given block. If the
122130
reference count is greater than 1, indicating that the block is shared,
123131
a copy-on-write operation is performed. The original block is freed,
124-
and a new block is allocated with the same content. The new block index
125-
is returned.
132+
and a new block is allocated with the same content.
126133
127134
Args:
128135
block (Block): The block to check for copy-on-write.
@@ -132,30 +139,31 @@ def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
132139
-write operation was performed, or the original block index if
133140
no copy-on-write was necessary.
134141
"""
135-
block_id = block.block_id
136-
if block_id is None:
137-
return block_id
142+
if self.is_appendable(block):
143+
return None
138144

139-
refcount = self._refcounter.get(block_id)
140-
assert refcount != 0
141-
if refcount > 1:
142-
src_block_id = block_id
145+
# Get data from old block
146+
prev_block = block.prev_block
147+
token_ids = block.token_ids
148+
old_block_id = block.block_id
149+
150+
# Mark the block as free and decrement its refcount
151+
self._allocator.free(block)
143152

144-
# Decrement refcount of the old physical block. Note that
145-
# we do not free the actual block object here since it is
146-
# going to reused by the caller.
147-
self._allocator.free_block_id(block)
153+
# Allocate a new block
154+
new_block = self._allocator.allocate_mutable_block(
155+
prev_block=prev_block)
156+
# Copy the tokens to the new block
157+
new_block.append_token_ids(token_ids)
148158

149-
# Allocate a fresh new block.
150-
block_id = self._allocator.allocate_mutable_block(
151-
prev_block=block.prev_block).block_id
159+
new_block_id = new_block.block_id
152160

153-
# Track src/dst copy.
154-
assert src_block_id is not None
155-
assert block_id is not None
156-
self._copy_on_writes.append((src_block_id, block_id))
161+
# Track src/dst copy.
162+
assert old_block_id is not None
163+
assert new_block_id is not None
164+
self._copy_on_writes.append((old_block_id, new_block_id))
157165

158-
return block_id
166+
return new_block
159167

160168
def clear_cows(self) -> List[Tuple[BlockId, BlockId]]:
161169
"""Clears the copy-on-write tracking information and returns the current

vllm/core/block/cpu_gpu_block_allocator.py

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -170,17 +170,6 @@ def allocate_immutable_block(self, prev_block: Optional[Block],
170170
return self._allocators[device].allocate_immutable_block(
171171
prev_block, token_ids)
172172

173-
def free_block_id(self, block: Block) -> None:
174-
"""Frees the underlying physical block_id of the given block object
175-
176-
Args:
177-
block (Block): The block for which to free the physical block id
178-
"""
179-
block_id = block.block_id
180-
assert block_id is not None
181-
allocator = self._block_ids_to_allocator[block_id]
182-
allocator.free_block_id(block)
183-
184173
def free(self, block: Block) -> None:
185174
"""Frees the memory occupied by the given block.
186175
@@ -333,11 +322,13 @@ def get_common_computed_block_ids(
333322
def all_block_ids(self) -> FrozenSet[int]:
334323
return frozenset(self._block_ids_to_allocator.keys())
335324

336-
def promote_to_immutable_block(self, block: Block) -> BlockId:
337-
raise NotImplementedError
325+
def promote_to_immutable_block(self, block: Block) -> Optional[Block]:
    """Forward immutable-block promotion to the GPU allocator.

    NOTE(review): the device is fixed to ``Device.GPU`` rather than derived
    from ``block`` -- confirm that only GPU blocks reach this path.

    Args:
        block: The block to promote.

    Returns:
        Whatever the GPU allocator's ``promote_to_immutable_block`` returns.
    """
    gpu_allocator = self._allocators[Device.GPU]
    return gpu_allocator.promote_to_immutable_block(block)
338328

339-
def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
340-
raise NotImplementedError
329+
def cow_block_if_not_appendable(self, block: Block) -> Optional[Block]:
    """Forward the copy-if-not-appendable request to the GPU allocator.

    NOTE(review): the device is fixed to ``Device.GPU`` rather than derived
    from ``block`` -- confirm that only GPU blocks reach this path.

    Args:
        block: The block to check.

    Returns:
        Whatever the GPU allocator's ``cow_block_if_not_appendable`` returns.
    """
    gpu_allocator = self._allocators[Device.GPU]
    return gpu_allocator.cow_block_if_not_appendable(block)
341332

342333
def get_and_reset_swaps(self) -> List[Tuple[int, int]]:
343334
"""Returns and clears the mapping of source to destination block IDs.

vllm/core/block/interfaces.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
class Block(ABC):
1010

1111
@abstractmethod
12-
def append_token_ids(self, token_ids: Optional[List[int]]) -> None:
12+
def append_token_ids(self,
13+
token_ids: Optional[List[int]]) -> Optional["Block"]:
1314
pass
1415

1516
@property
@@ -116,10 +117,6 @@ def allocate_immutable_blocks(
116117
block_token_ids: List[List[int]]) -> List[Block]:
117118
pass
118119

119-
@abstractmethod
120-
def free_block_id(self, block: Block) -> None:
121-
pass
122-
123120
@abstractmethod
124121
def free(self, block: Block) -> None:
125122
pass
@@ -177,13 +174,16 @@ def get_common_computed_block_ids(
177174
pass
178175

179176
@abstractmethod
180-
def cow_block_if_not_appendable(self, block: Block) -> Optional["BlockId"]:
177+
def is_appendable(self, block: Block) -> bool:
181178
"""NOTE: This should not be used besides Block"""
182179
pass
183180

184181
@abstractmethod
185-
def promote_to_immutable_block(self, block: Block) -> BlockId:
186-
"""NOTE: This should not be used besides Block"""
182+
def cow_block_if_not_appendable(self, block: Block) -> Optional[Block]:
183+
pass
184+
185+
@abstractmethod
186+
def promote_to_immutable_block(self, block: Block) -> Optional[Block]:
187187
pass
188188

189189
@abstractmethod
@@ -283,3 +283,11 @@ def allocate_or_get_null_block(self) -> Block:
283283
There is at most one null block per allocator.
284284
"""
285285
pass
286+
287+
@abstractmethod
def cow_block_if_not_appendable(self, block: Block) -> Optional[Block]:
    """Copy ``block`` if it cannot be appended to in place.

    Args:
        block: The block to check.

    Returns:
        A replacement block, or None when no replacement was made.
        NOTE(review): the exact contract is defined by the implementing
        allocators -- confirm against them.
    """
290+
291+
@abstractmethod
def promote_to_immutable_block(self, block: Block) -> Optional[Block]:
    """Promote ``block`` to an immutable block where applicable.

    Args:
        block: The block to promote.

    Returns:
        A replacement block, or None when no replacement was made.
        NOTE(review): the exact contract is defined by the implementing
        allocators -- confirm against them.
    """

0 commit comments

Comments
 (0)