Skip to content

Commit 3470a5c

Browse files
authored
ggml-alloc : make gallocr prefer chunks that allow memory reuse (#16788)
1 parent bd562fe commit 3470a5c

File tree

1 file changed

+11
-4
lines changed

1 file changed

+11
-4
lines changed

ggml/src/ggml-alloc.c

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -226,16 +226,23 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
226226
}
227227

228228
if (best_fit_block == -1) {
229-
// no suitable block found, try the last block (this will grow a chunks size)
229+
// no suitable block found, try the last block (this may grow a chunk's size)
230+
int64_t best_reuse = INT64_MIN;
230231
for (int c = 0; c < alloc->n_chunks; ++c) {
231232
struct tallocr_chunk * chunk = alloc->chunks[c];
232233
if (chunk->n_free_blocks > 0) {
233234
struct free_block * block = &chunk->free_blocks[chunk->n_free_blocks - 1];
234235
max_avail = MAX(max_avail, block->size);
235-
if (block->size >= size) {
236+
int64_t reuse_factor = chunk->max_size - block->offset - size;
237+
// reuse_factor < 0 : amount of extra memory that needs to be allocated
238+
// reuse_factor = 0 : allocated free space exactly matches tensor size
239+
// reuse_factor > 0 : superfluous memory that will remain unused
240+
bool better_reuse = best_reuse < 0 && reuse_factor > best_reuse;
241+
bool better_fit = reuse_factor >= 0 && reuse_factor < best_reuse;
242+
if (block->size >= size && (better_reuse || better_fit)) {
236243
best_fit_chunk = c;
237244
best_fit_block = chunk->n_free_blocks - 1;
238-
break;
245+
best_reuse = reuse_factor;
239246
}
240247
}
241248
}
@@ -268,7 +275,7 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
268275
#ifdef GGML_ALLOCATOR_DEBUG
269276
add_allocated_tensor(alloc, addr, tensor);
270277
size_t cur_max = addr.offset + size;
271-
if (cur_max > alloc->max_size[addr.chunk]) {
278+
if (cur_max > chunk->max_size) {
272279
// sort allocated_tensors by chunk/offset
273280
for (int i = 0; i < 1024; i++) {
274281
for (int j = i + 1; j < 1024; j++) {

0 commit comments

Comments
 (0)