Fix memory allocation for base-patch16 #18

Merged: 1 commit, Jun 25, 2023
clip.cpp (23 additions, 8 deletions)
@@ -15,13 +15,20 @@

 // utility function for a workaround until https://github.com/ggerganov/ggml/issues/260 is resolved
 // after that, remove this and use the mechanism implemented in GGML directly
-size_t get_mem_req_by_size(size_t n_tensors)
+size_t get_mem_req_by_size(const size_t n_tensors, const int n_image_positions)
 {
     size_t mb = 1024 * 1024;
     switch (n_tensors)
     {
     case 397: // base
-        return 8 * mb;
+        if (n_image_positions == 50)

Collaborator:

Why use == here and <= down below? Also, why the different param name?

Owner Author:

We call this function only once in clip_model_load and allocate that memory based on the number of positions in the vision model. The scratch buffer, however, is allocated separately in clip_image_encode and clip_text_encode. n_image_positions is fixed for a given model, but n_positions might be different in the case of the text model. So if I used == there, the condition would not return true for shorter texts.
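
To make that concrete, here is a minimal standalone sketch of the arithmetic behind n_image_positions (the helper name is hypothetical, and the 224-pixel input resolution is an assumption based on the standard base variants): it depends only on fixed model hyperparameters, which is why an exact == match is safe on the image side.

```cpp
#include <cstdio>

// Hypothetical helper mirroring the expression added in clip_model_load:
// one position per image patch, plus one for the class embedding.
int image_positions(int image_size, int patch_size)
{
    const int per_side = image_size / patch_size;
    return per_side * per_side + 1;
}

int main()
{
    // base-patch32: (224 / 32)^2 + 1 = 50  -> matches == 50, 8 MB compute buffer
    printf("base-patch32: %d positions\n", image_positions(224, 32));
    // base-patch16: (224 / 16)^2 + 1 = 197 -> else branch, 16 MB compute buffer
    printf("base-patch16: %d positions\n", image_positions(224, 16));
    return 0;
}
```

Both base variants land on the same case 397 label, so the tensor count alone cannot tell them apart; the extra parameter is what lets base-patch16 get the larger buffer.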

+        {
+            return 8 * mb;
+        }
+        else
+        {
+            return 16 * mb;
+        }
     case 589:
         return 16 * mb;
     case 909:
@@ -32,13 +39,20 @@ size_t get_mem_req_by_size(size_t n_tensors)
     }
 }

-size_t get_scr_buf_req_by_size(size_t n_tensors)
+size_t get_scr_buf_req_by_size(const size_t n_tensors, const int n_positions)
 {
     size_t mb = 1024 * 1024;
     switch (n_tensors)
     {
     case 397: // base
-        return 16 * mb;
+        if (n_positions <= 50)
+        {
+            return 16 * mb;
+        }
+        else
+        {
+            return 64 * mb;
+        }
     case 589:
         return 64 * mb;
     case 909:
@@ -758,8 +772,9 @@ struct clip_ctx *clip_model_load(const char *fname, const int verbosity = 1)
     // TODO: We currently get the size of memory requirement from the pre-computed information
     // based on the model variant, indicated by the number of tensors.
     // Rewrite this logic when GGML implements a mechanism to predict the required memory.
-    size_t n_tensors = new_clip->text_model.tensors.size() + new_clip->vision_model.tensors.size();
-    size_t mem_req = get_mem_req_by_size(n_tensors);
+    const size_t n_tensors = new_clip->text_model.tensors.size() + new_clip->vision_model.tensors.size();
+    const int n_image_positions = (vision_model.hparams.image_size / vision_model.hparams.patch_size) * (vision_model.hparams.image_size / vision_model.hparams.patch_size) + 1;
+    size_t mem_req = get_mem_req_by_size(n_tensors, n_image_positions);
     new_clip->buf_compute.resize(mem_req);

     if (verbosity >= 2)
@@ -813,7 +828,7 @@ bool clip_text_encode(
     struct ggml_cgraph gf = {};
     gf.n_threads = n_threads;

-    static size_t scr0_size = get_scr_buf_req_by_size(ctx->text_model.tensors.size() + ctx->vision_model.tensors.size());
+    static size_t scr0_size = get_scr_buf_req_by_size(ctx->text_model.tensors.size() + ctx->vision_model.tensors.size(), N);
     static void *scr0 = malloc(scr0_size);

     struct ggml_tensor *input_ids = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -1056,7 +1071,7 @@ bool clip_image_encode(
     struct ggml_cgraph gf = {};
     gf.n_threads = n_threads;

-    static size_t scr0_size = get_scr_buf_req_by_size(ctx->text_model.tensors.size() + ctx->vision_model.tensors.size());
+    static size_t scr0_size = get_scr_buf_req_by_size(ctx->text_model.tensors.size() + ctx->vision_model.tensors.size(), num_positions);
     static void *scr0 = malloc(scr0_size);

     struct ggml_tensor *inp = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, image_size, image_size, 3, 1);
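
On the text side, the second argument varies per call, which is why the scratch-buffer helper compares with <= rather than ==. A short sketch of the patched 397-tensor (base) branch illustrates this; the helper name is hypothetical, and CLIP's usual 77-token text context is an assumption here.

```cpp
#include <cstddef>
#include <cstdio>

// Mirrors the patched 397-tensor (base) branch of get_scr_buf_req_by_size.
size_t base_scratch_bytes(int n_positions)
{
    const size_t mb = 1024 * 1024;
    return n_positions <= 50 ? 16 * mb : 64 * mb;
}

int main()
{
    const size_t mb = 1024 * 1024;
    printf("N = 12  -> %zu MB\n", base_scratch_bytes(12) / mb);  // short prompt: 16 MB
    printf("N = 77  -> %zu MB\n", base_scratch_bytes(77) / mb);  // full text context: 64 MB
    printf("N = 197 -> %zu MB\n", base_scratch_bytes(197) / mb); // base-patch16 image pass: 64 MB
    return 0;
}
```

With an exact == 50 check, shorter prompts would fall through to the 64 MB branch and over-allocate, which is the situation the author's reply describes.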