Skip to content

llama : check that all the tensor data is in the model file #6885

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 25, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2999,9 +2999,13 @@ struct llama_model_loader {

ggml_tensor * tensor;

llama_tensor_weight(uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
llama_tensor_weight(const llama_file * file, uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);

if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size) {
throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", name));
}
}
};
std::vector<llama_tensor_weight> weights;
Expand Down Expand Up @@ -3040,15 +3044,15 @@ struct llama_model_loader {
get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
llm_kv = LLM_KV(llm_arch_from_string(arch_name));

files.emplace_back(new llama_file(fname.c_str(), "rb"));
contexts.emplace_back(ctx);

// Save tensors data offset of the main file.
// For subsidiary files, `meta` tensor data offset must not be used,
// so we build a unified tensors index for weights.
for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
weights.emplace_back(0, cur->name, meta, cur);
weights.emplace_back(files.back().get(), 0, cur->name, meta, cur);
}
files.emplace_back(new llama_file(fname.c_str(), "rb"));
contexts.emplace_back(ctx);

uint16_t n_split = 0;
get_key(llm_kv(LLM_KV_SPLIT_COUNT), n_split, false);

Expand Down Expand Up @@ -3082,12 +3086,13 @@ struct llama_model_loader {
throw std::runtime_error(format("%s: failed to load GGUF split from %s\n", __func__, split_path));
}

files.emplace_back(new llama_file(split_path, "rb"));
contexts.emplace_back(ctx);

// Save tensors data offset info of the shard.
for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
weights.emplace_back(idx, cur->name, ctx_gguf, cur);
weights.emplace_back(files.back().get(), idx, cur->name, ctx_gguf, cur);
}
files.emplace_back(new llama_file(split_path, "rb"));
contexts.emplace_back(ctx);

gguf_free(ctx_gguf);
}
Expand Down