Commit 09d1703

refactor gguf load
1 parent 035f0d7 commit 09d1703

5 files changed: 55 additions & 38 deletions

examples/mnist/mnist-common.cpp

Lines changed: 7 additions & 38 deletions
@@ -160,20 +160,19 @@ mnist_model mnist_model_init_from_file(const std::string & fname, const std::str
     mnist_model model(backend);
     fprintf(stderr, "%s: loading model weights from '%s'\n", __func__, fname.c_str());
 
-    struct gguf_context * ctx_be; // be == backend
-
+    struct gguf_context * ctx;
     {
         struct gguf_init_params params = {
             /*.no_alloc =*/ true,
             /*.ctx      =*/ &model.ctx_weight,
         };
-        ctx_be = gguf_init_from_file(fname.c_str(), params);
-        if (!ctx_be) {
+        ctx = gguf_init_from_file(fname.c_str(), params);
+        if (!ctx) {
             fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
             exit(1);
         }
     }
-    model.arch = gguf_get_val_str(ctx_be, gguf_find_key(ctx_be, "general.architecture"));
+    model.arch = gguf_get_val_str(ctx, gguf_find_key(ctx, "general.architecture"));
     fprintf(stderr, "%s: model arch is %s\n", __func__, model.arch.c_str());
 
     if (model.arch == "mnist-fc") {
@@ -247,40 +246,10 @@ mnist_model mnist_model_init_from_file(const std::string & fname, const std::str
     }
     model.buf_weight = ggml_backend_alloc_ctx_tensors(model.ctx_weight, model.backend);
 
-    void * buf_tmp = malloc(model.size_weight);
-    struct ggml_context * ctx_ggml_tmp;
-    {
-        struct ggml_init_params params = {
-            /*.mem_size   =*/ model.size_weight,
-            /*.mem_buffer =*/ buf_tmp,
-            /*.no_alloc   =*/ false,
-        };
-        ctx_ggml_tmp = ggml_init(params);
+    if (!ggml_backend_load_from_gguf(fname.c_str(), model.ctx_weight, ctx)) {
+        fprintf(stderr, "%s: loading weights from %s failed\n", __func__, fname.c_str());
+        exit(1);
     }
-    struct gguf_context * ctx_gguf_tmp;
-    {
-        struct gguf_init_params params = {
-            /*.no_alloc =*/ false,
-            /*.ctx      =*/ &ctx_ggml_tmp,
-        };
-        ctx_gguf_tmp = gguf_init_from_file(fname.c_str(), params);
-        if (!ctx_gguf_tmp) {
-            fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
-            exit(1);
-        }
-    }
-    for (const std::string & s : {"fc1.weight", "fc1.bias", "fc2.weight", "fc2.bias"}) {
-        const struct ggml_tensor * src = ggml_get_tensor(ctx_ggml_tmp, s.c_str());
-        struct ggml_tensor * dst = ggml_get_tensor(model.ctx_weight, s.c_str());
-        GGML_ASSERT(ggml_nbytes(src) == ggml_nbytes(dst));
-        ggml_backend_tensor_set(dst, src->data, 0, ggml_nbytes(dst));
-    }
-
-    gguf_free(ctx_gguf_tmp);
-    ggml_free(ctx_ggml_tmp);
-    free(buf_tmp);
-
-    gguf_free(ctx_be);
 
     fprintf(stderr, "%s: successfully loaded weights from %s\n", __func__, fname.c_str());
     return model;

include/ggml-alloc.h

Lines changed: 1 addition & 0 deletions
@@ -67,6 +67,7 @@ GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph
 GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
 
 // Utils
+
 // Create a buffer and allocate all the tensors in a ggml_context
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);

include/ggml-backend.h

Lines changed: 1 addition & 0 deletions
@@ -234,6 +234,7 @@ extern "C" {
     GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
     GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
 
+    GGML_API bool ggml_backend_load_from_gguf(const char * fname, struct ggml_context * ctx_ggml, struct gguf_context * ctx_gguf);
 
 #ifdef __cplusplus
 }

src/ggml-backend.c

Lines changed: 45 additions & 0 deletions
@@ -2267,3 +2267,48 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
 
     return true;
 }
+
+bool ggml_backend_load_from_gguf(const char * fname, struct ggml_context * ctx_ggml, struct gguf_context * ctx_gguf) {
+    FILE * f = ggml_fopen(fname, "rb");
+    if (!f) {
+        return false;
+    }
+
+    const size_t buf_size = 4*1024*1024;
+    void * buf = malloc(buf_size);
+
+    const int n_tensors = gguf_get_n_tensors(ctx_gguf);
+    for (int i = 0; i < n_tensors; i++) {
+        const char * name = gguf_get_tensor_name(ctx_gguf, i);
+
+        struct ggml_tensor * tensor = ggml_get_tensor(ctx_ggml, name);
+        if (!tensor) {
+            return false;
+        }
+
+        const size_t offs = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i);
+
+        if (fseek(f, offs, SEEK_SET) != 0) {
+            fclose(f);
+            free(buf);
+            return false;
+        }
+
+        const size_t nbytes = ggml_nbytes(tensor);
+        for (size_t pos = 0; pos < nbytes; pos += buf_size) {
+            const size_t nbytes_cpy = MIN(buf_size, nbytes - pos);
+
+            if (fread(buf, 1, nbytes_cpy, f) != nbytes_cpy) {
+                fclose(f);
+                free(buf);
+                return false;
+            }
+
+            ggml_backend_tensor_set(tensor, buf, pos, nbytes_cpy);
+        }
+    }
+
+    fclose(f);
+    free(buf);
+    return true;
+}
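
To make the new entry point concrete, here is a minimal caller-side sketch assembled from the mnist changes above. It is not part of the commit: the load_weights wrapper and its backend parameter are hypothetical, and the exit-on-failure error handling just mirrors the example's style; only the gguf/ggml calls and their ordering come from this diff.

#include <stdio.h>
#include <stdlib.h>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

// Hypothetical helper (not in this commit): parse GGUF metadata, allocate
// backend memory, then stream the weights in with the new loader.
static struct ggml_context * load_weights(const char * fname, ggml_backend_t backend) {
    struct ggml_context * ctx_w = NULL;

    // Step 1: read the GGUF metadata only. With no_alloc = true the tensors
    // in ctx_w get their shapes and types but no data.
    struct gguf_init_params params = {
        /*.no_alloc =*/ true,
        /*.ctx      =*/ &ctx_w,
    };
    struct gguf_context * ctx_gguf = gguf_init_from_file(fname, params);
    if (!ctx_gguf) {
        fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
        exit(1);
    }

    // Step 2: allocate backend memory for every tensor described in ctx_w.
    // A real caller (e.g. mnist_model) keeps this buffer and frees it when
    // the model is destroyed.
    struct ggml_backend_buffer * buf = ggml_backend_alloc_ctx_tensors(ctx_w, backend);
    GGML_ASSERT(buf != NULL);

    // Step 3: stream the tensor data from the file into the backend buffers.
    if (!ggml_backend_load_from_gguf(fname, ctx_w, ctx_gguf)) {
        fprintf(stderr, "%s: loading weights from %s failed\n", __func__, fname);
        exit(1);
    }

    gguf_free(ctx_gguf); // the metadata context is no longer needed
    return ctx_w;
}

The design point of the refactor is visible in the helper itself: instead of materializing a second full copy of the weights in a temporary ggml context (the removed malloc(model.size_weight) path in the mnist example), it streams the file through a fixed 4 MiB staging buffer and hands each chunk to ggml_backend_tensor_set(), so peak host memory no longer grows with tensor size.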

src/ggml-cuda/out-prod.cu

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+#include "out-prod.cuh"
 #include "opt-step-adam.cuh"
 #include "vendors/cuda.h"
 