@@ -1166,50 +1166,51 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo
1166
1166
ctx->info [tensor_id].t .data = (void *)(uintptr_t )data; // double cast suppresses warning about casting away const
1167
1167
}
1168
1168
1169
- struct gguf_writer {
1170
- std::vector<int8_t > & buf;
1169
+ struct gguf_writer_base {
1170
+ size_t written_bytes {0u };
1171
+
1172
+ ~gguf_writer_base (void ) {}
1171
1173
1172
- gguf_writer (std::vector<int8_t > & buf) : buf(buf) {}
1174
+ // we bet on devirtualization
1175
+ virtual void write (int8_t val) = 0;
1176
+ virtual void write (const std::vector<int8_t > & val) = 0;
1177
+ virtual void write_tensor_data (const struct gguf_tensor_info & info, size_t offset_data, size_t alignment) = 0;
1173
1178
1174
1179
template <typename T>
1175
- void write (const T & val) const {
1180
+ void write (const T & val) {
1176
1181
for (size_t i = 0 ; i < sizeof (val); ++i) {
1177
- buf. push_back (reinterpret_cast <const int8_t *>(&val)[i]);
1182
+ write (reinterpret_cast <const int8_t *>(&val)[i]);
1178
1183
}
1179
1184
}
1180
1185
1181
- void write (const std::vector<int8_t > & val) const {
1182
- buf.insert (buf.end (), val.begin (), val.end ());
1183
- }
1184
-
1185
- void write (const bool & val) const {
1186
+ void write (const bool & val) {
1186
1187
const int8_t val8 = val ? 1 : 0 ;
1187
1188
write (val8);
1188
1189
}
1189
1190
1190
- void write (const std::string & val) const {
1191
+ void write (const std::string & val) {
1191
1192
{
1192
1193
const uint64_t n = val.length ();
1193
1194
write (n);
1194
1195
}
1195
1196
for (size_t i = 0 ; i < val.length (); ++i) {
1196
- buf. push_back ( reinterpret_cast < const int8_t *> (val.data ())[i]);
1197
+ write ( (val.data ())[i]);
1197
1198
}
1198
1199
}
1199
1200
1200
- void write (const char * val) const {
1201
+ void write (const char * val) {
1201
1202
write (std::string (val));
1202
1203
}
1203
1204
1204
- void write (const enum ggml_type & val) const {
1205
+ void write (const enum ggml_type & val) {
1205
1206
write (int32_t (val));
1206
1207
}
1207
1208
1208
- void write (const enum gguf_type & val) const {
1209
+ void write (const enum gguf_type & val) {
1209
1210
write (int32_t (val));
1210
1211
}
1211
1212
1212
- void write (const struct gguf_kv & kv) const {
1213
+ void write (const struct gguf_kv & kv) {
1213
1214
const uint64_t ne = kv.get_ne ();
1214
1215
1215
1216
write (kv.get_key ());
@@ -1250,7 +1251,7 @@ struct gguf_writer {
1250
1251
}
1251
1252
}
1252
1253
1253
- void write_tensor_meta (const struct gguf_tensor_info & info) const {
1254
+ void write_tensor_meta (const struct gguf_tensor_info & info) {
1254
1255
write (info.t .name );
1255
1256
1256
1257
const uint32_t n_dims = ggml_n_dims (&info.t );
@@ -1263,14 +1264,33 @@ struct gguf_writer {
1263
1264
write (info.offset );
1264
1265
}
1265
1266
1266
- void pad (const size_t alignment) const {
1267
- while (buf. size () % alignment != 0 ) {
1267
+ void pad (const size_t alignment) {
1268
+ while (written_bytes % alignment != 0 ) {
1268
1269
const int8_t zero = 0 ;
1269
1270
write (zero);
1270
1271
}
1271
1272
}
1273
+ };
1274
+
1275
+ // vector buffer based writer
1276
+ struct gguf_writer_buf final : public gguf_writer_base {
1277
+ std::vector<int8_t > & buf;
1278
+
1279
+ gguf_writer_buf (std::vector<int8_t > & buf) : buf(buf) {}
1280
+
1281
+ using gguf_writer_base::write;
1282
+
1283
+ void write (const int8_t val) override {
1284
+ buf.push_back (val);
1285
+ written_bytes++;
1286
+ }
1272
1287
1273
- void write_tensor_data (const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) const {
1288
+ void write (const std::vector<int8_t > & val) override {
1289
+ buf.insert (buf.end (), val.begin (), val.end ());
1290
+ written_bytes += val.size ();
1291
+ }
1292
+
1293
+ void write_tensor_data (const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
1274
1294
GGML_ASSERT (buf.size () - offset_data == info.offset );
1275
1295
1276
1296
GGML_ASSERT (ggml_is_contiguous (&info.t ));
@@ -1284,14 +1304,58 @@ struct gguf_writer {
1284
1304
GGML_ASSERT (info.t .data );
1285
1305
memcpy (buf.data () + offset, info.t .data , nbytes);
1286
1306
}
1307
+ written_bytes += nbytes;
1287
1308
1288
1309
pad (alignment);
1289
1310
}
1290
1311
};
1291
1312
1292
- void gguf_write_to_buf (const struct gguf_context * ctx, std::vector<int8_t > & buf, bool only_meta) {
1293
- const struct gguf_writer gw (buf);
1313
+ // file based writer
1314
+ struct gguf_writer_file final : public gguf_writer_base {
1315
+ FILE * file;
1316
+
1317
+ gguf_writer_file (FILE* file) : file(file) {}
1318
+
1319
+ using gguf_writer_base::write;
1320
+
1321
+ void write (const int8_t val) override {
1322
+ const auto real_val = static_cast <uint8_t >(val);
1323
+ const auto ret = fputc (real_val, file);
1324
+ written_bytes++;
1325
+ if (ret != real_val) {
1326
+ throw std::runtime_error (" unexpected fputc result '" + std::to_string (ret) + " ' instead of '" + std::to_string ((int )real_val) + " '" );
1327
+ }
1328
+ }
1329
+
1330
+ void write (const std::vector<int8_t > & val) override {
1331
+ const auto ret = fwrite (val.data (), 1 , val.size (), file);
1332
+ written_bytes += val.size ();
1333
+ if (ret != val.size ()) {
1334
+ throw std::runtime_error (" unexpected fwrite number of bytes written, '" + std::to_string (ret) + " ' instead of '" + std::to_string (val.size ()) + " '" );
1335
+ }
1336
+ }
1337
+
1338
+ void write_tensor_data (const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
1339
+ GGML_ASSERT (written_bytes - offset_data == info.offset );
1340
+
1341
+ GGML_ASSERT (ggml_is_contiguous (&info.t ));
1342
+ const size_t nbytes = ggml_nbytes (&info.t );
1294
1343
1344
+ std::vector<int8_t > buf (nbytes);
1345
+ if (info.t .buffer ) {
1346
+ ggml_backend_tensor_get (&info.t , buf.data (), 0 , nbytes);
1347
+ } else {
1348
+ GGML_ASSERT (info.t .data );
1349
+ memcpy (buf.data (), info.t .data , nbytes);
1350
+ }
1351
+ write (buf);
1352
+
1353
+ pad (alignment);
1354
+ }
1355
+ };
1356
+
1357
+ template <typename writer_t >
1358
+ static void gguf_write_out (const struct gguf_context * ctx, writer_t & gw, bool only_meta) {
1295
1359
const int64_t n_kv = gguf_get_n_kv (ctx);
1296
1360
const int64_t n_tensors = gguf_get_n_tensors (ctx);
1297
1361
@@ -1321,14 +1385,19 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu
1321
1385
return ;
1322
1386
}
1323
1387
1324
- const size_t offset_data = gw.buf . size () ;
1388
+ const size_t offset_data = gw.written_bytes ;
1325
1389
1326
1390
// write tensor data
1327
1391
for (int64_t i = 0 ; i < n_tensors; ++i) {
1328
1392
gw.write_tensor_data (ctx->info [i], offset_data, ctx->alignment );
1329
1393
}
1330
1394
}
1331
1395
1396
+ void gguf_write_to_buf (const struct gguf_context * ctx, std::vector<int8_t > & buf, bool only_meta) {
1397
+ gguf_writer_buf gw (buf);
1398
+ gguf_write_out (ctx, gw, only_meta);
1399
+ }
1400
+
1332
1401
bool gguf_write_to_file (const struct gguf_context * ctx, const char * fname, bool only_meta) {
1333
1402
FILE * file = ggml_fopen (fname, " wb" );
1334
1403
@@ -1337,11 +1406,17 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
1337
1406
return false ;
1338
1407
}
1339
1408
1340
- std::vector<int8_t > buf;
1341
- gguf_write_to_buf (ctx, buf, only_meta);
1342
- const bool ok = fwrite (buf.data (), 1 , buf.size (), file) == buf.size ();
1409
+ try {
1410
+ gguf_writer_file gw (file);
1411
+ gguf_write_out (ctx, gw, only_meta);
1412
+ } catch (const std::runtime_error& ex) {
1413
+ GGML_LOG_ERROR (" %s: failed to write GGUF data into '%s': %s\n " , __func__, fname, ex.what ());
1414
+ fclose (file);
1415
+ return false ;
1416
+ }
1417
+
1343
1418
fclose (file);
1344
- return ok ;
1419
+ return true ;
1345
1420
}
1346
1421
1347
1422
size_t gguf_get_meta_size (const struct gguf_context * ctx) {
0 commit comments