Skip to content

Commit

Permalink
Improve handling of special tokens in GGML to GGUF converter (ggml-org#2725)
Browse files Browse the repository at this point in the history

* Improve UNK, BOS, EOS token handling when converting without metadata.

* Allow importing as a module.

* Remove some obsolete code and minor cleanups.

* Change the default UNK token ID from -1 to 0 in llama.cpp

* Try to handle overflow caused by buggy Windows Python with a better error message
  • Loading branch information
KerfuffleV2 authored and akawrykow committed Aug 29, 2023
1 parent 46ef5b5 commit f4cbcce
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 14 deletions.
43 changes: 30 additions & 13 deletions convert-llama-ggmlv3-to-gguf.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import sys, struct, math, argparse
import sys, struct, math, argparse, warnings
from pathlib import Path

import numpy as np

import gguf

warnings.filterwarnings('error')

# Note: Does not support GGML_QKK_64
QK_K = 256
# Items here are (block size, type size)
Expand Down Expand Up @@ -215,25 +217,32 @@ def add_vocab(self, gguf_writer):
if self.vocab_override is not None:
vo = self.vocab_override
print('* Adding vocab item(s)')
for (idx, vitem) in enumerate(vo.all_tokens()):
if len(vitem) == 3:
tokens.append(vitem[0])
scores.append(vitem[1])
toktypes.append(vitem[2])
else:
# Maybe try to guess the token type here?
tokens.append(vitem[0])
scores.append(vitem[1])
for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
tokens.append(vbytes)
scores.append(score)
toktypes.append(ttype)
assert len(tokens) == hp.n_vocab, f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
gguf_writer.add_token_list(tokens)
gguf_writer.add_token_scores(scores)
if len(toktypes) > 0:
gguf_writer.add_token_types(toktypes)
return
print(f'* Adding {hp.n_vocab} vocab item(s)')
assert len(self.model.vocab.items) >= 3, 'Cannot handle unexpectedly short model vocab'
for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
tt = 1 # Normal
if len(vbytes) == 0:
# Special handling for UNK, BOS, EOS tokens.
if tokid <= 2:
if tokid == 0:
vbytes = b'<unk>'
tt = 2
elif tokid == 1:
vbytes = b'<s>'
tt = 3
else:
vbytes = b'</s>'
tt = 3
elif len(vbytes) == 0:
tt = 3 # Control
elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
vbytes = bytes(f'<0x{vbytes[0]:02X}>', encoding = 'UTF-8')
Expand All @@ -246,6 +255,9 @@ def add_vocab(self, gguf_writer):
gguf_writer.add_token_list(tokens)
gguf_writer.add_token_scores(scores)
gguf_writer.add_token_types(toktypes)
gguf_writer.add_unk_token_id(0)
gguf_writer.add_bos_token_id(1)
gguf_writer.add_eos_token_id(2)

def add_tensors(self, gguf_writer):
nm = self.name_map
Expand Down Expand Up @@ -315,7 +327,11 @@ def main():
data = np.memmap(cfg.input, mode = 'r')
model = GGMLV3Model()
print('* Scanning GGML input file')
offset = model.load(data, 0)
try:
offset = model.load(data, 0)
except OverflowError:
print(f'!!! Caught overflow loading tensors. The most likely issue is running on Windows but not in WSL. Try running in WSL if possible.', file = sys.stderr)
raise
print(f'* GGML model hyperparameters: {model.hyperparameters}')
vocab_override = None
params_override = None
Expand All @@ -330,4 +346,5 @@ def main():
converter.save()
print(f'* Successful completion. Output saved to: {cfg.output}')

main()
if __name__ == '__main__':
main()
2 changes: 1 addition & 1 deletion llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ struct llama_vocab {
// default LLaMA special tokens
id special_bos_id = 1;
id special_eos_id = 2;
id special_unk_id = -1;
id special_unk_id = 0;
id special_sep_id = -1;
id special_pad_id = -1;

Expand Down

0 comments on commit f4cbcce

Please sign in to comment.