Skip to content

Commit

Permalink
convert : remove fsep token from GPTRefactForCausalLM (#8237)
Browse files Browse the repository at this point in the history
The <filename> token used by Refact doesn't serve
the same purpose as the <file_separator> from CodeGemma.

Signed-off-by: Jiri Podivin <jpodivin@redhat.com>
  • Loading branch information
jpodivin authored Jul 12, 2024
1 parent 71c1121 commit 5aefbce
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1203,11 +1203,10 @@ def set_vocab(self):

  # TODO: how to determine special FIM tokens automatically?
  special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
- special_token_types = ['prefix', 'suffix', 'middle', 'fsep', 'eot'])
+ special_token_types = ['prefix', 'suffix', 'middle', 'eot'])
  special_vocab._set_special_token("prefix", 1)
  special_vocab._set_special_token("suffix", 3)
  special_vocab._set_special_token("middle", 2)
- special_vocab._set_special_token("fsep", 4) # is this correct?
  special_vocab.add_to_gguf(self.gguf_writer)

def set_gguf_parameters(self):
Expand Down

0 comments on commit 5aefbce

Please sign in to comment.