Skip to content

Commit

Permalink
[tokenizer] Fixes tokenizer bug (#2843)
Browse files Browse the repository at this point in the history
Fixes #2840
  • Loading branch information
frankfliu committed Apr 26, 2024
1 parent 6839eb6 commit 425d2d9
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,7 @@ public HuggingFaceTokenizer build() throws IOException {
return managed(HuggingFaceTokenizer.newInstance(vocab, merges, options));
}
throw new IOException("tokenizer.json file not found.");
- } else if (Files.exists(tokenizerPath)) {
+ } else if (!Files.exists(tokenizerPath)) {
throw new IOException("Tokenizer file not exits: " + tokenizerPath);
}
return managed(HuggingFaceTokenizer.newInstance(tokenizerPath, options));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ public void testTokenizer() throws IOException {
Assert.assertEquals(encodings.length, 2);
Assert.assertEquals(encodings[0].getIds(), ids);
}
+
+ Assert.assertThrows(
+ () -> {
+ Path file = Paths.get("build/tokenizer/non-exists.json");
+ HuggingFaceTokenizer.builder().optTokenizerPath(file).build();
+ });
}

@Test
Expand Down

0 comments on commit 425d2d9

Please sign in to comment.