Skip to content

Commit 0ef1562

Browse files
authored
Merge pull request #7 from pytorch-labs/add_tiktoken
Add tiktoken tests
2 parents: b3660e1 + 260a7e1; commit 0ef1562

File tree

6 files changed

+128087
-9
lines changed

6 files changed

+128087
-9
lines changed

.github/workflows/pull.yml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,5 +27,7 @@ jobs:
2727
cmake --build build -j9 --config Debug
2828
2929
# Run unit tests
30-
RESOURCES_PATH=test/resources build/sentencepiece_test
30+
export RESOURCES_PATH=test/resources
31+
32+
build/sentencepiece_test
3133
build/tiktoken_test

.github/workflows/trunk.yml

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
11
name: trunk
22

33
on:
4+
pull_request:
5+
tags:
6+
- ciflow/trunk/*
47
push:
58
branches:
69
- main
7-
tags:
8-
- ciflow/trunk/*
910
workflow_dispatch:
1011

1112
concurrency:
@@ -29,4 +30,7 @@ jobs:
2930
cmake --build build -j9 --config Debug
3031
3132
# Run unit tests
32-
RESOURCES_PATH=test/resources build/sentencepiece_test
33+
export RESOURCES_PATH=test/resources
34+
35+
build/sentencepiece_test
36+
build/tiktoken_test

CMakeLists.txt

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,9 @@ if(TOKENIZERS_BUILD_TEST)
6565
target_link_libraries(sentencepiece_test PUBLIC tokenizers gtest_main)
6666

6767
# tiktoken tests
68-
add_executable(tiktoken_test test/test_base64.cpp)
69-
target_include_directories(tiktoken_test PUBLIC include GTEST_INCLUDE_PATH)
70-
target_link_libraries(tiktoken_test PUBLIC gtest_main)
68+
add_executable(tiktoken_test test/test_base64.cpp test/test_tiktoken.cpp)
69+
target_include_directories(
70+
tiktoken_test PUBLIC third-party/re2 third-party/abseil-cpp include
71+
GTEST_INCLUDE_PATH)
72+
target_link_libraries(tiktoken_test PUBLIC tokenizers gtest_main)
7173
endif()

include/tiktoken.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,8 +39,6 @@ class Tiktoken : public Tokenizer {
3939
: _special_tokens(_get_default_special_tokens()),
4040
_bos_token_index(kBOSTokenIndex), _eos_token_index(kEOSTokenIndex){};
4141

42-
~Tiktoken() override;
43-
4442
Error load(const std::string &tokenizer_path) override;
4543

4644
Result<std::vector<uint64_t>> encode(const std::string &input, int8_t bos,

0 commit comments

Comments
 (0)