Skip to content

Commit

Permalink
Simplify tokenizer header (#4283)
Browse files Browse the repository at this point in the history
Summary:
The dependencies and includes for the tokenizer are unnecessarily complicated. This PR removes some that are unused and updates others so that the code includes, and the build targets depend on, only what is actually used.

Pull Request resolved: #4283

Test Plan:
- test/run_oss_cpp_tests.sh examples/models/llama2/tokenizer/test
- test/run_oss_cpp_tests.sh extension/llm/tokenizer/test

Reviewed By: larryliu0820

Differential Revision: D59837081

Pulled By: helunwencser

fbshipit-source-id: 4c69f01a868a52207725ae39f65fa62ead7b6749
  • Loading branch information
helunwencser authored and facebook-github-bot committed Jul 22, 2024
1 parent 6dbb4dc commit 844a69f
Show file tree
Hide file tree
Showing 6 changed files with 6 additions and 24 deletions.
2 changes: 1 addition & 1 deletion extension/llm/tokenizer/bpe_tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>

#include <string>
#include <cstring>

namespace torch {
namespace executor {
Expand Down
2 changes: 1 addition & 1 deletion extension/llm/tokenizer/bpe_tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#pragma once

#include <executorch/extension/llm/tokenizer/tokenizer.h>
#include <cstdint>
#include <memory>

namespace torch {
namespace executor {
Expand Down
6 changes: 2 additions & 4 deletions extension/llm/tokenizer/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ def define_common_targets():
"bpe_tokenizer.h",
],
exported_deps = [
"//executorch/runtime/core/exec_aten:lib",
"//executorch/runtime/core/exec_aten/util:scalar_type_util",
"//executorch/runtime/core:core",
],
visibility = [
"@EXECUTORCH_CLIENTS",
Expand All @@ -67,8 +66,7 @@ def define_common_targets():
"base64.h",
],
exported_deps = [
"//executorch/runtime/core/exec_aten:lib",
"//executorch/runtime/core/exec_aten/util:scalar_type_util",
"//executorch/runtime/core:core",
],
visibility = [
"@EXECUTORCH_CLIENTS",
Expand Down
8 changes: 0 additions & 8 deletions extension/llm/tokenizer/tiktoken.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,8 @@

#include <executorch/extension/llm/tokenizer/base64.h>
#include <executorch/extension/llm/tokenizer/tiktoken.h>
#include <cctype>
#include <cstdint>
#include <fstream>
#include <functional>
#include <limits>
#include <memory>
#include <regex>
#include <string>
#include <unordered_set>
#include <vector>

namespace torch {
namespace executor {
Expand Down
4 changes: 1 addition & 3 deletions extension/llm/tokenizer/tiktoken.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@

#include <executorch/extension/llm/tokenizer/tokenizer.h>
#include <re2/re2.h>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <regex>
#include <unordered_map>

namespace torch {
Expand Down
8 changes: 1 addition & 7 deletions extension/llm/tokenizer/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,11 @@
// model won't work with this class, it needs to go through tokenizer.py first.
#pragma once

#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <cinttypes>
#include <string>
#include <vector>

#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
#include <executorch/runtime/core/result.h>

namespace torch {
Expand Down

0 comments on commit 844a69f

Please sign in to comment.