File tree Expand file tree Collapse file tree 1 file changed +4
-2
lines changed
axlearn/experiments/text/gpt Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Original file line number Diff line number Diff line change 40
40
```
41
41
"""
42
42
43
-
44
43
from axlearn .common .config import InstantiableConfig , config_for_class , config_for_function
45
44
from axlearn .common .input_lm import lm_text_preprocessor
46
45
from axlearn .common .utils import get_data_dir
47
46
from axlearn .experiments .text .common import DataMixtureComponent , vocab
48
47
from axlearn .experiments .text .gpt import fuji , gspmd
49
48
from axlearn .experiments .text .gpt .common import mixture_train_input_source , tfds_input
50
- from axlearn .experiments .text .gpt .vocabulary_fuji_v3 import FujiV3Vocabulary
51
49
from axlearn .experiments .trainer_config_utils import TrainerConfigFn
52
50
53
51
@@ -59,6 +57,10 @@ def _vocab_cfg(vocab_size: int):
59
57
if vocab_size == 128 * 1024 :
60
58
return config_for_function (vocab ).set (sentencepiece_model_name = "bpe_128k_c4.model" )
61
59
if vocab_size == 128256 :
60
+ # Avoid a global tokenizers dependency.
61
+ # pylint: disable-next=import-outside-toplevel
62
+ from axlearn .experiments .text .gpt .vocabulary_fuji_v3 import FujiV3Vocabulary
63
+
62
64
# TikToken.
63
65
return config_for_class (FujiV3Vocabulary ).set (filename = "Llama-3-tokenizer.json" )
64
66
raise ValueError (f"Tokenizer with vocab size { vocab_size } does not exist." )
You can’t perform that action at this time.
0 commit comments