Avoid a top-level import of tokenizers. (apple#935)

Mark Lee · web-flow · commit 9b75ef166d63 · 2025-01-19T15:43:19.000Z
diff --git a/axlearn/experiments/text/gpt/c4_trainer.py b/axlearn/experiments/text/gpt/c4_trainer.py
@@ -40,7 +40,6 @@
 ```
 """
 
-
 from axlearn.common.config import InstantiableConfig, config_for_class, config_for_function
 from axlearn.common.input_lm import lm_text_preprocessor
 from axlearn.common.utils import get_data_dir
diff --git a/axlearn/experiments/text/gpt/vocabulary_fuji_v3.py b/axlearn/experiments/text/gpt/vocabulary_fuji_v3.py
@@ -9,7 +9,6 @@
 import jax
 import numpy as np
 import tensorflow.compat.v2 as tf
-from tokenizers import Tokenizer
 
 import axlearn.common.file_system as fs
 from axlearn.common.utils import get_data_dir
@@ -93,6 +92,10 @@ class FujiV3Vocabulary:
     """
 
     def __init__(self, filename: str):
+        # Only require tokenizers if instantiating.
+        # pylint: disable-next=import-outside-toplevel
+        from tokenizers import Tokenizer
+
         data_dir = get_data_dir()
         data_dir = (
             os.path.join(os.path.dirname(__file__), "..", "..", "..", "data")