Skip to content

Commit aeba93b

Browse files
committed
Fix: Set clean_up_tokenization_spaces
1 parent 4d6516e commit aeba93b

File tree

1 file changed

+2
-2
lines changed

src/transformers/tokenization_utils_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,8 +1073,8 @@ def __init__(self, **kwargs):
10731073

10741074
self.model_input_names = kwargs.pop("model_input_names", self.model_input_names)
10751075

1076-
# By default, clean up tokenization spaces for both fast and slow tokenizers
1077-
self.clean_up_tokenization_spaces = kwargs.pop("clean_up_tokenization_spaces", False)
1076+
# By default, cleaning tokenization spaces for both fast and slow tokenizers
1077+
self.clean_up_tokenization_spaces = kwargs.pop("clean_up_tokenization_spaces", True)
10781078

10791079
# By default, do not split special tokens for both fast and slow tokenizers
10801080
self.split_special_tokens = kwargs.pop("split_special_tokens", False)

0 commit comments

Comments (0)