Skip to content

Commit aeba93b

Browse files
committed
Fix: Set clean_up_tokenization_spaces
1 parent 4d6516e commit aeba93b

File tree

1 file changed

+2
-2
lines changed

src/transformers/tokenization_utils_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,8 +1073,8 @@ def __init__(self, **kwargs):
10731073

10741074
self.model_input_names = kwargs.pop("model_input_names", self.model_input_names)
10751075

1076-
# By default, clean up tokenization spaces for both fast and slow tokenizers
1077-
self.clean_up_tokenization_spaces = kwargs.pop("clean_up_tokenization_spaces", False)
1076+
# By default, cleaning tokenization spaces for both fast and slow tokenizers
1077+
self.clean_up_tokenization_spaces = kwargs.pop("clean_up_tokenization_spaces", True)
10781078

10791079
# By default, do not split special tokens for both fast and slow tokenizers
10801080
self.split_special_tokens = kwargs.pop("split_special_tokens", False)

0 commit comments

Comments (0)