diff --git a/src/transformers/models/donut/processing_donut.py b/src/transformers/models/donut/processing_donut.py index 5636ecb9435cf3..1f03fd6306fc0a 100644 --- a/src/transformers/models/donut/processing_donut.py +++ b/src/transformers/models/donut/processing_donut.py @@ -149,7 +149,9 @@ def token2json(self, tokens, is_inner_value=False, added_vocab=None): end_token = end_token.group() start_token_escaped = re.escape(start_token) end_token_escaped = re.escape(end_token) - content = re.search(f"{start_token_escaped}(.*?){end_token_escaped}", tokens, re.IGNORECASE) + content = re.search( + f"{start_token_escaped}(.*?){end_token_escaped}", tokens, re.IGNORECASE | re.DOTALL + ) if content is not None: content = content.group(1).strip() if r"GA30301123-4567" "Johnny" "JD" + "text\nwith\nnewlines" + "" ) actual_json = self.processor.token2json(sequence)