refactor restore to be iterative

podcastle-studio · Apr 4, 2022 · 4d8ce67 · 4d8ce67
1 parent 2b11dd2
commit 4d8ce67
Showing 1 changed file with 59 additions and 60 deletions.
diff --git a/phonemizer/punctuation.py b/phonemizer/punctuation.py
@@ -151,63 +151,62 @@ def restore(cls, text: Union[str, List[str]],
             ['hello', 'my world'], [',', '!'] -> ['hello, my world!']
 
         """
-        return cls._restore_aux(str2list(text), marks, 0, sep, strip)
-
-    @classmethod
-    def _restore_current(cls,
-                         current: _MarkIndex,
-                         text: List[str],
-                         marks: List[_MarkIndex],
-                         num: int,
-                         sep: Separator,
-                         strip: bool) -> List[str]:
-        """Auxiliary method for Punctuation._restore_aux()"""
-
-        # remove the word last separator from the current word
-        if sep.word and text[0].endswith(sep.word):
-            text[0] = text[0][:-len(sep.word)]
-        # replace internal spaces in the current mark with the word separator
-        mark = re.sub(r' ', sep.word, current.mark)
-
-        if current.position == 'B':
-            return cls._restore_aux(
-                [mark + text[0]] + text[1:], marks[1:], num, sep, strip)
-
-        if current.position == 'E':
-            return ([text[0] + mark + ('' if strip else sep.word)] +
-                    cls._restore_aux(text[1:], marks[1:], num + 1, sep, strip))
-
-        if current.position == 'A':
-            return [mark] + cls._restore_aux(text, marks[1:], num + 1, sep, strip)
-
-        # position == 'I'
-        if len(text) == 1:  # pragma: nocover
-            # a corner case where the final part of an intermediate
-            # mark (I) has not been phonemized
-            return cls._restore_aux([text[0] + mark], marks[1:], num, sep, strip)
-
-        return cls._restore_aux(
-            [text[0] + mark + text[1]] + text[2:], marks[1:], num, sep, strip)
-
-    @classmethod
-    def _restore_aux(cls,
-                     text: List[str],
-                     marks: List[_MarkIndex],
-                     num: int,
-                     sep: Separator,
-                     strip: bool) -> List[str]:
-        """Auxiliary method for Punctuation.restore()"""
-        if not marks:
-            return text
-
-        # nothing have been phonemized, returns the marks alone, with internal
-        # spaces replaced by the word separator
-        if not text:
-            return [re.sub(r' ', sep.word,
-                ''.join(m.mark for m in marks)) + ('' if strip else sep.word)]
-
-        current = marks[0]
-        if current.index == num:  # place the current mark here
-            return cls._restore_current(current, text, marks, num, sep, strip)
-
-        return [text[0]] + cls._restore_aux(text[1:], marks, num + 1, sep, strip)
+        text = str2list(text)
+        punctuated_text = []
+        pos = 0
+
+        while text or marks:
+
+            if not marks:
+                punctuated_text.append(''.join(text))
+                text = []
+            elif not text:
+                # nothing has been phonemized, returns the marks alone, with internal
+                # spaces replaced by the word separator
+                punctuated_text.append(re.sub(r' ',
+                                              sep.word,
+                                              ''.join(m.mark for m in marks)) +
+                                       ('' if strip else sep.word))
+                marks = []
+
+            else:
+                current_mark = marks[0]
+                if current_mark.index == pos:
+
+                    # place the current mark here
+                    mark = marks[0]
+                    marks = marks[1:]
+                    # replace internal spaces in the current mark with the word separator
+                    mark = re.sub(r' ', sep.word, mark.mark)
+
+                    # remove the word last separator from the current word
+                    if sep.word and text[0].endswith(sep.word):
+                        text[0] = text[0][:-len(sep.word)]
+
+                    if current_mark.position == 'B':
+                        text[0] = mark + text[0]
+                    elif current_mark.position == 'E':
+                        punctuated_text.append(text[0] + mark + ('' if strip else sep.word))
+                        text = text[1:]
+                        pos = pos + 1
+                    elif current_mark.position == 'A':
+                        punctuated_text.append(mark)
+                        pos = pos + 1
+                    else:
+                        # position == 'I'
+                        if len(text) == 1:  # pragma: nocover
+                            # a corner case where the final part of an intermediate
+                            # mark (I) has not been phonemized
+                            text[0] = text[0] + mark
+                        else:
+                            first_word = text[0]
+                            text = text[1:]
+                            text[0] = first_word + mark + text[0]
+
+                else:
+                    punctuated_text.append(text[0])
+                    text = text[1:]
+                    pos = pos + 1
+
+
+        return punctuated_text