Skip to content

Commit

Permalink
refactor restore to be iterative
Browse files Browse the repository at this point in the history
  • Loading branch information
jncasey committed Apr 4, 2022
1 parent 2b11dd2 commit 4d8ce67
Showing 1 changed file with 59 additions and 60 deletions.
119 changes: 59 additions & 60 deletions phonemizer/punctuation.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,63 +151,62 @@ def restore(cls, text: Union[str, List[str]],
['hello', 'my world'], [',', '!'] -> ['hello, my world!']
"""
return cls._restore_aux(str2list(text), marks, 0, sep, strip)

@classmethod
def _restore_current(cls,
                     current: _MarkIndex,
                     text: List[str],
                     marks: List[_MarkIndex],
                     num: int,
                     sep: Separator,
                     strip: bool) -> List[str]:
    """Auxiliary method for Punctuation._restore_aux()

    Places the mark `current` (which is `marks[0]`) relative to the first
    chunk of `text`, then delegates the remaining chunks and marks to
    Punctuation._restore_aux().

    The behaviour depends on `current.position`:

    - 'B': the mark is prepended to the first chunk, `num` is unchanged.
    - 'E': the mark is appended to the first chunk, which is emitted
      (followed by the word separator unless `strip` is true), and `num`
      is incremented.
    - 'A': the mark is emitted as an entry of its own and `num` is
      incremented.
    - any other value (expected to be 'I', an intermediate mark): the first
      two chunks are merged around the mark, `num` is unchanged.

    `text` must not be empty. It is never modified in place: the list
    received from the caller is left untouched.

    """
    # Remove the trailing word separator from the first chunk. This works on
    # a local value instead of assigning to text[0], so the caller's list is
    # not altered as a side effect.
    head = text[0]
    if sep.word and head.endswith(sep.word):
        head = head[:-len(sep.word)]
    tail = text[1:]

    # replace internal spaces in the current mark with the word separator
    mark = re.sub(r' ', sep.word, current.mark)
    remaining_marks = marks[1:]

    if current.position == 'B':
        return cls._restore_aux(
            [mark + head] + tail, remaining_marks, num, sep, strip)

    if current.position == 'E':
        return ([head + mark + ('' if strip else sep.word)] +
                cls._restore_aux(tail, remaining_marks, num + 1, sep, strip))

    if current.position == 'A':
        # the first chunk is handed over without its trailing separator
        return [mark] + cls._restore_aux(
            [head] + tail, remaining_marks, num + 1, sep, strip)

    # position == 'I'
    if not tail:  # pragma: nocover
        # a corner case where the final part of an intermediate
        # mark (I) has not been phonemized
        return cls._restore_aux(
            [head + mark], remaining_marks, num, sep, strip)

    return cls._restore_aux(
        [head + mark + tail[0]] + tail[1:], remaining_marks, num, sep, strip)

@classmethod
def _restore_aux(cls,
                 text: List[str],
                 marks: List[_MarkIndex],
                 num: int,
                 sep: Separator,
                 strip: bool) -> List[str]:
    """Auxiliary method for Punctuation.restore()

    Re-inserts `marks` into the chunks of `text`. `num` is the index of
    `text[0]`, compared against the `index` attribute of the next mark to
    decide where that mark must be placed.

    Returns `text` itself when there is no mark left. When the text is
    exhausted while marks remain, the remaining marks are concatenated into
    a single final entry, with internal spaces replaced by the word
    separator (and followed by the word separator unless `strip` is true).

    Chunks located before the next mark are skipped with a loop rather than
    with one recursive call per chunk, so the recursion depth grows with
    the number of marks and not with the number of chunks.

    """
    if not marks:
        return text

    current = marks[0]

    # count the leading chunks that do not carry the current mark: they are
    # copied to the output unchanged
    skipped = 0
    while skipped < len(text) and current.index != num + skipped:
        skipped += 1
    untouched, remaining = text[:skipped], text[skipped:]

    if not remaining:
        # nothing is left to attach the marks to (or nothing has been
        # phonemized at all): return the marks alone, with internal spaces
        # replaced by the word separator
        leftover = re.sub(r' ', sep.word, ''.join(m.mark for m in marks))
        return untouched + [leftover + ('' if strip else sep.word)]

    # place the current mark here
    return untouched + cls._restore_current(
        current, remaining, marks, num + skipped, sep, strip)
text = str2list(text)
punctuated_text = []
pos = 0

while text or marks:

if not marks:
punctuated_text.append(''.join(text))
text = []
elif not text:
# nothing has been phonemized, returns the marks alone, with internal
# spaces replaced by the word separator
punctuated_text.append(re.sub(r' ',
sep.word,
''.join(m.mark for m in marks)) +
('' if strip else sep.word))
marks = []

else:
current_mark = marks[0]
if current_mark.index == pos:

# place the current mark here
mark = marks[0]
marks = marks[1:]
# replace internal spaces in the current mark with the word separator
mark = re.sub(r' ', sep.word, mark.mark)

# remove the word last separator from the current word
if sep.word and text[0].endswith(sep.word):
text[0] = text[0][:-len(sep.word)]

if current_mark.position == 'B':
text[0] = mark + text[0]
elif current_mark.position == 'E':
punctuated_text.append(text[0] + mark + ('' if strip else sep.word))
text = text[1:]
pos = pos + 1
elif current_mark.position == 'A':
punctuated_text.append(mark)
pos = pos + 1
else:
# position == 'I'
if len(text) == 1: # pragma: nocover
# a corner case where the final part of an intermediate
# mark (I) has not been phonemized
text[0] = text[0] + mark
else:
first_word = text[0]
text = text[1:]
text[0] = first_word + mark + text[0]

else:
punctuated_text.append(text[0])
text = text[1:]
pos = pos + 1


return punctuated_text

0 comments on commit 4d8ce67

Please sign in to comment.