Skip to content

Commit

Permalink
fix bug
Browse files Browse the repository at this point in the history
  • Loading branch information
实一 committed Dec 9, 2022
1 parent 4cd2a78 commit a5a5899
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 5 deletions.
16 changes: 16 additions & 0 deletions data/ofa_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
import re
import torch.utils.data
from fairseq.data import FairseqDataset
import string

# Fullwidth and CJK punctuation (including the ideographic space U+3000) that
# Chinese text normalisation treats as separators.
# NOTE(review): the ASCII-range marks are written here in their fullwidth
# forms on the assumption that this is the intended character set; a plain
# ASCII apostrophe inside this single-quoted literal would terminate the
# string early and make the module unimportable. Confirm against the intended
# punctuation list.
CHINESE_PUNCTUATION = (
    '＂＃＄％＆＇（）＊＋，－／：；＜＝＞＠［＼］＾＿｀｛｜｝～｟｠｢｣､\u3000、〃〈〉《》「」『』【】'
    '〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·！？｡。'
)
# ASCII punctuation, taken verbatim from the standard library.
ENGLISH_PUNCTUATION = string.punctuation


logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -77,3 +82,14 @@ def pre_caption(self, caption, max_words=None):
caption = ' '.join(caption_words[:max_words])

return caption

def pre_chinese(self, text, max_words):
    """Normalise a Chinese (or mixed Chinese/English) text string.

    The text is lower-cased, every character listed in
    ``CHINESE_PUNCTUATION`` or ``ENGLISH_PUNCTUATION`` is replaced by a
    space, runs of two or more whitespace characters are collapsed to a
    single space, trailing newlines and surrounding spaces are removed, and
    the result is truncated.

    Args:
        text: The raw input string.
        max_words: Maximum length of the result. The limit is applied with
            a slice, so it counts characters; ``None`` disables truncation.

    Returns:
        The cleaned, truncated string.
    """
    # str.replace() treats its first argument as one contiguous substring, so
    # passing the whole punctuation set never matched anything. Map each
    # punctuation character to a space individually instead.
    punctuation_to_space = dict.fromkeys(
        map(ord, CHINESE_PUNCTUATION + ENGLISH_PUNCTUATION), " "
    )
    text = text.lower().translate(punctuation_to_space)
    # Collapse the whitespace runs left behind by the punctuation removal.
    text = re.sub(r"\s{2,}", " ", text)
    text = text.rstrip("\n")
    return text.strip(" ")[:max_words]
1 change: 0 additions & 1 deletion data/s2t_data/unify_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,6 @@ def __init__(
self.text2phone_tokenizer = None
if text2phone_path is not None:
self.blank_id = self.phone_dict.index("<unk>")
self.phone_mask_idx = self.phone_dict.index("#1")
self.text2phone_tokenizer = Text2Phone(text2phone_path)

def set_epoch(self, epoch, **unused):
Expand Down
5 changes: 1 addition & 4 deletions tasks/speech_tasks/unify_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def setup_task(cls, cfg: DictConfig, **kwargs):
phone_dict = cls.load_dictionary(cfg.phone_dict_path)
if cfg.text2phone_path is None:
phone_dict.add_symbol("<blank>")
phone_dict.add_symbol("<mask>")
phone_dict.add_symbol("<mask>")

# src_dict.add_symbol("<phone_blank>")
# tgt_dict.add_symbol("<phone_blank>")
Expand Down Expand Up @@ -358,6 +358,3 @@ def phone_dictionary(self):
"""Return the phone :class:`~fairseq.data.Dictionary`."""
return self.phone_dict

@property
def train_stage(self):
return self.train_stage
1 change: 1 addition & 0 deletions utils/phone/zh/phone_dict.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,4 @@ zh 120
#14 134
#15 135
#16 136
#17 137

0 comments on commit a5a5899

Please sign in to comment.