-
Notifications
You must be signed in to change notification settings - Fork 96
Description
你好,我尝试为ITN加入新的基本规则,运行后报错, 麻烦看看能否解决,谢谢!:
其中,添加了WeTextProcessing/itn/chinese/data/dataflow/flow.tsv如下:
兆 M
g G
另添加了WeTextProcessing/itn/chinese/rules/dataflow.py如下:
from itn.chinese.rules.cardinal import Cardinal
from tn.processor import Processor
from pynini import string_file
from pynini.lib.pynutil import delete, insert
class DataFlow(Processor):
def __init__(self, enable_0_to_9=True):
super().__init__(name='dataflow')
self.enable_0_to_9 = enable_0_to_9
self.build_tagger()
self.build_verbalizer()
def build_tagger(self):
flow = string_file('itn/chinese/data/dataflow/flow.tsv')
digit = string_file('itn/chinese/data/number/digit.tsv') # 1 ~ 9
number = Cardinal().number if self.enable_0_to_9 else \
Cardinal().number_exclude_0_to_9
number |= digit
tagger = (insert('value: "') + number + insert('"') +
insert(' dataflow: "') + flow + insert('"'))
self.tagger = self.add_tokens(tagger)
def build_verbalizer(self):
dataflow = delete('dataflow: "') + self.SIGMA + delete('"')
value = delete(' value: "') + self.SIGMA + delete('"')
verbalizer = value + dataflow
self.verbalizer = self.delete_tokens(verbalizer)
同时,修改了WeTextProcessing/itn/chinese/inverse_normalizer.py 的内容如下:
from tn.processor import Processor
from itn.chinese.rules.cardinal import Cardinal
from itn.chinese.rules.char import Char
from itn.chinese.rules.date import Date
from itn.chinese.rules.fraction import Fraction
from itn.chinese.rules.math import Math
from itn.chinese.rules.measure import Measure
from itn.chinese.rules.money import Money
from itn.chinese.rules.whitelist import Whitelist
from itn.chinese.rules.time import Time
from itn.chinese.rules.postprocessor import PostProcessor
from itn.chinese.rules.license_plate import LicensePlate
from itn.chinese.rules.dataflow import DataFlow ####
from pynini.lib.pynutil import add_weight, delete
from importlib_resources import files
class InverseNormalizer(Processor):
def __init__(self, cache_dir=None, overwrite_cache=False,
enable_standalone_number=True,
enable_0_to_9=False):
super().__init__(name='inverse_normalizer', ordertype='itn')
self.convert_number = enable_standalone_number
self.enable_0_to_9 = enable_0_to_9
if cache_dir is None:
cache_dir = files("itn")
self.build_fst('zh_itn', cache_dir, overwrite_cache)
def build_tagger(self):
tagger = (add_weight(Date().tagger, 1.02)
| add_weight(Whitelist().tagger, 1.01)
| add_weight(Fraction().tagger, 1.05)
| add_weight(Measure(enable_0_to_9=self.enable_0_to_9).tagger, 1.05) # noqa
| add_weight(Money(enable_0_to_9=self.enable_0_to_9).tagger, 1.04) # noqa
| add_weight(Time().tagger, 1.05)
| add_weight(Cardinal(self.convert_number, self.enable_0_to_9).tagger, 1.06) # noqa
| add_weight(Math().tagger, 1.10)
| add_weight(LicensePlate().tagger, 1.0)
| add_weight(DataFlow().tagger, 1.11) ####
| add_weight(Char().tagger, 100)).optimize()
tagger = tagger.star
# remove the last space
self.tagger = tagger @ self.build_rule(delete(' '), '', '[EOS]')
def build_verbalizer(self):
verbalizer = (Cardinal(self.convert_number, self.enable_0_to_9).verbalizer # noqa
| Char().verbalizer
| Date().verbalizer
| Fraction().verbalizer
| Math().verbalizer
| Measure(enable_0_to_9=self.enable_0_to_9).verbalizer
| Money(enable_0_to_9=self.enable_0_to_9).verbalizer
| Time().verbalizer
| LicensePlate().verbalizer
| DataFlow(enable_0_to_9=self.enable_0_to_9).verbalizer ####
| Whitelist().verbalizer).optimize()
postprocessor = PostProcessor(remove_interjections=True).processor
self.verbalizer = (verbalizer @ postprocessor).star
最后运行python -m itn --text "八兆流量" --overwrite_cache后报错如下,
(wenetITN) liuhangchen@G08:~/WeNetITN/WeTextProcessing$ python -m itn --text "八兆流量" --overwrite_cache
dataflow { value: "8" dataflow: "M" } char { value: "流" } char { value: "量" }
ERROR: StringFstToOutputLabels: Invalid start state
Traceback (most recent call last):
File "/storage1/liuhangchen/anaconda3/envs/wenetITN/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/storage1/liuhangchen/anaconda3/envs/wenetITN/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/storage1/liuhangchen/WeNetITN/WeTextProcessing/itn/main.py", line 4, in
main()
File "/storage1/liuhangchen/WeNetITN/WeTextProcessing/itn/main.py", line 53, in main
print(normalizer.normalize(args.text))
File "/storage1/liuhangchen/WeNetITN/WeTextProcessing/tn/processor.py", line 96, in normalize
return self.verbalize(self.tag(input))
File "/storage1/liuhangchen/WeNetITN/WeTextProcessing/tn/processor.py", line 93, in verbalize
return shortestpath(lattice, nshortest=1, unique=True).string()
File "extensions/_pynini.pyx", line 462, in _pynini.Fst.string
File "extensions/_pynini.pyx", line 507, in _pynini.Fst.string
_pywrapfst.FstOpError: Operation failed