Skip to content

Commit 18db3cf

Browse files
author
Yibing Liu
committed
Handle the special tokens in scoring cer
1 parent ff1cc19 commit 18db3cf

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

fluid/DeepASR/score_error_rate.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,18 @@ def parse_args():
1616
default='cer',
1717
choices=['cer', 'wer'],
1818
help="Error rate type. (default: %(default)s)")
19+
parser.add_argument(
20+
'--special_tokens',
21+
type=str,
22+
default='<SPOKEN_NOISE>',
23+
help="Special tokens in scoring CER, seperated by space. "
24+
"They shouldn't be splitted and should be treated as one special "
25+
"character. Example: '<SPOKEN_NOISE> <bos> <eos>' "
26+
"(default: %(default)s)")
1927
parser.add_argument(
2028
'--ref', type=str, required=True, help="The ground truth text.")
2129
parser.add_argument(
22-
'--hyp', type=str, required=True, help="The decoding result.")
30+
'--hyp', type=str, required=True, help="The decoding result text.")
2331
args = parser.parse_args()
2432
return args
2533

@@ -31,6 +39,8 @@ def parse_args():
3139
sum_errors, sum_ref_len = 0.0, 0
3240
sent_cnt, not_in_ref_cnt = 0, 0
3341

42+
# Split on any run of whitespace rather than on a single literal space, so
# that an empty option value or repeated/leading/trailing spaces never yield
# an empty-string token. An empty token is harmful here: the later
# sent.replace('', '\0') would insert '\0' between every character of the
# sentence and silently corrupt the character error rate.
special_tokens = args.special_tokens.split()
43+
3444
with open(args.ref, "r") as ref_txt:
3545
line = ref_txt.readline()
3646
while line:
@@ -51,6 +61,8 @@ def parse_args():
5161
continue
5262

5363
if args.error_rate_type == 'cer':
64+
for sp_tok in special_tokens:
65+
sent = sent.replace(sp_tok, '\0')
5466
errors, ref_len = char_errors(
5567
ref_dict[key].decode("utf8"),
5668
sent.decode("utf8"),

0 commit comments

Comments
 (0)