Skip to content

Commit a257387

Browse files
authored
[egs] Chime6: bug fix sorting utterances by their start time and end time (kaldi-asr#3953)
1 parent d9dc2ed commit a257387

File tree

3 files changed

+21
-7
lines changed

3 files changed

+21
-7
lines changed

egs/chime6/s5_track2/RESULTS

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@ Dev 57.15 83.96
1414
Eval 54.12 80.33
1515

1616
# ASR nnet3 tdnn+chain
17-
Dev: U06 %WER 81.18 [ 58881 / 47798, 1638 ins, 30528 del, 15632 sub ]
18-
Eval: U06 %WER 85.39 [ 55132 / 47076, 1107 ins, 27768 del, 18201 sub ]
17+
Dev: %WER 84.33 [ 49653 / 58881, 1529 ins, 35813 del, 12311 sub ]
18+
Eval: %WER 78.08 [ 43046 / 55132, 957 ins, 32045 del, 10044 sub ]

egs/chime6/s5_track2/local/get_hyp_perspeaker_perarray_file.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,18 @@ def main():
3939
combined_hyp_file = args.output_dir_path + '/' + 'hyp' + '_' + sessionid_micid_speakerid + '_comb'
4040
combined_hyp_writer = open(combined_hyp_file, 'w')
4141
utterances = sessionid_micid_speakerid_dict[sessionid_micid_speakerid]
42-
text = ''
42+
# sorting utterances by start and end time
43+
sessionid_micid_speakerid_utterances={}
4344
for line in utterances:
4445
parts = line.strip().split()
46+
utt_parts = parts[0].strip().split('-')
47+
time ='-'.join(utt_parts[2:])
48+
sessionid_micid_speakerid_utterances[time] = line
49+
text = ''
50+
for time_key in sorted(sessionid_micid_speakerid_utterances):
51+
parts = sessionid_micid_speakerid_utterances[time_key].strip().split()
4552
text = text + ' ' + ' '.join(parts[1:])
46-
hyp_writer.write(line)
53+
hyp_writer.write(sessionid_micid_speakerid_utterances[time_key])
4754
combined_utterance = 'utt' + " " + text
4855
combined_hyp_writer.write(combined_utterance)
4956
combined_hyp_writer.write('\n')

egs/chime6/s5_track2/local/get_ref_perspeaker_persession_file.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,21 @@ def main():
5555
spkrid_mapping[sessionid_speakerid.split('_')[1]]) + '_comb'
5656
combined_ref_writer = open(combined_ref_file, 'w')
5757
utterances = sessionid_speakerid_dict[sessionid_speakerid]
58-
text = ''
59-
uttid_wc = 'utt'
58+
sessionid_speakerid_utterances = {}
59+
# sorting utterances by start and end time
6060
for line in utterances:
6161
parts = line.strip().split()
62+
utt_parts = parts[0].strip().split('-')
63+
time ='-'.join(utt_parts[1:])
64+
sessionid_speakerid_utterances[time] = line
65+
text = ''
66+
uttid_wc = 'utt'
67+
for time_key in sorted(sessionid_speakerid_utterances):
68+
parts = sessionid_speakerid_utterances[time_key].strip().split()
6269
uttid_id = parts[0]
6370
utt_text = ' '.join(parts[1:])
6471
text = text + ' ' + ' '.join(parts[1:])
65-
ref_writer.write(line)
72+
ref_writer.write(sessionid_speakerid_utterances[time_key])
6673
length = str(len(utt_text.split()))
6774
uttid_id_len = uttid_id + ":" + length
6875
uttid_wc = uttid_wc + ' ' + uttid_id_len

0 commit comments

Comments
 (0)