-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[egs] Changes to IAM handwriting-recognition recipe, including BPE encoding (#2658)
- Loading branch information
Showing
24 changed files
with
1,412 additions
and
566 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,88 +1,107 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright 2017 Ashish Arora | ||
#Copyright 2017 Ashish Arora | ||
|
||
""" This module will be used by scripts for open vocabulary setup. | ||
If the hypothesis transcription contains <unk>, then it will replace the | ||
<unk> with the word predicted by <unk> model by concatenating phones decoded | ||
from the unk-model. It is currently supported only for triphone setup. | ||
Args: | ||
phones: File name of a file that contains the phones.txt, (symbol-table for phones). | ||
phone and phoneID, Eg. a 217, phoneID of 'a' is 217. | ||
words: File name of a file that contains the words.txt, (symbol-table for words). | ||
word and wordID. Eg. ACCOUNTANCY 234, wordID of 'ACCOUNTANCY' is 234. | ||
unk: ID of <unk>. Eg. 231. | ||
one-best-arc-post: A file in arc-post format, which is a list of timing info and posterior | ||
of arcs along the one-best path from the lattice. | ||
E.g. 506_m01-049-00 8 12 1 7722 282 272 288 231 | ||
<utterance-id> <start-frame> <num-frames> <posterior> <word> [<ali>] | ||
[<phone1> <phone2>...] | ||
output-text: File containing hypothesis transcription with <unk> recognized by the | ||
unk-model. | ||
E.g. A move to stop mr. gaitskell. | ||
Eg. local/unk_arc_post_to_transcription.py lang/phones.txt lang/words.txt | ||
data/lang/oov.int | ||
""" | ||
import argparse | ||
import os | ||
import sys | ||
|
||
parser = argparse.ArgumentParser(description="""uses phones to convert unk to word""") | ||
parser.add_argument('phones', type=str, help='phones and phonesID') | ||
parser.add_argument('words', type=str, help='word and wordID') | ||
parser.add_argument('unk', type=str, default='-', help='location of unk file') | ||
parser.add_argument('--input-ark', type=str, default='-', help='where to read the input data') | ||
parser.add_argument('--out-ark', type=str, default='-', help='where to write the output data') | ||
parser.add_argument('phones', type=str, help='File name of a file that contains the' | ||
'symbol-table for phones. Each line must be: <phone> <phoneID>') | ||
parser.add_argument('words', type=str, help='File name of a file that contains the' | ||
'symbol-table for words. Each line must be: <word> <word-id>') | ||
parser.add_argument('unk', type=str, default='-', help='File name of a file that' | ||
'contains the ID of <unk>. The content must be: <oov-id>, e.g. 231') | ||
parser.add_argument('--one-best-arc-post', type=str, default='-', help='A file in arc-post' | ||
'format, which is a list of timing info and posterior of arcs' | ||
'along the one-best path from the lattice') | ||
parser.add_argument('--output-text', type=str, default='-', help='File containing' | ||
'hypothesis transcription with <unk> recognized by the unk-model') | ||
args = parser.parse_args() | ||
|
||
|
||
### main ### | ||
phone_fh = open(args.phones, 'r', encoding='latin-1') | ||
word_fh = open(args.words, 'r', encoding='latin-1') | ||
unk_fh = open(args.unk, 'r', encoding='latin-1') | ||
if args.input_ark == '-': | ||
input_fh = sys.stdin | ||
phone_handle = open(args.phones, 'r', encoding='latin-1') # Create file handles | ||
word_handle = open(args.words, 'r', encoding='latin-1') | ||
unk_handle = open(args.unk,'r', encoding='latin-1') | ||
if args.one_best_arc_post == '-': | ||
arc_post_handle = sys.stdin | ||
else: | ||
input_fh = open(args.input_ark, 'r', encoding='latin-1') | ||
if args.out_ark == '-': | ||
out_fh = sys.stdout | ||
arc_post_handle = open(args.one_best_arc_post, 'r', encoding='latin-1') | ||
if args.output_text == '-': | ||
output_text_handle = sys.stdout | ||
else: | ||
out_fh = open(args.out_ark, 'w', encoding='latin-1') | ||
output_text_handle = open(args.output_text, 'w', encoding='latin-1') | ||
|
||
phone_dict = dict() # Stores phoneID and phone mapping | ||
phone_data_vect = phone_fh.read().strip().split("\n") | ||
for key_val in phone_data_vect: | ||
id2phone = dict() # Stores the mapping from phone ID (str, as read from the file) to phone (str) | ||
phones_data = phone_handle.read().strip().split("\n") | ||
|
||
for key_val in phones_data: | ||
key_val = key_val.split(" ") | ||
phone_dict[key_val[1]] = key_val[0] | ||
id2phone[key_val[1]] = key_val[0] | ||
|
||
word_dict = dict() | ||
word_data_vect = word_fh.read().strip().split("\n") | ||
word_data_vect = word_handle.read().strip().split("\n") | ||
|
||
for key_val in word_data_vect: | ||
key_val = key_val.split(" ") | ||
word_dict[key_val[1]] = key_val[0] | ||
unk_val = unk_fh.read().strip().split(" ")[0] | ||
unk_val = unk_handle.read().strip().split(" ")[0] | ||
|
||
utt_word_dict = dict() | ||
utt_phone_dict = dict() # Stores utteranceID and phoneID | ||
unk_word_dict = dict() | ||
count=0 | ||
for line in input_fh: | ||
utt_word_dict = dict() # Dict of lists; maps each utterance ID (str) to its list of words (str) | ||
for line in arc_post_handle: | ||
line_vect = line.strip().split("\t") | ||
if len(line_vect) < 6: | ||
print("Bad line: '{}' Expecting 6 fields. Skipping...".format(line), | ||
if len(line_vect) < 6: # Check for 1best-arc-post output | ||
print("Error: Bad line: '{}' Expecting 6 fields. Skipping...".format(line), | ||
file=sys.stderr) | ||
continue | ||
uttID = line_vect[0] | ||
utt_id = line_vect[0] | ||
word = line_vect[4] | ||
phones = line_vect[5] | ||
if uttID in utt_word_dict.keys(): | ||
utt_word_dict[uttID][count] = word | ||
utt_phone_dict[uttID][count] = phones | ||
else: | ||
count = 0 | ||
utt_word_dict[uttID] = dict() | ||
utt_phone_dict[uttID] = dict() | ||
utt_word_dict[uttID][count] = word | ||
utt_phone_dict[uttID][count] = phones | ||
if word == unk_val: # Get character sequence for unk | ||
phone_key_vect = phones.split(" ") | ||
phone_val_vect = list() | ||
for pkey in phone_key_vect: | ||
phone_val_vect.append(phone_dict[pkey]) | ||
if utt_id not in list(utt_word_dict.keys()): | ||
utt_word_dict[utt_id] = list() | ||
|
||
if word == unk_val: # Get the 1best phone sequence given by the unk-model | ||
phone_id_seq = phones.split(" ") | ||
phone_seq = list() | ||
for pkey in phone_id_seq: | ||
phone_seq.append(id2phone[pkey]) # Convert the phone-id sequence to a phone sequence. | ||
phone_2_word = list() | ||
for phone_val in phone_val_vect: | ||
phone_2_word.append(phone_val.split('_')[0]) | ||
phone_2_word = ''.join(phone_2_word) | ||
utt_word_dict[uttID][count] = phone_2_word | ||
for phone_val in phone_seq: | ||
phone_2_word.append(phone_val.split('_')[0]) # Removing the word-position markers (e.g. _B) | ||
phone_2_word = ''.join(phone_2_word) # Concatenate the phone sequence | ||
utt_word_dict[utt_id].append(phone_2_word) # Store word from unk-model | ||
else: | ||
if word == '0': | ||
if word == '0': # Store space/silence | ||
word_val = ' ' | ||
else: | ||
word_val = word_dict[word] | ||
utt_word_dict[uttID][count] = word_val | ||
count += 1 | ||
utt_word_dict[utt_id].append(word_val) # Store word from 1best-arc-post | ||
|
||
transcription = "" | ||
for key in sorted(utt_word_dict.keys()): | ||
transcription = key | ||
for index in sorted(utt_word_dict[key].keys()): | ||
value = utt_word_dict[key][index] | ||
transcription = transcription + " " + value | ||
out_fh.write(transcription + '\n') | ||
transcription = "" # Output transcription | ||
for utt_key in sorted(utt_word_dict.keys()): | ||
transcription = utt_key | ||
for word in utt_word_dict[utt_key]: | ||
transcription = transcription + " " + word | ||
output_text_handle.write(transcription + '\n') |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#!/bin/bash | ||
# Copyright 2018 Hossein Hadian | ||
# 2018 Ashish Arora | ||
|
||
# Apache 2.0 | ||
# This script performs data augmentation. | ||
|
||
nj=4 | ||
cmd=run.pl | ||
feat_dim=40 | ||
echo "$0 $@" | ||
|
||
. ./cmd.sh | ||
. ./path.sh | ||
. ./utils/parse_options.sh || exit 1; | ||
|
||
srcdir=$1 | ||
outdir=$2 | ||
datadir=$3 | ||
aug_set=aug1 | ||
mkdir -p $datadir/augmentations | ||
echo "copying $srcdir to $datadir/augmentations/$aug_set, allowed length, creating feats.scp" | ||
|
||
for set in $aug_set; do | ||
image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \ | ||
$srcdir $datadir/augmentations/$set | ||
cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt | ||
local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ | ||
--fliplr false --augment true $datadir/augmentations/$set | ||
done | ||
|
||
echo " combine original data and data from different augmentations" | ||
utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/$aug_set | ||
cat $srcdir/allowed_lengths.txt > $outdir/allowed_lengths.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
tuning/run_cnn_e2eali_1c.sh | ||
tuning/run_cnn_e2eali_1d.sh |
Oops, something went wrong.