Skip to content

Commit

Permalink
keep punctuation by default, add wrt command
Browse files (browse the repository at this point in the history)
  • Loading branch information
gooofy committed Dec 28, 2018
1 parent c184304 commit 1f76ec5
Showing 1 changed file with 17 additions and 5 deletions.
22 changes: 17 additions & 5 deletions speech_editor.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,14 +48,14 @@
DEFAULT_DICT = 'dict-de.ipa'
DEFAULT_WRT = 'data/src/wrt/librivox_de.csv'

PUNCTUATION = set([',','.','\'','!','?','"','-'])
PUNCTUATION = set([',','.','\'','!','?','"','-',':'])

def tokwrt (ts):

global options, wrt

res = []
for t in tokenize(ts, lang=options.lang, keep_punctuation=options.keep_punctuation):
for t in tokenize(ts, lang=options.lang, keep_punctuation=not options.ignore_punctuation):
if t in wrt:
res.append(wrt[t])
else:
Expand Down Expand Up @@ -342,8 +342,8 @@ def lex_edit(token):
parser.add_option("-l", "--lang", dest="lang", type = "str", default='de',
help="language (default: de)")

parser.add_option("-k", "--keep-punctuation", action="store_true", dest="keep_punctuation",
help="keep punctuation marks")
parser.add_option("-i", "--ignore-punctuation", action="store_true", dest="ignore_punctuation",
help="ignore (remove) punctuation marks")

parser.add_option("-m", "--missing-words", action="store_true", dest="missing_words",
help="only work on submissions that have at least one missing word")
Expand Down Expand Up @@ -388,7 +388,8 @@ def lex_edit(token):
continue
wrt[parts[0]] = parts[1]

logging.info(repr(wrt))
# logging.info(repr(wrt))
logging.info('loading WRT from %s ... done, %d entries.' % (options.wrt, len(wrt)))

#
# load transcripts
Expand Down Expand Up @@ -597,6 +598,17 @@ def paint_main(cur_ts):
readline.add_history(ts['ts'].encode('utf8'))
ts['ts'] = raw_input('transcript: ').decode('utf8')

elif c == 'w':

if missing_token:
readline.add_history(missing_token.encode('utf8'))
r = raw_input(u'WRT entry for %s: ' % missing_token).decode('utf8')
if r:
wrt[missing_token] = r
else:
del wrt[missing_token]
ts['ts'] = ''

elif c == 'a':

if prompt_token_idx < len(prompt_tokens):
Expand Down

0 comments on commit 1f76ec5

Please sign in to comment.