Skip to content

Commit

Permalink
Option to give an existing vocab file(.pt or text) to preprocess.py (O…
Browse files Browse the repository at this point in the history
…penNMT#1346)

* Option to give an existing vocab file (.pt or text) to preprocess.py
  • Loading branch information
kvthr authored and vince62s committed Mar 8, 2019
1 parent 9b3083f commit 25547d8
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions onmt/inputters/inputter.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,18 @@ def build_vocab(train_dataset_files, fields, data_type, share_vocab,

counters = defaultdict(Counter)

if src_vocab_path:
try:
logger.info("Using existing vocabulary...")
vocab = torch.load(src_vocab_path)
# return vocab to dump with standard name
return vocab
except torch.serialization.pickle.UnpicklingError:
logger.info("Building vocab from text file...")
# empty train_dataset_files so that vocab is only loaded from
# given paths in src_vocab_path, tgt_vocab_path
train_dataset_files = []

# Load vocabulary
if src_vocab_path:
src_vocab, src_vocab_size = _load_vocab(
Expand Down

0 comments on commit 25547d8

Please sign in to comment.