From ee90695ad97f8750a8329116c315859ce3e527ee Mon Sep 17 00:00:00 2001
From: Sasha Rush
Date: Fri, 22 Dec 2017 11:34:49 -0500
Subject: [PATCH] .

---
 .travis.yml                            |   2 +-
 onmt/Models.py                         |   6 +-
 onmt/Optim.py                          |   4 +-
 onmt/io/IO.py                          |  12 +--
 onmt/modules/ConvMultiStepAttention.py |   2 +-
 onmt/modules/Gate.py                   |   4 +-
 onmt/modules/ImageEncoder.py           |   6 +-
 onmt/modules/SRU.py                    |   2 +
 onmt/modules/WeightNorm.py             |  26 +++---
 onmt/modules/__init__.py               |   6 +-
 onmt/translate/Beam.py                 | 122 +++++++++++++------------
 onmt/translate/Translation.py          |  12 +--
 onmt/translate/Translator.py           |  60 ++++++------
 test/test_models.py                    |  72 ++++++++-------
 test/test_preprocess.py                |  13 +--
 translate.py                           |   4 +-
 16 files changed, 180 insertions(+), 173 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 96acda1d0b..8a557928aa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -60,5 +60,5 @@ matrix:
   include:
     - env: LINT_CHECK
       python: "2.7"
-      install: pip install flake8
+      install: pip install flake8 pep8-naming
       script: flake8
diff --git a/onmt/Models.py b/onmt/Models.py
index c52cfdca1c..d03c8ec601 100644
--- a/onmt/Models.py
+++ b/onmt/Models.py
@@ -451,9 +451,9 @@ def beam_update(self, idx, positions, beam_size):
         """ Update when beam advances. """
         for e in self._all:
             a, br, d = e.size()
-            sentStates = e.view(a, beam_size, br // beam_size, d)[:, :, idx]
-            sentStates.data.copy_(
-                sentStates.data.index_select(1, positions))
+            sent_states = e.view(a, beam_size, br // beam_size, d)[:, :, idx]
+            sent_states.data.copy_(
+                sent_states.data.index_select(1, positions))
 
 
 class RNNDecoderState(DecoderState):
diff --git a/onmt/Optim.py b/onmt/Optim.py
index e02272e6ea..9576b67781 100644
--- a/onmt/Optim.py
+++ b/onmt/Optim.py
@@ -49,7 +49,7 @@ def __init__(self, method, lr, max_grad_norm,
         self.adagrad_accum = adagrad_accum
         self.opt = opt
 
-    def _setRate(self, lr):
+    def _set_rate(self, lr):
         self.lr = lr
         self.optimizer.param_groups[0]['lr'] = self.lr
 
@@ -69,7 +69,7 @@ def step(self):
             clip_grad_norm(self.params, self.max_grad_norm)
         self.optimizer.step()
 
-    def updateLearningRate(self, ppl, epoch):
+    def update_learning_rate(self, ppl, epoch):
         """
         Decay learning rate if val perf does not improve
         or we hit the start_decay_at limit.
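For readers skimming the rename: update_learning_rate is the perplexity-based
decay hook the trainer calls once per epoch. A minimal sketch of the policy
its docstring describes, assuming hypothetical attributes (last_ppl,
start_decay, start_decay_at, lr_decay) rather than the exact fields of
onmt.Optim:

def update_learning_rate(self, ppl, epoch):
    # Begin decaying once validation perplexity stops improving,
    # or once the start_decay_at epoch is reached (assumed fields).
    if self.start_decay_at is not None and epoch >= self.start_decay_at:
        self.start_decay = True
    if self.last_ppl is not None and ppl > self.last_ppl:
        self.start_decay = True
    if self.start_decay:
        self.lr = self.lr * self.lr_decay
    self.last_ppl = ppl
    self.optimizer.param_groups[0]['lr'] = self.lr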
diff --git a/onmt/io/IO.py b/onmt/io/IO.py
index c685ef864a..3691a1a294 100644
--- a/onmt/io/IO.py
+++ b/onmt/io/IO.py
@@ -16,17 +16,17 @@
 EOS_WORD = ''
 
 
-def __getstate__(self):
+def _getstate(self):
     return dict(self.__dict__, stoi=dict(self.stoi))
 
 
-def __setstate__(self, state):
+def _setstate(self, state):
     self.__dict__.update(state)
     self.stoi = defaultdict(lambda: 0, self.stoi)
 
 
-torchtext.vocab.Vocab.__getstate__ = __getstate__
-torchtext.vocab.Vocab.__setstate__ = __setstate__
+torchtext.vocab.Vocab.__getstate__ = _getstate
+torchtext.vocab.Vocab.__setstate__ = _setstate
 
 
 def get_fields(data_type, n_src_features, n_tgt_features):
@@ -478,9 +478,9 @@ def _read_audio_file(path, src_dir, side, sample_rate, window_size,
         win_length = n_fft
         hop_length = int(sample_rate * window_stride)
         # STFT
-        D = librosa.stft(sound, n_fft=n_fft, hop_length=hop_length,
+        d = librosa.stft(sound, n_fft=n_fft, hop_length=hop_length,
                          win_length=win_length, window=window)
-        spect, _ = librosa.magphase(D)
+        spect, _ = librosa.magphase(d)
         spect = np.log1p(spect)
         spect = torch.FloatTensor(spect)
         if normalize_audio:
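Background on the _getstate/_setstate monkey-patch above: torchtext's
Vocab.stoi is a defaultdict whose default factory is a lambda, and lambdas
cannot be pickled, so saved datasets would fail to serialize. Converting stoi
to a plain dict on save and rebuilding the defaultdict on load avoids that.
A self-contained illustration of the same idea:

import pickle
from collections import defaultdict

class Vocab(object):
    def __init__(self):
        # defaultdict with a lambda: pickling this directly raises an error.
        self.stoi = defaultdict(lambda: 0, {'hello': 1})

    def __getstate__(self):
        # Drop the unpicklable default factory by saving a plain dict.
        return dict(self.__dict__, stoi=dict(self.stoi))

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Restore the default-to-unk behaviour after loading.
        self.stoi = defaultdict(lambda: 0, self.stoi)

v = pickle.loads(pickle.dumps(Vocab()))
assert v.stoi['hello'] == 1 and v.stoi['unseen'] == 0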
""" +# flake8: noqa + import subprocess import platform import os diff --git a/onmt/modules/WeightNorm.py b/onmt/modules/WeightNorm.py index 405edfc9e9..00c70de52b 100644 --- a/onmt/modules/WeightNorm.py +++ b/onmt/modules/WeightNorm.py @@ -61,10 +61,10 @@ def forward(self, x, init=False): self.V.data.copy_(torch.randn(self.V.data.size()).type_as( self.V.data) * 0.05) # norm is out_features * 1 - V_norm = self.V.data / \ + v_norm = self.V.data / \ self.V.data.norm(2, 1).expand_as(self.V.data) # batch_size * out_features - x_init = F.linear(x, Variable(V_norm)).data + x_init = F.linear(x, Variable(v_norm)).data # out_features m_init, v_init = x_init.mean(0).squeeze( 0), x_init.var(0).squeeze(0) @@ -119,10 +119,10 @@ def forward(self, x, init=False): # out_channels, in_channels // groups, * kernel_size self.V.data.copy_(torch.randn(self.V.data.size() ).type_as(self.V.data) * 0.05) - V_norm = self.V.data / self.V.data.view(self.out_channels, -1)\ + v_norm = self.V.data / self.V.data.view(self.out_channels, -1)\ .norm(2, 1).view(self.out_channels, *( [1] * (len(self.kernel_size) + 1))).expand_as(self.V.data) - x_init = F.conv2d(x, Variable(V_norm), None, self.stride, + x_init = F.conv2d(x, Variable(v_norm), None, self.stride, self.padding, self.dilation, self.groups).data t_x_init = x_init.transpose(0, 1).contiguous().view( self.out_channels, -1) @@ -144,20 +144,20 @@ def forward(self, x, init=False): self.b_avg.copy_(self.b.data) return Variable(x_init) else: - V, g, b = get_vars_maybe_avg( + v, g, b = get_vars_maybe_avg( self, ['V', 'g', 'b'], self.training, polyak_decay=self.polyak_decay) - scalar = torch.norm(V.view(self.out_channels, -1), 2, 1) + scalar = torch.norm(v.view(self.out_channels, -1), 2, 1) if len(scalar.size()) == 2: scalar = g / scalar.squeeze(1) else: scalar = g / scalar - W = scalar.view(self.out_channels, * - ([1] * (len(V.size()) - 1))).expand_as(V) * V + w = scalar.view(self.out_channels, * + ([1] * (len(v.size()) - 1))).expand_as(v) * v - x = F.conv2d(x, W, b, self.stride, + x = F.conv2d(x, w, b, self.stride, self.padding, self.dilation, self.groups) return x @@ -192,12 +192,12 @@ def forward(self, x, init=False): # in_channels, out_channels, *kernel_size self.V.data.copy_(torch.randn(self.V.data.size()).type_as( self.V.data) * 0.05) - V_norm = self.V.data / self.V.data.transpose(0, 1).contiguous() \ + v_norm = self.V.data / self.V.data.transpose(0, 1).contiguous() \ .view(self.out_channels, -1).norm(2, 1).view( self.in_channels, self.out_channels, *([1] * len(self.kernel_size))).expand_as(self.V.data) x_init = F.conv_transpose2d( - x, Variable(V_norm), None, self.stride, + x, Variable(v_norm), None, self.stride, self.padding, self.output_padding, self.groups).data # self.out_channels, 1 t_x_init = x_init.tranpose(0, 1).contiguous().view( @@ -228,10 +228,10 @@ def forward(self, x, init=False): scalar = g / \ torch.norm(V.transpose(0, 1).contiguous().view( self.out_channels, -1), 2, 1).squeeze(1) - W = scalar.view(self.in_channels, self.out_channels, + w = scalar.view(self.in_channels, self.out_channels, *([1] * (len(V.size()) - 2))).expand_as(V) * V - x = F.conv_transpose2d(x, W, b, self.stride, + x = F.conv_transpose2d(x, w, b, self.stride, self.padding, self.output_padding, self.groups) return x diff --git a/onmt/modules/__init__.py b/onmt/modules/__init__.py index c581f04706..fb95e49ee9 100644 --- a/onmt/modules/__init__.py +++ b/onmt/modules/__init__.py @@ -1,6 +1,6 @@ from onmt.modules.UtilClass import LayerNorm, Bottle, BottleLinear, \ BottleLayerNorm, 
diff --git a/onmt/modules/ImageEncoder.py b/onmt/modules/ImageEncoder.py
index 8795877c65..887c4fea9a 100644
--- a/onmt/modules/ImageEncoder.py
+++ b/onmt/modules/ImageEncoder.py
@@ -50,7 +50,7 @@ def load_pretrained_vectors(self, opt):
         pass
 
     def forward(self, input, lengths=None):
-        batchSize = input.size(0)
+        batch_size = input.size(0)
         # (batch_size, 64, imgH, imgW)
         # layer 1
         input = F.relu(self.layer1(input[:, :, :, :]-0.5), True)
@@ -93,8 +93,8 @@ def forward(self, input, lengths=None):
         for row in range(input.size(2)):
             inp = input[:, :, row, :].transpose(0, 2)\
                                      .transpose(1, 2)
-            row_vec = torch.Tensor(batchSize).type_as(inp.data)\
-                           .long().fill_(row)
+            row_vec = torch.Tensor(batch_size).type_as(inp.data)\
+                           .long().fill_(row)
             pos_emb = self.pos_lut(Variable(row_vec))
             with_pos = torch.cat(
                 (pos_emb.view(1, pos_emb.size(0), pos_emb.size(1)), inp), 0)
diff --git a/onmt/modules/SRU.py b/onmt/modules/SRU.py
index cdff038c4a..42a439cf19 100644
--- a/onmt/modules/SRU.py
+++ b/onmt/modules/SRU.py
@@ -5,6 +5,8 @@
 This implementation is adpoted from the author of the paper:
 https://github.com/taolei87/sru/blob/master/cuda_functional.py.
 """
+# flake8: noqa
+
 import subprocess
 import platform
 import os
diff --git a/onmt/modules/WeightNorm.py b/onmt/modules/WeightNorm.py
index 405edfc9e9..00c70de52b 100644
--- a/onmt/modules/WeightNorm.py
+++ b/onmt/modules/WeightNorm.py
@@ -61,10 +61,10 @@ def forward(self, x, init=False):
             self.V.data.copy_(torch.randn(self.V.data.size()).type_as(
                 self.V.data) * 0.05)
             # norm is out_features * 1
-            V_norm = self.V.data / \
+            v_norm = self.V.data / \
                 self.V.data.norm(2, 1).expand_as(self.V.data)
             # batch_size * out_features
-            x_init = F.linear(x, Variable(V_norm)).data
+            x_init = F.linear(x, Variable(v_norm)).data
             # out_features
             m_init, v_init = x_init.mean(0).squeeze(
                 0), x_init.var(0).squeeze(0)
@@ -119,10 +119,10 @@ def forward(self, x, init=False):
             # out_channels, in_channels // groups, * kernel_size
             self.V.data.copy_(torch.randn(self.V.data.size()
                                           ).type_as(self.V.data) * 0.05)
-            V_norm = self.V.data / self.V.data.view(self.out_channels, -1)\
+            v_norm = self.V.data / self.V.data.view(self.out_channels, -1)\
                 .norm(2, 1).view(self.out_channels, *(
                     [1] * (len(self.kernel_size) + 1))).expand_as(self.V.data)
-            x_init = F.conv2d(x, Variable(V_norm), None, self.stride,
+            x_init = F.conv2d(x, Variable(v_norm), None, self.stride,
                               self.padding, self.dilation, self.groups).data
             t_x_init = x_init.transpose(0, 1).contiguous().view(
                 self.out_channels, -1)
@@ -144,20 +144,20 @@ def forward(self, x, init=False):
                 self.b_avg.copy_(self.b.data)
             return Variable(x_init)
         else:
-            V, g, b = get_vars_maybe_avg(
+            v, g, b = get_vars_maybe_avg(
                 self, ['V', 'g', 'b'], self.training,
                 polyak_decay=self.polyak_decay)
 
-            scalar = torch.norm(V.view(self.out_channels, -1), 2, 1)
+            scalar = torch.norm(v.view(self.out_channels, -1), 2, 1)
             if len(scalar.size()) == 2:
                 scalar = g / scalar.squeeze(1)
             else:
                 scalar = g / scalar
 
-            W = scalar.view(self.out_channels, *
-                            ([1] * (len(V.size()) - 1))).expand_as(V) * V
+            w = scalar.view(self.out_channels, *
+                            ([1] * (len(v.size()) - 1))).expand_as(v) * v
 
-            x = F.conv2d(x, W, b, self.stride,
+            x = F.conv2d(x, w, b, self.stride,
                          self.padding, self.dilation, self.groups)
             return x
@@ -192,12 +192,12 @@ def forward(self, x, init=False):
             # in_channels, out_channels, *kernel_size
             self.V.data.copy_(torch.randn(self.V.data.size()).type_as(
                 self.V.data) * 0.05)
-            V_norm = self.V.data / self.V.data.transpose(0, 1).contiguous() \
+            v_norm = self.V.data / self.V.data.transpose(0, 1).contiguous() \
                 .view(self.out_channels, -1).norm(2, 1).view(
                     self.in_channels, self.out_channels,
                     *([1] * len(self.kernel_size))).expand_as(self.V.data)
             x_init = F.conv_transpose2d(
-                x, Variable(V_norm), None, self.stride,
+                x, Variable(v_norm), None, self.stride,
                 self.padding, self.output_padding, self.groups).data
             # self.out_channels, 1
             t_x_init = x_init.tranpose(0, 1).contiguous().view(
@@ -228,10 +228,10 @@ def forward(self, x, init=False):
             scalar = g / \
                 torch.norm(V.transpose(0, 1).contiguous().view(
                     self.out_channels, -1), 2, 1).squeeze(1)
-            W = scalar.view(self.in_channels, self.out_channels,
+            w = scalar.view(self.in_channels, self.out_channels,
                             *([1] * (len(V.size()) - 2))).expand_as(V) * V
 
-            x = F.conv_transpose2d(x, W, b, self.stride,
+            x = F.conv_transpose2d(x, w, b, self.stride,
                                    self.padding, self.output_padding,
                                    self.groups)
             return x
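The WeightNorm classes touched above implement the reparameterization of
Salimans & Kingma (2016), w = g * v / ||v||, with a data-dependent
initialization pass (the v_norm computations) and polyak-averaged parameters
at inference. A stripped-down sketch of the core idea for a linear layer,
simplified relative to the real classes, which also handle conv shapes and
averaging:

import torch
import torch.nn as nn
import torch.nn.functional as F

class WeightNormLinear(nn.Module):
    def __init__(self, in_features, out_features):
        super(WeightNormLinear, self).__init__()
        # Direction v and per-output-unit scale g replace a raw weight.
        self.v = nn.Parameter(torch.randn(out_features, in_features) * 0.05)
        self.g = nn.Parameter(torch.ones(out_features))
        self.b = nn.Parameter(torch.zeros(out_features))

    def forward(self, x):
        # w = g * v / ||v||, normalized row-wise over output units.
        v_norm = self.v.norm(2, 1, keepdim=True)
        w = self.g.unsqueeze(1) * self.v / v_norm
        return F.linear(x, w, self.b)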
diff --git a/onmt/modules/__init__.py b/onmt/modules/__init__.py
index c581f04706..fb95e49ee9 100644
--- a/onmt/modules/__init__.py
+++ b/onmt/modules/__init__.py
@@ -1,6 +1,6 @@
 from onmt.modules.UtilClass import LayerNorm, Bottle, BottleLinear, \
     BottleLayerNorm, BottleSoftmax, Elementwise
-from onmt.modules.Gate import ContextGateFactory
+from onmt.modules.Gate import context_gate_factory
 from onmt.modules.GlobalAttention import GlobalAttention
 from onmt.modules.ConvMultiStepAttention import ConvMultiStepAttention
 from onmt.modules.ImageEncoder import ImageEncoder
@@ -25,8 +25,8 @@
            LayerNorm, Bottle, BottleLinear, BottleLayerNorm, BottleSoftmax,
            TransformerEncoder, TransformerDecoder, Embeddings, Elementwise,
            MatrixTree, WeightNormConv2d, ConvMultiStepAttention,
-           CNNEncoder, CNNDecoder, StackedLSTM, StackedGRU, ContextGateFactory,
-           CopyGeneratorLossCompute, AudioEncoder]
+           CNNEncoder, CNNDecoder, StackedLSTM, StackedGRU,
+           context_gate_factory, CopyGeneratorLossCompute, AudioEncoder]
 
 if can_use_sru:
     __all__.extend([SRU, check_sru_requirement])
diff --git a/onmt/translate/Beam.py b/onmt/translate/Beam.py
index c2f7b7e56a..5ab3a58f4d 100644
--- a/onmt/translate/Beam.py
+++ b/onmt/translate/Beam.py
@@ -18,19 +18,19 @@ def __init__(self, size, pad, bos, eos,
 
         # The score for each translation on the beam.
         self.scores = self.tt.FloatTensor(size).zero_()
-        self.allScores = []
+        self.all_scores = []
 
         # The backpointers at each time-step.
-        self.prevKs = []
+        self.prev_ks = []
 
         # The outputs at each time-step.
-        self.nextYs = [self.tt.LongTensor(size)
-                       .fill_(pad)]
-        self.nextYs[0][0] = bos
+        self.next_ys = [self.tt.LongTensor(size)
+                        .fill_(pad)]
+        self.next_ys[0][0] = bos
 
         # Has EOS topped the beam yet.
         self._eos = eos
-        self.eosTop = False
+        self.eos_top = False
 
         # The attentions (matrix) for each time.
         self.attn = []
@@ -40,98 +40,100 @@ def __init__(self, size, pad, bos, eos,
         self.n_best = n_best
 
         # Information for global scoring.
-        self.globalScorer = global_scorer
-        self.globalState = {}
+        self.global_scorer = global_scorer
+        self.global_state = {}
 
-    def getCurrentState(self):
+    def get_current_state(self):
         "Get the outputs for the current timestep."
-        return self.nextYs[-1]
+        return self.next_ys[-1]
 
-    def getCurrentOrigin(self):
+    def get_current_origin(self):
         "Get the backpointers for the current timestep."
-        return self.prevKs[-1]
+        return self.prev_ks[-1]
 
-    def advance(self, wordLk, attnOut):
+    def advance(self, word_probs, attn_out):
         """
         Given prob over words for every last beam `wordLk` and attention
-        `attnOut`: Compute and update the beam search.
+        `attn_out`: Compute and update the beam search.
 
         Parameters:
 
-        * `wordLk`- probs of advancing from the last step (K x words)
-        * `attnOut`- attention at the last step
+        * `word_probs`- probs of advancing from the last step (K x words)
+        * `attn_out`- attention at the last step
 
         Returns: True if beam search is complete.
         """
-        numWords = wordLk.size(1)
+        num_words = word_probs.size(1)
 
         # Sum the previous scores.
-        if len(self.prevKs) > 0:
-            beamLk = wordLk + self.scores.unsqueeze(1).expand_as(wordLk)
+        if len(self.prev_ks) > 0:
+            beam_scores = word_probs + \
+                self.scores.unsqueeze(1).expand_as(word_probs)
 
             # Don't let EOS have children.
-            for i in range(self.nextYs[-1].size(0)):
-                if self.nextYs[-1][i] == self._eos:
-                    beamLk[i] = -1e20
+            for i in range(self.next_ys[-1].size(0)):
+                if self.next_ys[-1][i] == self._eos:
+                    beam_scores[i] = -1e20
         else:
-            beamLk = wordLk[0]
-        flatBeamLk = beamLk.view(-1)
-        bestScores, bestScoresId = flatBeamLk.topk(self.size, 0, True, True)
+            beam_scores = word_probs[0]
+        flat_beam_scores = beam_scores.view(-1)
+        best_scores, best_scores_id = flat_beam_scores.topk(self.size, 0,
+                                                            True, True)
 
-        self.allScores.append(self.scores)
-        self.scores = bestScores
+        self.all_scores.append(self.scores)
+        self.scores = best_scores
 
-        # bestScoresId is flattened beam x word array, so calculate which
+        # best_scores_id is flattened beam x word array, so calculate which
         # word and beam each score came from
-        prevK = bestScoresId / numWords
-        self.prevKs.append(prevK)
-        self.nextYs.append((bestScoresId - prevK * numWords))
-        self.attn.append(attnOut.index_select(0, prevK))
+        prev_k = best_scores_id / num_words
+        self.prev_ks.append(prev_k)
+        self.next_ys.append((best_scores_id - prev_k * num_words))
+        self.attn.append(attn_out.index_select(0, prev_k))
 
-        if self.globalScorer is not None:
-            self.globalScorer.updateGlobalState(self)
+        if self.global_scorer is not None:
+            self.global_scorer.update_global_state(self)
 
-        for i in range(self.nextYs[-1].size(0)):
-            if self.nextYs[-1][i] == self._eos:
+        for i in range(self.next_ys[-1].size(0)):
+            if self.next_ys[-1][i] == self._eos:
                 s = self.scores[i]
-                if self.globalScorer is not None:
-                    globalScores = self.globalScorer.score(self, self.scores)
-                    s = globalScores[i]
-                self.finished.append((s, len(self.nextYs) - 1, i))
+                if self.global_scorer is not None:
+                    global_scores = self.global_scorer.score(self, self.scores)
+                    s = global_scores[i]
+                self.finished.append((s, len(self.next_ys) - 1, i))
 
         # End condition is when top-of-beam is EOS and no global score.
-        if self.nextYs[-1][0] == self._eos:
-            # self.allScores.append(self.scores)
-            self.eosTop = True
+        if self.next_ys[-1][0] == self._eos:
+            # self.all_scores.append(self.scores)
+            self.eos_top = True
 
     def done(self):
-        return self.eosTop and len(self.finished) >= self.n_best
+        return self.eos_top and len(self.finished) >= self.n_best
 
-    def sortFinished(self, minimum=None):
+    def sort_finished(self, minimum=None):
         if minimum is not None:
             i = 0
             # Add from beam until we have minimum outputs.
             while len(self.finished) < minimum:
                 s = self.scores[i]
-                if self.globalScorer is not None:
-                    globalScores = self.globalScorer.score(self, self.scores)
-                    s = globalScores[i]
-                self.finished.append((s, len(self.nextYs) - 1, i))
+                if self.global_scorer is not None:
+                    global_scores = self.global_scorer.score(self, self.scores)
+                    s = global_scores[i]
+                self.finished.append((s, len(self.next_ys) - 1, i))
 
         self.finished.sort(key=lambda a: -a[0])
         scores = [sc for sc, _, _ in self.finished]
         ks = [(t, k) for _, t, k in self.finished]
         return scores, ks
 
-    def getHyp(self, timestep, k):
+    def get_hyp(self, timestep, k):
         """
         Walk back to construct the full hypothesis.
         """
         hyp, attn = [], []
-        for j in range(len(self.prevKs[:timestep]) - 1, -1, -1):
-            hyp.append(self.nextYs[j+1][k])
+        for j in range(len(self.prev_ks[:timestep]) - 1, -1, -1):
+            hyp.append(self.next_ys[j+1][k])
             attn.append(self.attn[j][k])
-            k = self.prevKs[j][k]
+            k = self.prev_ks[j][k]
 
         return hyp[::-1], torch.stack(attn[::-1])
@@ -145,16 +147,16 @@ def __init__(self, alpha, beta):
 
     def score(self, beam, logprobs):
         "Additional term add to log probability"
-        cov = beam.globalState["coverage"]
+        cov = beam.global_state["coverage"]
         pen = self.beta * torch.min(cov, cov.clone().fill_(1.0)).log().sum(1)
-        l_term = (((5 + len(beam.nextYs)) ** self.alpha) /
+        l_term = (((5 + len(beam.next_ys)) ** self.alpha) /
                   ((5 + 1) ** self.alpha))
         return (logprobs / l_term) + pen
 
-    def updateGlobalState(self, beam):
+    def update_global_state(self, beam):
         "Keeps the coverage vector as sum of attens"
-        if len(beam.prevKs) == 1:
-            beam.globalState["coverage"] = beam.attn[-1]
+        if len(beam.prev_ks) == 1:
+            beam.global_state["coverage"] = beam.attn[-1]
         else:
-            beam.globalState["coverage"] = beam.globalState["coverage"] \
-                .index_select(0, beam.prevKs[-1]).add(beam.attn[-1])
+            beam.global_state["coverage"] = beam.global_state["coverage"] \
+                .index_select(0, beam.prev_ks[-1]).add(beam.attn[-1])
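The global scorer in Beam.py follows the Google NMT heuristics (Wu et al.,
2016): finished hypotheses are rescored with a length penalty
lp = ((5 + |y|) ** alpha) / ((5 + 1) ** alpha) and a coverage penalty
cp = beta * sum(log(min(coverage, 1.0))). A small numeric check of the same
arithmetic in plain Python:

import math

def rescore(logprob, length, coverage, alpha=0.6, beta=0.2):
    # Length penalty: normalizes away the bias toward short hypotheses
    # that raw summed log-probabilities have.
    lp = ((5 + length) ** alpha) / ((5 + 1) ** alpha)
    # Coverage penalty: attention mass above 1.0 per source word earns
    # no extra credit; uncovered source words are penalized.
    cp = beta * sum(math.log(min(c, 1.0)) for c in coverage)
    return logprob / lp + cp

# A longer hypothesis with full source coverage can overtake a shorter
# one whose raw score is higher:
print(rescore(-4.0, 4, [1.0, 1.0, 0.9]))   # ~ -3.16
print(rescore(-4.5, 6, [1.0, 1.0, 1.0]))   # ~ -3.13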
""" hyp, attn = [], [] - for j in range(len(self.prevKs[:timestep]) - 1, -1, -1): - hyp.append(self.nextYs[j+1][k]) + for j in range(len(self.prev_ks[:timestep]) - 1, -1, -1): + hyp.append(self.next_ys[j+1][k]) attn.append(self.attn[j][k]) - k = self.prevKs[j][k] + k = self.prev_ks[j][k] return hyp[::-1], torch.stack(attn[::-1]) @@ -145,16 +147,16 @@ def __init__(self, alpha, beta): def score(self, beam, logprobs): "Additional term add to log probability" - cov = beam.globalState["coverage"] + cov = beam.global_state["coverage"] pen = self.beta * torch.min(cov, cov.clone().fill_(1.0)).log().sum(1) - l_term = (((5 + len(beam.nextYs)) ** self.alpha) / + l_term = (((5 + len(beam.next_ys)) ** self.alpha) / ((5 + 1) ** self.alpha)) return (logprobs / l_term) + pen - def updateGlobalState(self, beam): + def update_global_state(self, beam): "Keeps the coverage vector as sum of attens" - if len(beam.prevKs) == 1: - beam.globalState["coverage"] = beam.attn[-1] + if len(beam.prev_ks) == 1: + beam.global_state["coverage"] = beam.attn[-1] else: - beam.globalState["coverage"] = beam.globalState["coverage"] \ - .index_select(0, beam.prevKs[-1]).add(beam.attn[-1]) + beam.global_state["coverage"] = beam.global_state["coverage"] \ + .index_select(0, beam.prev_ks[-1]).add(beam.attn[-1]) diff --git a/onmt/translate/Translation.py b/onmt/translate/Translation.py index 2a9738e00f..4902b89e91 100644 --- a/onmt/translate/Translation.py +++ b/onmt/translate/Translation.py @@ -10,7 +10,7 @@ def __init__(self, data, fields, n_best, replace_unk, has_tgt): self.replace_unk = replace_unk self.has_tgt = has_tgt - def _buildTargetTokens(self, src, src_vocab, src_raw, pred, attn): + def _build_target_tokens(self, src, src_vocab, src_raw, pred, attn): vocab = self.fields["tgt"].vocab tokens = [] for tok in pred: @@ -28,13 +28,13 @@ def _buildTargetTokens(self, src, src_vocab, src_raw, pred, attn): tokens[i] = src_raw[maxIndex[0]] return tokens - def fromBatch(self, translation_batch): + def from_batch(self, translation_batch): batch = translation_batch["batch"] assert(len(translation_batch["gold_score"]) == len(translation_batch["predictions"])) batch_size = batch.batch_size - preds, predScore, attn, goldScore, indices = list(zip( + preds, predScore, attn, gold_score, indices = list(zip( *sorted(zip(translation_batch["predictions"], translation_batch["scores"], translation_batch["attention"], @@ -63,19 +63,19 @@ def fromBatch(self, translation_batch): else: src_vocab = None src_raw = None - pred_sents = [self._buildTargetTokens( + pred_sents = [self._build_target_tokens( src[:, b], src_vocab, src_raw, preds[b][n], attn[b][n]) for n in range(self.n_best)] gold_sent = None if tgt is not None: - gold_sent = self._buildTargetTokens( + gold_sent = self._build_target_tokens( src[:, b], src_vocab, src_raw, tgt[1:, b] if tgt is not None else None, None) translation = Translation(src[:, b], src_raw, pred_sents, attn[b], predScore[b], gold_sent, - goldScore[b]) + gold_score[b]) translations.append(translation) return translations diff --git a/onmt/translate/Translator.py b/onmt/translate/Translator.py index efaaf2f32b..c7649f6ff4 100644 --- a/onmt/translate/Translator.py +++ b/onmt/translate/Translator.py @@ -29,7 +29,7 @@ def __init__(self, model, fields, "scores": [], "log_probs": []} - def translateBatch(self, batch, data): + def translate_batch(self, batch, data): # (0) Prep each of the components of the search. # And helper method for reducing verbosity. 
diff --git a/onmt/translate/Translator.py b/onmt/translate/Translator.py
index efaaf2f32b..c7649f6ff4 100644
--- a/onmt/translate/Translator.py
+++ b/onmt/translate/Translator.py
@@ -29,7 +29,7 @@ def __init__(self, model, fields,
                                 "scores": [],
                                 "log_probs": []}
 
-    def translateBatch(self, batch, data):
+    def translate_batch(self, batch, data):
         # (0) Prep each of the components of the search.
         # And helper method for reducing verbosity.
         beam_size = self.beam_size
@@ -61,9 +61,9 @@ def unbottle(m):
         if data_type == 'text':
             _, src_lengths = batch.src
 
-        encStates, context = self.model.encoder(src, src_lengths)
-        decStates = self.model.decoder.init_decoder_state(
-            src, context, encStates)
+        enc_states, context = self.model.encoder(src, src_lengths)
+        dec_states = self.model.decoder.init_decoder_state(
+            src, context, enc_states)
 
         if src_lengths is None:
             src_lengths = torch.Tensor(batch_size).type_as(context.data)\
                                                   .long()\
                                                   .fill_(context.size(0))
@@ -71,10 +71,10 @@ def unbottle(m):
 
         # (2) Repeat src objects `beam_size` times.
-        srcMap = rvar(batch.src_map.data) if data_type == 'text' else None
+        src_map = rvar(batch.src_map.data) if data_type == 'text' else None
         context = rvar(context.data)
         context_lengths = src_lengths.repeat(beam_size)
-        decStates.repeat_beam_size_times(beam_size)
+        dec_states.repeat_beam_size_times(beam_size)
 
         # (3) run the decoder to generate sentences, using beam search.
         for i in range(self.max_length):
@@ -83,7 +83,7 @@ def unbottle(m):
 
             # Construct batch x beam_size nxt words.
             # Get all the pending current beam words and arrange for forward.
-            inp = var(torch.stack([b.getCurrentState() for b in beam])
+            inp = var(torch.stack([b.get_current_state() for b in beam])
                       .t().contiguous().view(1, -1))
 
             # Turn any copied words to UNKs
             # 0 is unk
             if self.copy_attn:
                 inp = inp.masked_fill(
                     inp.gt(len(self.fields["tgt"].vocab) - 1), 0)
@@ -97,20 +97,20 @@ def unbottle(m):
             inp = inp.unsqueeze(2)
 
             # Run one step.
-            decOut, decStates, attn = self.model.decoder(
-                inp, context, decStates, context_lengths=context_lengths)
-            decOut = decOut.squeeze(0)
-            # decOut: beam x rnn_size
+            dec_out, dec_states, attn = self.model.decoder(
+                inp, context, dec_states, context_lengths=context_lengths)
+            dec_out = dec_out.squeeze(0)
+            # dec_out: beam x rnn_size
 
             # (b) Compute a vector of batch*beam word scores.
             if not self.copy_attn:
-                out = self.model.generator.forward(decOut).data
+                out = self.model.generator.forward(dec_out).data
                 out = unbottle(out)
                 # beam x tgt_vocab
             else:
-                out = self.model.generator.forward(decOut,
+                out = self.model.generator.forward(dec_out,
                                                    attn["copy"].squeeze(0),
-                                                   srcMap)
+                                                   src_map)
                 # beam x (tgt_vocab + extra_vocab)
                 out = data.collapse_copy_scores(
                     unbottle(out.data),
@@ -123,26 +123,26 @@ def unbottle(m):
                 b.advance(
                     out[:, j],
                     unbottle(attn["std"]).data[:, j, :context_lengths[j]])
-                decStates.beam_update(j, b.getCurrentOrigin(), beam_size)
+                dec_states.beam_update(j, b.get_current_origin(), beam_size)
 
         # (4) Extract sentences from beam.
-        ret = self._fromBeam(beam)
+        ret = self._from_beam(beam)
         ret["gold_score"] = [0] * batch_size
         if "tgt" in batch.__dict__:
-            ret["gold_score"] = self._runTarget(batch, data)
+            ret["gold_score"] = self._run_target(batch, data)
         ret["batch"] = batch
 
         return ret
 
-    def _fromBeam(self, beam):
+    def _from_beam(self, beam):
         ret = {"predictions": [],
                "scores": [],
                "attention": []}
         for b in beam:
             n_best = self.n_best
-            scores, ks = b.sortFinished(minimum=n_best)
+            scores, ks = b.sort_finished(minimum=n_best)
             hyps, attn = [], []
             for i, (times, k) in enumerate(ks[:n_best]):
-                hyp, att = b.getHyp(times, k)
+                hyp, att = b.get_hyp(times, k)
                 hyps.append(hyp)
                 attn.append(att)
             ret["predictions"].append(hyps)
@@ -150,7 +150,7 @@ def _fromBeam(self, beam):
             ret["scores"].append(scores)
             ret["attention"].append(attn)
         return ret
 
-    def _runTarget(self, batch, data):
+    def _run_target(self, batch, data):
         data_type = data.data_type
         if data_type == 'text':
             _, src_lengths = batch.src
@@ -160,23 +160,23 @@ def _runTarget(self, batch, data):
         tgt_in = onmt.io.make_features(batch, 'tgt')[:-1]
 
         #  (1) run the encoder on the src
-        encStates, context = self.model.encoder(src, src_lengths)
-        decStates = self.model.decoder.init_decoder_state(
-            src, context, encStates)
+        enc_states, context = self.model.encoder(src, src_lengths)
+        dec_states = self.model.decoder.init_decoder_state(src,
+                                                           context, enc_states)
 
         #  (2) if a target is specified, compute the 'goldScore'
         #  (i.e. log likelihood) of the target under the model
         tt = torch.cuda if self.cuda else torch
-        goldScores = tt.FloatTensor(batch.batch_size).fill_(0)
-        decOut, decStates, attn = self.model.decoder(
-            tgt_in, context, decStates, context_lengths=src_lengths)
+        gold_scores = tt.FloatTensor(batch.batch_size).fill_(0)
+        dec_out, dec_states, attn = self.model.decoder(
+            tgt_in, context, dec_states, context_lengths=src_lengths)
 
         tgt_pad = self.fields["tgt"].vocab.stoi[onmt.io.PAD_WORD]
-        for dec, tgt in zip(decOut, batch.tgt[1:].data):
+        for dec, tgt in zip(dec_out, batch.tgt[1:].data):
             # Log prob of each word.
             out = self.model.generator.forward(dec)
             tgt = tgt.unsqueeze(1)
             scores = out.data.gather(1, tgt)
             scores.masked_fill_(tgt.eq(tgt_pad), 0)
-            goldScores += scores
-        return goldScores
+            gold_scores += scores
+        return gold_scores
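_run_target above scores the gold translation by teacher forcing: feed the
reference tokens through the decoder, gather each reference token's
log-probability, zero out padding positions, and sum. The same computation
in a framework-free sketch with toy data:

# log_probs[t][w]: model log-prob of word w at step t (hypothetical data);
# gold: reference token ids; pad: the padding id, which must not be scored.
def gold_score(log_probs, gold, pad=0):
    total = 0.0
    for step_log_probs, ref in zip(log_probs, gold):
        if ref != pad:
            total += step_log_probs[ref]
    return total

log_probs = [[-2.3, -0.1, -4.0], [-0.7, -1.2, -2.0]]
print(gold_score(log_probs, [1, 2]))   # -0.1 + -2.0 = -2.1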
diff --git a/test/test_models.py b/test/test_models.py
index 0a79d358bb..41b0d53bd0 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -34,60 +34,61 @@ def get_vocab(self):
         src.build_vocab([])
         return src.vocab
 
-    def get_batch(self, sourceL=3, bsize=1):
+    def get_batch(self, source_l=3, bsize=1):
         # len x batch x nfeat
-        test_src = Variable(torch.ones(sourceL, bsize, 1)).long()
-        test_tgt = Variable(torch.ones(sourceL, bsize, 1)).long()
-        test_length = torch.ones(bsize).fill_(sourceL)
+        test_src = Variable(torch.ones(source_l, bsize, 1)).long()
+        test_tgt = Variable(torch.ones(source_l, bsize, 1)).long()
+        test_length = torch.ones(bsize).fill_(source_l)
         return test_src, test_tgt, test_length
 
-    def get_batch_image(self, tgtL=3, bsize=1, h=15, w=17):
+    def get_batch_image(self, tgt_l=3, bsize=1, h=15, w=17):
         # batch x c x h x w
         test_src = Variable(torch.ones(bsize, 3, h, w)).float()
-        test_tgt = Variable(torch.ones(tgtL, bsize, 1)).long()
+        test_tgt = Variable(torch.ones(tgt_l, bsize, 1)).long()
         test_length = None
         return test_src, test_tgt, test_length
 
-    def get_batch_audio(self, tgtL=3, bsize=1, sample_rate=5500,
+    def get_batch_audio(self, tgt_l=3, bsize=1, sample_rate=5500,
                         window_size=0.03, t=37):
         # batch x 1 x nfft x t
         nfft = int(math.floor((sample_rate * window_size) / 2) + 1)
         test_src = Variable(torch.ones(bsize, 1, nfft, t)).float()
-        test_tgt = Variable(torch.ones(tgtL, bsize, 1)).long()
+        test_tgt = Variable(torch.ones(tgt_l, bsize, 1)).long()
         test_length = None
         return test_src, test_tgt, test_length
 
-    def embeddings_forward(self, opt, sourceL=3, bsize=1):
+    def embeddings_forward(self, opt, source_l=3, bsize=1):
         '''
         Tests if the embeddings works as expected
 
         args:
             opt: set of options
-            sourceL: Length of generated input sentence
+            source_l: Length of generated input sentence
             bsize: Batchsize of generated input
         '''
         word_dict = self.get_vocab()
         feature_dicts = []
         emb = make_embeddings(opt, word_dict, feature_dicts)
-        test_src, _, __ = self.get_batch(sourceL=sourceL,
+        test_src, _, __ = self.get_batch(source_l=source_l,
                                          bsize=bsize)
         if opt.decoder_type == 'transformer':
             input = torch.cat([test_src, test_src], 0)
             res = emb(input)
-            compare_to = torch.zeros(sourceL * 2, bsize, opt.src_word_vec_size)
+            compare_to = torch.zeros(source_l * 2, bsize,
+                                     opt.src_word_vec_size)
         else:
             res = emb(test_src)
-            compare_to = torch.zeros(sourceL, bsize, opt.src_word_vec_size)
+            compare_to = torch.zeros(source_l, bsize, opt.src_word_vec_size)
 
         self.assertEqual(res.size(), compare_to.size())
 
-    def encoder_forward(self, opt, sourceL=3, bsize=1):
+    def encoder_forward(self, opt, source_l=3, bsize=1):
         '''
         Tests if the encoder works as expected
 
         args:
             opt: set of options
-            sourceL: Length of generated input sentence
+            source_l: Length of generated input sentence
            bsize: Batchsize of generated input
         '''
         word_dict = self.get_vocab()
@@ -95,14 +96,14 @@ def encoder_forward(self, opt, sourceL=3, bsize=1):
         embeddings = make_embeddings(opt, word_dict, feature_dicts)
         enc = make_encoder(opt, embeddings)
 
-        test_src, test_tgt, test_length = self.get_batch(sourceL=sourceL,
+        test_src, test_tgt, test_length = self.get_batch(source_l=source_l,
                                                          bsize=bsize)
 
         hidden_t, outputs = enc(test_src, test_length)
 
         # Initialize vectors to compare size with
         test_hid = torch.zeros(self.opt.enc_layers, bsize,
                                opt.rnn_size)
-        test_out = torch.zeros(sourceL, bsize, opt.rnn_size)
+        test_out = torch.zeros(source_l, bsize, opt.rnn_size)
 
         # Ensure correct sizes and types
         self.assertEqual(test_hid.size(),
@@ -112,14 +113,14 @@ def encoder_forward(self, opt, sourceL=3, bsize=1):
         self.assertEqual(type(outputs), torch.autograd.Variable)
         self.assertEqual(type(outputs.data), torch.FloatTensor)
 
-    def nmtmodel_forward(self, opt, sourceL=3, bsize=1):
+    def nmtmodel_forward(self, opt, source_l=3, bsize=1):
         """
         Creates a nmtmodel with a custom opt function.
         Forwards a testbatch and checks output size.
 
         Args:
             opt: Namespace with options
-            sourceL: length of input sequence
+            source_l: length of input sequence
             bsize: batchsize
         """
         word_dict = self.get_vocab()
@@ -134,25 +135,25 @@ def nmtmodel_forward(self, opt, sourceL=3, bsize=1):
 
         model = onmt.Models.NMTModel(enc, dec)
 
-        test_src, test_tgt, test_length = self.get_batch(sourceL=sourceL,
+        test_src, test_tgt, test_length = self.get_batch(source_l=source_l,
                                                          bsize=bsize)
         outputs, attn, _ = model(test_src, test_tgt, test_length)
-        outputsize = torch.zeros(sourceL - 1, bsize, opt.rnn_size)
+        outputsize = torch.zeros(source_l - 1, bsize, opt.rnn_size)
         # Make sure that output has the correct size and type
         self.assertEqual(outputs.size(), outputsize.size())
         self.assertEqual(type(outputs), torch.autograd.Variable)
         self.assertEqual(type(outputs.data), torch.FloatTensor)
 
-    def imagemodel_forward(self, opt, tgtL=2, bsize=1, h=15, w=17):
+    def imagemodel_forward(self, opt, tgt_l=2, bsize=1, h=15, w=17):
         """
         Creates an image-to-text nmtmodel with a custom opt function.
         Forwards a testbatch and checks output size.
 
         Args:
             opt: Namespace with options
-            sourceL: length of input sequence
+            source_l: length of input sequence
             bsize: batchsize
         """
         if opt.encoder_type == 'transformer' or opt.encoder_type == 'cnn':
@@ -175,24 +176,24 @@ def imagemodel_forward(self, opt, tgtL=2, bsize=1, h=15, w=17):
         test_src, test_tgt, test_length = self.get_batch_image(
             h=h, w=w,
             bsize=bsize,
-            tgtL=tgtL)
+            tgt_l=tgt_l)
         outputs, attn, _ = model(test_src, test_tgt, test_length)
-        outputsize = torch.zeros(tgtL - 1, bsize, opt.rnn_size)
+        outputsize = torch.zeros(tgt_l - 1, bsize, opt.rnn_size)
         # Make sure that output has the correct size and type
         self.assertEqual(outputs.size(), outputsize.size())
         self.assertEqual(type(outputs), torch.autograd.Variable)
         self.assertEqual(type(outputs.data), torch.FloatTensor)
 
-    def audiomodel_forward(self, opt, tgtL=2, bsize=1, t=37):
+    def audiomodel_forward(self, opt, tgt_l=2, bsize=1, t=37):
         """
         Creates a speech-to-text nmtmodel with a custom opt function.
         Forwards a testbatch and checks output size.
 
         Args:
             opt: Namespace with options
-            sourceL: length of input sequence
+            source_l: length of input sequence
             bsize: batchsize
         """
         if opt.encoder_type == 'transformer' or opt.encoder_type == 'cnn':
@@ -218,36 +219,37 @@ def audiomodel_forward(self, opt, tgtL=2, bsize=1, t=37):
             bsize=bsize,
             sample_rate=opt.sample_rate,
             window_size=opt.window_size,
-            t=t, tgtL=tgtL)
+            t=t, tgt_l=tgt_l)
         outputs, attn, _ = model(test_src, test_tgt, test_length)
-        outputsize = torch.zeros(tgtL - 1, bsize, opt.rnn_size)
+        outputsize = torch.zeros(tgt_l - 1, bsize, opt.rnn_size)
         # Make sure that output has the correct size and type
         self.assertEqual(outputs.size(), outputsize.size())
         self.assertEqual(type(outputs), torch.autograd.Variable)
         self.assertEqual(type(outputs.data), torch.FloatTensor)
 
 
-def _add_test(paramSetting, methodname):
+def _add_test(param_setting, methodname):
     """
     Adds a Test to TestModel according to settings
 
     Args:
-        paramSetting: list of tuples of (param, setting)
+        param_setting: list of tuples of (param, setting)
         methodname: name of the method that gets called
     """
 
     def test_method(self):
-        if paramSetting:
+        if param_setting:
             opt = copy.deepcopy(self.opt)
-            for param, setting in paramSetting:
+            for param, setting in param_setting:
                 setattr(opt, param, setting)
         else:
            opt = self.opt
         getattr(self, methodname)(opt)
 
-    if paramSetting:
-        name = 'test_' + methodname + "_" + "_".join(str(paramSetting).split())
+    if param_setting:
+        name = 'test_' + methodname + "_" + "_".join(
+            str(param_setting).split())
     else:
         name = 'test_' + methodname + '_standard'
     setattr(TestModel, name, test_method)
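The _add_test helper in both test files generates one unittest method per
parameter combination and attaches it to the TestCase with setattr, so every
setting shows up as a separately named, separately reported test. A minimal
self-contained version of the pattern:

import unittest

class TestMath(unittest.TestCase):
    pass

def _add_test(param, expected):
    # Bind a fresh test method for this parameter combination.
    def test_method(self):
        self.assertEqual(param * param, expected)
    setattr(TestMath, 'test_square_%d' % param, test_method)

for param, expected in [(2, 4), (3, 9)]:
    _add_test(param, expected)

if __name__ == '__main__':
    unittest.main()   # runs test_square_2 and test_square_3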
diff --git a/test/test_preprocess.py b/test/test_preprocess.py
index 1918368228..74a4d4640e 100644
--- a/test/test_preprocess.py
+++ b/test/test_preprocess.py
@@ -57,25 +57,26 @@ def test_merge_vocab(self):
         self.assertTrue('b' in merged.itos)
 
 
-def _add_test(paramSetting, methodname):
+def _add_test(param_setting, methodname):
     """
     Adds a Test to TestData according to settings
 
     Args:
-        paramSetting: list of tuples of (param, setting)
+        param_setting: list of tuples of (param, setting)
         methodname: name of the method that gets called
     """
 
     def test_method(self):
-        if paramSetting:
+        if param_setting:
             opt = copy.deepcopy(self.opt)
-            for param, setting in paramSetting:
+            for param, setting in param_setting:
                 setattr(opt, param, setting)
         else:
             opt = self.opt
         getattr(self, methodname)(opt)
 
-    if paramSetting:
-        name = 'test_' + methodname + "_" + "_".join(str(paramSetting).split())
+    if param_setting:
+        name = 'test_' + methodname + "_" + "_".join(
+            str(param_setting).split())
     else:
         name = 'test_' + methodname + '_standard'
     setattr(TestData, name, test_method)
diff --git a/translate.py b/translate.py
index 1f31cabc07..5bd7b9bfa5 100755
--- a/translate.py
+++ b/translate.py
@@ -76,8 +76,8 @@ def main():
     gold_score_total, gold_words_total = 0, 0
 
     for batch in test_data:
-        batch_data = translator.translateBatch(batch, data)
-        translations = builder.fromBatch(batch_data)
+        batch_data = translator.translate_batch(batch, data)
+        translations = builder.from_batch(batch_data)
 
         for trans in translations:
             pred_score_total += trans.pred_scores[0]