Merge branch 'master' into newdistrib
vince62s authored Sep 27, 2018
2 parents a607791 + 195f5ae commit 13386e5
Showing 23 changed files with 287 additions and 2,108 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -17,7 +17,7 @@ before_install:
# Useful for debugging any issues with conda
- conda info -a
# freeze the supported pytorch version for consistency
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION pytorch=0.4.0 -c soumith
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION pytorch=0.4.1 cuda92 -c pytorch
- source activate test-environment
# use requirements.txt for dependencies
- pip install -r requirements.txt
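For readers updating a local environment to match this pin, a minimal sketch following the Travis line above (the Python version is a placeholder, not part of the commit; the channel and CUDA build are taken from the Travis change):

```
conda create -n test-environment python=3.6 pytorch=0.4.1 cuda92 -c pytorch
source activate test-environment
pip install -r requirements.txt
```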
4 changes: 2 additions & 2 deletions README.md
@@ -7,7 +7,7 @@ This is a [Pytorch](https://github.com/pytorch/pytorch)
port of [OpenNMT](https://github.com/OpenNMT/OpenNMT),
an open-source (MIT) neural machine translation system. It is designed to be research friendly to try out new ideas in translation, summary, image-to-text, morphology, and many other domains.

Codebase is relatively stable, but PyTorch is still evolving. We currently only support PyTorch 0.4 and recommend forking if you need to have stable code.
Codebase is relatively stable, but PyTorch is still evolving. We currently only support PyTorch 0.4.1 and recommend forking if you need to have stable code.

OpenNMT-py is run as a collaborative open-source project. It is maintained by [Sasha Rush](http://github.com/srush) (Cambridge, MA), [Ben Peters](http://github.com/bpopeters) (Lisbon), and [Jianyu Zhan](http://github.com/jianyuzhan) (Shanghai). The original code was written by [Adam Lerer](http://github.com/adamlerer) (NYC).
We love contributions. Please consult the Issues page for any [Contributions Welcome](https://github.com/OpenNMT/OpenNMT-py/issues?q=is%3Aissue+is%3Aopen+label%3A%22contributions+welcome%22) tagged post.
@@ -32,7 +32,7 @@ All dependencies can be installed via:
pip install -r requirements.txt
```

Note that we currently only support PyTorch 0.4.
Note that we currently only support PyTorch 0.4.1.

## Features

31 changes: 16 additions & 15 deletions docs/source/Library.ipynb
@@ -12,8 +12,9 @@
"import torch.nn as nn\n",
"\n",
"import onmt\n",
"import onmt.io\n",
"import onmt.modules"
"import onmt.inputters\n",
"import onmt.modules\n",
"import onmt.utils"
]
},
{
@@ -30,8 +31,8 @@
"outputs": [],
"source": [
"vocab = dict(torch.load(\"../../data/data.vocab.pt\"))\n",
"src_padding = vocab[\"src\"].stoi[onmt.io.PAD_WORD]\n",
"tgt_padding = vocab[\"tgt\"].stoi[onmt.io.PAD_WORD]"
"src_padding = vocab[\"src\"].stoi[onmt.inputters.PAD_WORD]\n",
"tgt_padding = vocab[\"tgt\"].stoi[onmt.inputters.PAD_WORD]"
]
},
{
@@ -53,22 +54,22 @@
"encoder_embeddings = onmt.modules.Embeddings(emb_size, len(vocab[\"src\"]),\n",
" word_padding_idx=src_padding)\n",
"\n",
"encoder = onmt.modules.RNNEncoder(hidden_size=rnn_size, num_layers=1, \n",
"encoder = onmt.encoders.RNNEncoder(hidden_size=rnn_size, num_layers=1, \n",
" rnn_type=\"LSTM\", bidirectional=True,\n",
" embeddings=encoder_embeddings)\n",
"\n",
"decoder_embeddings = onmt.modules.Embeddings(emb_size, len(vocab[\"tgt\"]),\n",
" word_padding_idx=tgt_padding)\n",
"decoder = onmt.modules.InputFeedRNNDecoder(hidden_size=rnn_size, num_layers=1, \n",
"decoder = onmt.decoders.decoder.InputFeedRNNDecoder(hidden_size=rnn_size, num_layers=1, \n",
" bidirectional_encoder=True,\n",
" rnn_type=\"LSTM\", embeddings=decoder_embeddings)\n",
"model = onmt.modules.NMTModel(encoder, decoder)\n",
"model = onmt.models.model.NMTModel(encoder, decoder)\n",
"\n",
"# Specify the tgt word generator and loss computation module\n",
"model.generator = nn.Sequential( \n",
" nn.Linear(rnn_size, len(vocab[\"tgt\"])), \n",
" nn.LogSoftmax())\n",
"loss = onmt.Loss.NMTLossCompute(model.generator, vocab[\"tgt\"]) "
"loss = onmt.utils.loss.NMTLossCompute(model.generator, vocab[\"tgt\"]) "
]
},
{
@@ -84,8 +85,8 @@
"metadata": {},
"outputs": [],
"source": [
"optim = onmt.Optim(method=\"sgd\", lr=1, max_grad_norm=2)\n",
"optim.set_parameters(model.parameters())"
"optim = onmt.utils.optimizers.Optimizer(method=\"sgd\", lr=1, max_grad_norm=2)\n",
"optim.set_parameters(model.named_parameters())"
]
},
{
@@ -102,8 +103,8 @@
"outputs": [],
"source": [
"# Load some data\n",
"data = torch.load(\"../../data/data.train.pt\")\n",
"valid_data = torch.load(\"../../data/data.valid.pt\")\n",
"data = torch.load(\"../../data/data.train.1.pt\")\n",
"valid_data = torch.load(\"../../data/data.valid.1.pt\")\n",
"data.load_fields(vocab)\n",
"valid_data.load_fields(vocab)\n",
"data.examples = data.examples[:100] "
@@ -122,11 +123,11 @@
"metadata": {},
"outputs": [],
"source": [
"train_iter = onmt.io.OrderedIterator( \n",
"train_iter = onmt.inputters.OrderedIterator( \n",
" dataset=data, batch_size=10, \n",
" device=-1, \n",
" repeat=False)\n",
"valid_iter = onmt.io.OrderedIterator( \n",
"valid_iter = onmt.inputters.OrderedIterator( \n",
" dataset=valid_data, batch_size=10, \n",
" device=-1,\n",
" train=False) "
@@ -176,7 +177,7 @@
}
],
"source": [
"trainer = onmt.Trainer(model, train_iter, valid_iter, loss, loss, optim)\n",
"trainer = onmt.Trainer(model, loss, loss, optim)\n",
"\n",
"def report_func(*args):\n",
" stats = args[-1]\n",
29 changes: 15 additions & 14 deletions docs/source/Library.md
@@ -12,17 +12,18 @@ import torch
import torch.nn as nn

import onmt
import onmt.io
import onmt.inputters
import onmt.modules
import onmt.utils
```

We begin by loading in the vocabulary for the model of interest. This will let us check the vocab size and get the special ids for padding.


```python
vocab = dict(torch.load("../../data/data.vocab.pt"))
src_padding = vocab["src"].stoi[onmt.io.PAD_WORD]
tgt_padding = vocab["tgt"].stoi[onmt.io.PAD_WORD]
src_padding = vocab["src"].stoi[onmt.inputters.PAD_WORD]
tgt_padding = vocab["tgt"].stoi[onmt.inputters.PAD_WORD]
```

Next we specify the core model itself. Here we will build a small model with an encoder and an attention-based input-feeding decoder. Both models will be RNNs and the encoder will be bidirectional.
@@ -35,39 +36,39 @@ rnn_size = 6
encoder_embeddings = onmt.modules.Embeddings(emb_size, len(vocab["src"]),
word_padding_idx=src_padding)

encoder = onmt.modules.RNNEncoder(hidden_size=rnn_size, num_layers=1,
encoder = onmt.encoders.RNNEncoder(hidden_size=rnn_size, num_layers=1,
rnn_type="LSTM", bidirectional=True,
embeddings=encoder_embeddings)

decoder_embeddings = onmt.modules.Embeddings(emb_size, len(vocab["tgt"]),
word_padding_idx=tgt_padding)
decoder = onmt.modules.InputFeedRNNDecoder(hidden_size=rnn_size, num_layers=1,
decoder = onmt.decoders.decoder.InputFeedRNNDecoder(hidden_size=rnn_size, num_layers=1,
bidirectional_encoder=True,
rnn_type="LSTM", embeddings=decoder_embeddings)
model = onmt.modules.NMTModel(encoder, decoder)
model = onmt.models.model.NMTModel(encoder, decoder)

# Specify the tgt word generator and loss computation module
model.generator = nn.Sequential(
nn.Linear(rnn_size, len(vocab["tgt"])),
nn.LogSoftmax())
loss = onmt.Loss.NMTLossCompute(model.generator, vocab["tgt"])
loss = onmt.utils.loss.NMTLossCompute(model.generator, vocab["tgt"])
```

Now we set up the optimizer. This could be a core torch optim class, or our wrapper which handles learning rate updates and gradient normalization automatically.


```python
optim = onmt.Optim(method="sgd", lr=1, max_grad_norm=2)
optim.set_parameters(model.parameters())
optim = onmt.utils.optimizers.Optimizer(method="sgd", learning_rate=1, max_grad_norm=2)
optim.set_parameters(model.named_parameters())
```

Now we load the data from disk. Currently we also need to call a function to load the fields into the data.


```python
# Load some data
data = torch.load("../../data/data.train.pt")
valid_data = torch.load("../../data/data.valid.pt")
data = torch.load("../../data/data.train.1.pt")
valid_data = torch.load("../../data/data.valid.1.pt")
data.load_fields(vocab)
valid_data.load_fields(vocab)
data.examples = data.examples[:100]
@@ -77,11 +78,11 @@ To iterate through the data itself we use a torchtext iterator class. We specify


```python
train_iter = onmt.io.OrderedIterator(
train_iter = onmt.inputters.OrderedIterator(
dataset=data, batch_size=10,
device=-1,
repeat=False)
valid_iter = onmt.io.OrderedIterator(
valid_iter = onmt.inputters.OrderedIterator(
dataset=valid_data, batch_size=10,
device=-1,
train=False)
@@ -91,7 +92,7 @@ Finally we train.


```python
trainer = onmt.Trainer(model, train_iter, valid_iter, loss, loss, optim)
trainer = onmt.Trainer(model, loss, loss, optim)

def report_func(*args):
stats = args[-1]
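For anyone porting scripts written against the old library API, a hedged summary of the renames visible in the two Library diffs above; the `from ... import ...` forms are assumptions about how the new modules are exposed, not taken from the commit itself:

```python
# Module paths after this refactor, as used in the updated Library examples:
import onmt.inputters                                   # was: onmt.io
from onmt.encoders import RNNEncoder                    # was: onmt.modules.RNNEncoder
from onmt.decoders.decoder import InputFeedRNNDecoder   # was: onmt.modules.InputFeedRNNDecoder
from onmt.models.model import NMTModel                  # was: onmt.modules.NMTModel
from onmt.utils.loss import NMTLossCompute              # was: onmt.Loss.NMTLossCompute
from onmt.utils.optimizers import Optimizer             # was: onmt.Optim
```

Note also that, per the diff, `Optimizer.set_parameters` now takes `model.named_parameters()` rather than `model.parameters()`, and `Trainer` is constructed without the data iterators.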
5 changes: 3 additions & 2 deletions docs/source/im2text.md
@@ -37,14 +37,15 @@ wget -O data/im2text.tgz http://lstm.seas.harvard.edu/latex/im2text_small.tgz; t
python preprocess.py -data_type img -src_dir data/im2text/images/ -train_src data/im2text/src-train.txt \
-train_tgt data/im2text/tgt-train.txt -valid_src data/im2text/src-val.txt \
-valid_tgt data/im2text/tgt-val.txt -save_data data/im2text/demo \
-tgt_seq_length 150 -tgt_words_min_frequency 2
-tgt_seq_length 150 -tgt_words_min_frequency 2 -shard_size 500 -image_channel_size 1
```

2) Train the model.

```
python train.py -model_type img -data data/im2text/demo -save_model demo-model -gpu_ranks 0 -batch_size 20 \
-max_grad_norm 20 -learning_rate 0.1 -word_vec_size 80 -encoder_type brnn
-max_grad_norm 20 -learning_rate 0.1 -word_vec_size 80 -encoder_type brnn -image_channel_size 1
```

3) Translate the images.
13 changes: 13 additions & 0 deletions docs/source/options/preprocess.md
@@ -31,6 +31,15 @@ For text corpus of large volume, it will be divided into shards of this size to
preprocess. If 0, the data will be handled as a whole. The unit is in bytes.
Optimal value should be multiples of 64 bytes.

* **-shard_size []**
Divide src_corpus and tgt_corpus into multiple smaller files, then build shards from them;
each shard will have opt.shard_size samples, except possibly the last one.
shard_size=0 means no segmentation;
shard_size>0 means the dataset is segmented into multiple shards,
each with shard_size samples (see the sketch below).
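
A minimal sketch of a preprocessing run with sharding enabled; the corpus paths and the shard size of 500 samples are placeholders, and the flag names follow the im2text example earlier in this diff:

```
python preprocess.py -train_src data/src-train.txt -train_tgt data/tgt-train.txt \
                     -valid_src data/src-val.txt -valid_tgt data/tgt-val.txt \
                     -save_data data/demo -shard_size 500
```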

### **Vocab**:
* **-src_vocab []**
Path to an existing source vocabulary. Format: one word per line.
@@ -96,3 +105,7 @@ Window stride for spectrogram in seconds.

* **-window [hamming]**
Window type for spectrogram generation.

### **Image**:
* **-image_channel_size [3]**
Number of channels in the input images. Using grayscale (single-channel) images makes the model smaller and training faster.
4 changes: 4 additions & 0 deletions docs/source/options/train.md
@@ -266,3 +266,7 @@ Sample rate.

* **-window_size [0.02]**
Window size for spectrogram in seconds.

### **Image**:
* **-image_channel_size [3]**
Number of channels in the input images. Using grayscale (single-channel) images makes the model smaller and training faster.
4 changes: 4 additions & 0 deletions docs/source/options/translate.md
@@ -111,3 +111,7 @@ Window stride for spectrogram in seconds

* **-window [hamming]**
Window type for spectrogram generation

### **Image**:
* **-image_channel_size [3]**
Number of channels in the input images. Using grayscale (single-channel) images makes the model smaller and training faster.
15 changes: 8 additions & 7 deletions onmt/encoders/image_encoder.py
@@ -16,13 +16,14 @@ class ImageEncoder(nn.Module):
dropout (float): dropout probability.
"""

def __init__(self, num_layers, bidirectional, rnn_size, dropout):
def __init__(self, num_layers, bidirectional, rnn_size, dropout,
image_chanel_size=3):
super(ImageEncoder, self).__init__()
self.num_layers = num_layers
self.num_directions = 2 if bidirectional else 1
self.hidden_size = rnn_size

self.layer1 = nn.Conv2d(3, 64, kernel_size=(3, 3),
self.layer1 = nn.Conv2d(image_chanel_size, 64, kernel_size=(3, 3),
padding=(1, 1), stride=(1, 1))
self.layer2 = nn.Conv2d(64, 128, kernel_size=(3, 3),
padding=(1, 1), stride=(1, 1))
@@ -40,7 +41,7 @@ def __init__(self, num_layers, bidirectional, rnn_size, dropout):
self.batch_norm3 = nn.BatchNorm2d(512)

src_size = 512
self.rnn = nn.LSTM(src_size, rnn_size,
self.rnn = nn.LSTM(src_size, int(rnn_size / self.num_directions),
num_layers=num_layers,
dropout=dropout,
bidirectional=bidirectional)
@@ -56,7 +57,7 @@ def forward(self, src, lengths=None):
batch_size = src.size(0)
# (batch_size, 64, imgH, imgW)
# layer 1
src = F.relu(self.layer1(src[:, :, :, :]-0.5), True)
src = F.relu(self.layer1(src[:, :, :, :] - 0.5), True)

# (batch_size, 64, imgH/2, imgW/2)
src = F.max_pool2d(src, kernel_size=(2, 2), stride=(2, 2))
@@ -94,10 +95,10 @@ def forward(self, src, lengths=None):
# # (batch_size, 512, H, W)
all_outputs = []
for row in range(src.size(2)):
inp = src[:, :, row, :].transpose(0, 2)\
inp = src[:, :, row, :].transpose(0, 2) \
.transpose(1, 2)
row_vec = torch.Tensor(batch_size).type_as(inp.data)\
.long().fill_(row)
row_vec = torch.Tensor(batch_size).type_as(inp.data) \
.long().fill_(row)
pos_emb = self.pos_lut(row_vec)
with_pos = torch.cat(
(pos_emb.view(1, pos_emb.size(0), pos_emb.size(1)), inp), 0)
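A short, hypothetical usage sketch of the updated constructor. The argument values and dummy input are placeholders; the import path is inferred from the file location, the keyword spelling `image_chanel_size` matches the signature above, and the return convention of `forward` is not shown in this hunk:

```python
import torch
from onmt.encoders.image_encoder import ImageEncoder  # path as in this diff

# Grayscale images: one input channel instead of the default three.
encoder = ImageEncoder(num_layers=2, bidirectional=True, rnn_size=500,
                       dropout=0.3, image_chanel_size=1)

# Dummy batch of 4 single-channel images, shaped (batch, channels, height, width),
# with pixel values in [0, 1] as the `src - 0.5` centering above expects.
images = torch.rand(4, 1, 32, 128)
out = encoder(images, lengths=None)  # unpack according to your onmt version
```

With `bidirectional=True`, the LSTM now uses `int(rnn_size / 2)` units per direction, so the concatenated output size stays at `rnn_size`.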