require 'nn'
toy = require 'toy'

-- take command-line arguments to control parameters
cmd = torch.CmdLine()
cmd:text()
cmd:text('Train MLP')
cmd:text()
cmd:text('Options')
cmd:option('-seed', os.time(), 'initial random seed (default: current time)')
cmd:option('-hidden', 25, 'hidden state size')
cmd:option('-batch', 5, 'batch size')
cmd:option('-rate', 0.03, 'learning rate')
cmd:option('-iterations', 100, 'maximum number of iterations of SGD')
cmd:option('-trained', 'trained_mlp_1.t7', 'filename for saved trained model')
cmd:option('-grid', 'grid_predictions_mlp_1.csv', 'filename for saved grid predictions')
cmd:text()
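-- example invocation (assuming this script is saved as train_mlp.lua):
--   th train_mlp.lua -hidden 50 -rate 0.01 -iterations 200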

-- parse input parameters
params = cmd:parse(arg)

-- set the random seed to make the result reproducible
torch.manualSeed(params.seed)

--[[
  read data
  N: number of rows of data
  n_inputs: number of input variables (all columns except the target column)
  ]]--
d = torch.load('fixed_width_3.t7')
N = d:size(1)
n_inputs = d:size(2) - 1

-- separate data into inputs (x) and targets (y)
x = d:narrow(2, 1, n_inputs)
y = d:narrow(2, n_inputs + 1, 1)
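-- narrow(dim, index, size) returns a view of the original tensor (no copy),
-- so x and y share storage with d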

-- train/test split sizes
test_frac = 0.3
n_test = torch.floor(N * test_frac)
n_train = N - n_test

-- train/test splits
x_train = x:narrow(1, 1, n_train)
y_train = y:narrow(1, 1, n_train)

x_test = x:narrow(1, n_train + 1, n_test)
y_test = y:narrow(1, n_train + 1, n_test)

-- normalize training inputs
norm_mean = x_train:mean()
norm_std = x_train:std()
x_train_n = (x_train - norm_mean) / norm_std

-- normalize test inputs based on training data normalization values
x_test_n = (x_test - norm_mean) / norm_std
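-- note: mean() and std() with no dimension argument reduce over all elements,
-- so every input column shares a single pair of normalization constants here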

-- the nn SGD trainer needs a data structure where examples can be accessed
-- via the index operator, [], and that has a size() method
dataset = {}
function dataset:size()
   return torch.floor(n_train / params.batch)
end
for i = 1, dataset:size() do
   local start = (i - 1) * params.batch + 1
   dataset[i] = {x_train_n:narrow(1, start, params.batch),
                 y_train:narrow(1, start, params.batch)}
end
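-- each dataset[i] is a {inputs, targets} pair holding one mini-batch of
-- params.batch rows; trailing rows beyond the last full batch are dropped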

-- set up the neural net
n_hidden = params.hidden
mlp = nn.Sequential()
mlp:add(nn.Linear(n_inputs, n_hidden))
mlp:add(nn.Sigmoid())
mlp:add(nn.Linear(n_hidden, 1))

-- get all parameters packaged into a vector
mlp_params = mlp:getParameters()
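-- getParameters() flattens all weights and biases into one contiguous vector
-- (it also returns the flattened gradients as a second value, unused here);
-- it should be called only after the network is fully assembled, since it
-- reallocates the underlying storage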

-- the model must learn to predict real-valued targets, so we use
-- mean squared error as the objective function
criterion = nn.MSECriterion()

-- set up trainer to use SGD - stochastic gradient descent
trainer = nn.StochasticGradient(mlp, criterion)
trainer.maxIteration = params.iterations
trainer.learningRate = params.rate
function trainer:hookIteration(iteration)
   print('# test error = ' .. criterion:forward(mlp:forward(x_test_n), y_test))
end
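-- hookIteration is called by StochasticGradient after each full pass over the
-- dataset, so this prints the held-out error once per epoch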

-- train the model, after randomly initializing the parameters and clearing out
-- any existing gradient
mlp_params:uniform(-0.1, 0.1)
mlp:zeroGradParameters()
print("parameter count: " .. mlp_params:size(1))
print("initial error before training = " .. criterion:forward(mlp:forward(x_test_n), y_test))
trainer:train(dataset)

-- save the trained model
torch.save(params.trained, {mlp = mlp, params = mlp_params})
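-- to reload in a later script (illustration only, using the default filename):
--   saved = torch.load('trained_mlp_1.t7')
--   mlp = saved.mlp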

-- Output predictions along a grid so we can see how well the model learned the
-- function. We generate the grid inputs without noise, which gives us a sense
-- of whether the model has captured the true underlying function.
grid_size = 200
target_grid = torch.linspace(0, toy.max_target, grid_size):view(grid_size, 1)
inputs_grid = toy.target_to_inputs(target_grid, 0)
inputs_grid_n = (inputs_grid - norm_mean) / norm_std
predictions = mlp:forward(inputs_grid_n)

-- use Penlight to write the predictions to a file
pldata = require 'pl.data'
pred_d = pldata.new(predictions:totable())
pred_d:write(params.grid)