
Commit e5ab4a5

Merged with master branch

2 parents: bc079b3 + 2fe7f49

File tree: 7 files changed, +821 −799 lines

scripts/batch_generator.py

Lines changed: 705 additions & 753 deletions
Large diffs are not rendered by default.

scripts/deep_quant.py

Lines changed: 3 additions & 1 deletion
@@ -62,13 +62,14 @@ def get_configs():
     configs.DEFINE_integer("min_years",None,"Alt to min_unrollings")
     configs.DEFINE_integer("max_years",None,"Alt to max_unrollings")
     configs.DEFINE_integer("pls_years",None,"Alt to max_years. max_years = min_year+pls_years")
-    # num_unrollings is being depricated by max_unrollings
+    # num_unrollings is being depricated, replaced with max_unrollings
     configs.DEFINE_integer("num_unrollings",4,"Number of unrolling steps")
     configs.DEFINE_integer("stride",12,"How many steps to skip per unrolling")
     configs.DEFINE_integer("forecast_n",12,"How many steps to forecast into the future")
     configs.DEFINE_integer("batch_size",1,"Size of each batch")
     configs.DEFINE_integer("num_layers",1, "Numer of RNN layers")
     configs.DEFINE_integer("num_hidden",10,"Number of hidden layer units")
+    configs.DEFINE_float("training_noise",None, "Level of training noise as multiple of 1-stdev")
     configs.DEFINE_float("init_scale",0.1, "Initial scale for weights")
     configs.DEFINE_float("max_grad_norm",10.0,"Gradient clipping")
     configs.DEFINE_integer("start_date",None,"First date to train on as YYYYMM")

@@ -102,6 +103,7 @@ def get_configs():
     configs.DEFINE_float("passes",1.0,"Passes through day per epoch")
     configs.DEFINE_float("target_lambda",0.5,"How much to weight last step vs. all steps in loss")
     configs.DEFINE_float("rnn_lambda",0.5,"How much to weight last step vs. all steps in loss")
+    configs.DEFINE_float("l2_alpha",0.0,"L2 Regularization")
     configs.DEFINE_integer("max_epoch",0,"Stop after max_epochs")
     configs.DEFINE_integer("early_stop",None,"Early stop parameter")
     configs.DEFINE_integer("seed",None,"Seed for deterministic training")

scripts/models/base_model.py

Lines changed: 0 additions & 3 deletions
@@ -108,9 +108,6 @@ def debug_step(self, sess, batch, training=False, uq=False, UQ_model_type='MVE')
     np.set_printoptions(suppress=True)
     np.set_printoptions(precision=3)
 
-    print()
-    print(batch.inputs[-1][0][18:22])
-
     feed_dict = self._get_feed_dict(batch,keep_prob=1.0,training=training)
 
     # (s,t,lt,lkt,lkti,o,lo,lko,lkoi) = sess.run([self._seq_lengths,self._t,self._lt,self._lkt,self._lkti,self._o,self._lo,self._lko,self._lkoi],feed_dict)

scripts/models/deep_rnn_model.py

Lines changed: 10 additions & 3 deletions
@@ -167,10 +167,17 @@ def rnn_cell():
     # here is the learning part of the graph
     p1 = config.target_lambda
     p2 = config.rnn_lambda
-    l2 = config.l2_alpha*sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables() if "_b" not in tf_var.name)
-    loss = p1 * self._mse_0 + (1.0-p1)*(p2*self._mse_1 + (1.0-p2)*self._mse_2) + l2
+
     tvars = tf.trainable_variables()
-    grads = tf.gradients(loss, tvars)
+
+    l2 = config.l2_alpha * sum(
+        tf.nn.l2_loss(tf_var)
+        for tf_var in tvars
+        if not ("_b" in tf_var.name)
+    )
+
+    loss = p1 * self._mse_0 + (1.0-p1)*(p2*self._mse_1 + (1.0-p2)*self._mse_2) + l2
+    grads = tf.gradients(loss,tvars)
 
     if (config.max_grad_norm > 0):
         grads, self._grad_norm = tf.clip_by_global_norm(grads,config.max_grad_norm)
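
Note: the restructured loss mixes three MSE terms via target_lambda (p1) and rnn_lambda (p2), then adds an L2 penalty over every trainable variable whose name lacks "_b", i.e. biases are excluded. A NumPy rendering of the same arithmetic, with made-up values standing in for the model's tensors:

import numpy as np

# Stand-in values; in the model these are TF tensors (self._mse_0/_1/_2)
# and tf.trainable_variables().
mse_0, mse_1, mse_2 = 0.20, 0.30, 0.50
p1, p2, l2_alpha = 0.5, 0.5, 0.001   # target_lambda, rnn_lambda, l2_alpha

weights = {
    "rnn/_w": np.array([[0.1, -0.2], [0.3, 0.4]]),
    "rnn/_b": np.array([0.5, 0.5]),  # name contains "_b": excluded from L2
}

def l2_loss(v):
    # Same convention as tf.nn.l2_loss: sum(v**2) / 2
    return np.sum(v ** 2) / 2.0

l2 = l2_alpha * sum(l2_loss(v) for name, v in weights.items()
                    if "_b" not in name)

loss = p1 * mse_0 + (1.0 - p1) * (p2 * mse_1 + (1.0 - p2) * mse_2) + l2
print(loss)  # 0.5*0.20 + 0.5*(0.5*0.30 + 0.5*0.50) + 0.001*0.15 = 0.30015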

scripts/noise_model.py

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+# Copyright 2016 Euclidean Technologies Management LLC All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+import time
+import sys
+import random
+
+import numpy as np
+import pandas as pd
+import math
+import copy
+
+class NoiseModel(object):
+    def __init__(self, seed=None, scaling_params=None, degree=0.00):
+        self._scale = scaling_params['scale']
+        self._degree = degree
+        if seed is not None:
+            np.random.seed(seed)
+
+    def add_noise(self,batch):
+        batch = copy.deepcopy(batch)
+        inputs = batch.inputs
+        input_scales = np.tile(self._scale,(batch.size,1))
+        num_inputs = inputs[0].shape[1]
+        for i in range(len(inputs)):
+            input_noise = np.random.normal(loc=0.0,
+                                           scale=self._degree,
+                                           size=(batch.size,num_inputs))
+            batch.inputs[i] += input_scales * input_noise
+        return batch
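
Note: add_noise gives each unrolling step zero-mean Gaussian noise with stddev degree, scaled per feature by scaling_params['scale'] (one standard deviation per feature), on a deep copy so the original batch is untouched. A runnable sketch with a minimal stand-in batch class (the real batch type has more members), assuming scripts/ is on the import path:

import numpy as np
from noise_model import NoiseModel

class FakeBatch(object):
    """Minimal stand-in for the real batch class: just .inputs and .size."""
    def __init__(self, inputs):
        self.inputs = inputs           # list of (batch_size, num_inputs) arrays
        self.size = inputs[0].shape[0]

batch = FakeBatch([np.zeros((2, 3)) for _ in range(4)])  # 4 unrolling steps
params = {'scale': np.array([1.0, 10.0, 0.1])}           # per-feature 1-stdev

nm = NoiseModel(seed=42, scaling_params=params, degree=0.25)
noisy = nm.add_noise(batch)

print(noisy.inputs[0])   # zero-mean noise, larger in the 2nd column
print(batch.inputs[0])   # original batch is untouched (deepcopy)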

scripts/train.py

Lines changed: 17 additions & 16 deletions
@@ -24,16 +24,19 @@
 import tensorflow as tf
 import regex as re
 import math
+import numpy as np
 
 from utils import data_utils, model_utils
+from noise_model import NoiseModel
 
 def pretty_progress(step, prog_int, dot_count):
     if ( (prog_int<=1) or (step % (int(prog_int)+1)) == 0):
         dot_count += 1; print('.',end=''); sys.stdout.flush()
     return dot_count
 
 def run_epoch(session, model, train_data, valid_data,
-              keep_prob=1.0, passes=1.0, verbose=False):
+              keep_prob=1.0, passes=1.0,
+              noise_model=None, verbose=False):
 
     if not train_data.num_batches > 0:
         raise RuntimeError("batch_size*max_unrollings is larger "

@@ -54,17 +57,18 @@ def run_epoch(session, model, train_data, valid_data,
 
     for step in range(train_steps):
         batch = train_data.next_batch()
+        if noise_model is not None:
+            batch = noise_model.add_noise(batch)
         train_mse += model.train_step(session, batch, keep_prob=keep_prob)
         if verbose: dot_count = pretty_progress(step,prog_int,dot_count)
 
+    # evaluate validation data
     for step in range(valid_steps):
         batch = valid_data.next_batch()
         (mse,_) = model.step(session, batch)
         valid_mse += mse
         if verbose: dot_count = pretty_progress(train_steps+step,prog_int,dot_count)
 
-    # evaluate validation data
-
     if verbose:
         print("."*(100-dot_count),end='')
         print(" passes: %.2f "

@@ -117,19 +121,15 @@ def train_model(config):
     print("Constructing model ...")
     model = model_utils.get_model(session, config, verbose=True)
 
-    if config.data_scaler is not None:
-        start_time = time.time()
-        print("Calculating scaling parameters ...", end=' '); sys.stdout.flush()
-        scaling_params = train_data.get_scaling_params(config.data_scaler)
-        model.set_scaling_params(session,**scaling_params)
-        print("done in %.2f seconds."%(time.time() - start_time))
-        print("%-10s %-6s %-6s"%('feature','mean','std'))
-        for i in range(len(train_data.feature_names)):
-            center = "%.4f"%scaling_params['center'][i];
-            scale = "%.4f"%scaling_params['scale'][i];
-            print("%-10s %-6s %-6s"%(train_data.feature_names[i],
-                                     center,scale))
-        sys.stdout.flush()
+    params = model_utils.get_scaling_params(config,train_data,verbose=True)
+    model.set_scaling_params(session,**params)
+
+    noise_model = None
+    if config.training_noise is not None:
+        print("Training noise level: %.2f * 1-stdev"%config.training_noise)
+        noise_model = NoiseModel(seed=config.seed,
+                                 scaling_params=params,
+                                 degree=config.training_noise)
 
     if config.early_stop is not None:
         print("Training will early stop without "

@@ -148,6 +148,7 @@ def train_model(config):
         (train_mse, valid_mse) = run_epoch(session, model, train_data, valid_data,
                                            keep_prob=config.keep_prob,
                                            passes=config.passes,
+                                           noise_model=noise_model,
                                            verbose=True)
         print( ('Epoch: %d Train MSE: %.6f Valid MSE: %.6f Learning rate: %.4f') %
                (i + 1, train_mse, valid_mse, lr) )
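
Note the shape of the run_epoch change: noise is injected into training batches only, validation batches stay clean, and the noise_model=None default leaves existing call sites working unchanged. A stripped-down sketch of that control flow with stub model and data (not the real classes):

import numpy as np

def run_epoch_sketch(model, train_batches, valid_batches, noise_model=None):
    # Stubbed-down version of train.py's run_epoch: only the noise hook.
    train_mse = valid_mse = 0.0
    for batch in train_batches:
        if noise_model is not None:
            batch = noise_model.add_noise(batch)   # training data only
        train_mse += model.train_step(batch)
    for batch in valid_batches:                    # always evaluated clean
        valid_mse += model.step(batch)
    return (train_mse / max(1, len(train_batches)),
            valid_mse / max(1, len(valid_batches)))

class StubModel(object):
    def train_step(self, batch): return float(np.mean(batch ** 2))
    def step(self, batch): return float(np.mean(batch ** 2))

model = StubModel()
train = [np.ones((2, 3)) for _ in range(5)]
valid = [np.ones((2, 3)) for _ in range(2)]
print(run_epoch_sketch(model, train, valid))  # noise_model=None: old behavior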

scripts/utils/model_utils.py

Lines changed: 43 additions & 23 deletions
@@ -70,6 +70,31 @@ def adjust_learning_rate(session, model,
     model.set_learning_rate(session, learning_rate)
     return learning_rate
 
+def get_scaling_params(config, data, verbose=False):
+    # Initialize scaling params
+    scaling_params = None
+    if config.scalesfile is not None and os.path.isfile(config.scalesfile):
+        scaling_params = pickle.load( open( config.scalesfile, "rb" ) )
+        if verbose:
+            print("Reading scaling params from %s"%config.scalesfile);
+    else:
+        scaling_params = data.get_scaling_params(config.data_scaler)
+        if config.scalesfile is not None:
+            pickle.dump(scaling_params, open( config.scalesfile, "wb" ))
+            if verbose:
+                print("Writing scaling params to %s"%config.scalesfile);
+
+    if verbose:
+        print("Scaling params are:")
+        print("%-10s %-6s %-6s"%('feature','mean','std'))
+        for i in range(len(data.feature_names)):
+            center = "%.4f"%scaling_params['center'][i];
+            scale = "%.4f"%scaling_params['scale'][i];
+            print("%-10s %-6s %-6s"%(data.feature_names[i],
+                                     center,scale))
+    return scaling_params
+
+
 def get_model(session, config, verbose=False):
     """
     Args:

@@ -79,30 +104,25 @@ def get_model(session, config, verbose=False):
     Returns:
       the model
     """
-    if config.nn_type == 'logreg':
-        model_file = os.path.join(config.model_dir, "logreg.pkl" )
-        clf = LogRegModel(load_from=model_file)
-        mtrain, mdeploy = clf, clf
-
+
+    model = _create_model(session, config, verbose)
+
+    ckpt = tf.train.get_checkpoint_state(config.model_dir)
+    start_time = time.time()
+    if ckpt and gfile.Exists(ckpt.model_checkpoint_path+".index"):
+        if verbose:
+            print("Reading model parameters from {}...".format(
+                ckpt.model_checkpoint_path), end=' ')
+        tf.train.Saver(max_to_keep=200).restore(session,
+                                                ckpt.model_checkpoint_path)
+        if verbose:
+            print("done in %.2f seconds."%(time.time() - start_time))
     else:
-        model = _create_model(session, config, verbose)
-
-        ckpt = tf.train.get_checkpoint_state(config.model_dir)
-        start_time = time.time()
-        if ckpt and gfile.Exists(ckpt.model_checkpoint_path+".index"):
-            if verbose:
-                print("Reading model parameters from {}...".format(
-                    ckpt.model_checkpoint_path), end=' ')
-            tf.train.Saver(max_to_keep=200).restore(session,
-                                                    ckpt.model_checkpoint_path)
-            if verbose:
-                print("done in %.2f seconds."%(time.time() - start_time))
-        else:
-            if verbose:
-                print("Creating model with fresh parameters ...", end=' ')
-            session.run(tf.global_variables_initializer())
-            if verbose:
-                print("done in %.2f seconds."%(time.time() - start_time))
+        if verbose:
+            print("Creating model with fresh parameters ...", end=' ')
+        session.run(tf.global_variables_initializer())
+        if verbose:
+            print("done in %.2f seconds."%(time.time() - start_time))
 
     return model
 
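
Note: the new get_scaling_params wraps the scaling computation in a simple pickle cache keyed on config.scalesfile: load the file if it exists, otherwise compute from the data and write the file back when one is configured. The same pattern in isolation, with a stand-in compute function in place of data.get_scaling_params:

import os
import pickle

def cached_scaling_params(scalesfile, compute):
    # Mirrors the caching logic in model_utils.get_scaling_params:
    # read from the pickle when present, else compute and write back.
    if scalesfile is not None and os.path.isfile(scalesfile):
        with open(scalesfile, "rb") as f:
            return pickle.load(f)
    params = compute()
    if scalesfile is not None:
        with open(scalesfile, "wb") as f:
            pickle.dump(params, f)
    return params

# Stand-in for data.get_scaling_params(config.data_scaler):
compute = lambda: {'center': [0.0, 1.0], 'scale': [1.0, 2.0]}
params = cached_scaling_params("scales.pkl", compute)  # computes and writes
params = cached_scaling_params("scales.pkl", compute)  # now reads the cache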
