
Commit

Updated documentation for the optimizer argument and added support for passing an instance of an Optimizer sub-class, so that users can provide an optimizer with custom parameters.

Additionally, fixed gradient clipping usage.
Change: 118995198
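
For context, a minimal sketch of the calling convention this change enables (hypothetical toy model and hyper-parameter values, assuming the tf.contrib.layers and tf.train APIs of this era; this is an illustration, not code from the commit):

import tensorflow as tf

x = tf.placeholder(tf.float32, [])
var = tf.get_variable("weight", [], initializer=tf.constant_initializer(10))
loss = tf.abs(var * x)
global_step = tf.get_variable("global_step", [], trainable=False,
                              initializer=tf.constant_initializer(0))

# The optimizer argument may be a name ("SGD", "Adam", "Adagrad") or an
# Optimizer sub-class such as tf.train.GradientDescentOptimizer; passing a
# pre-built instance, as below, is what allows custom parameters like momentum.
train_op = tf.contrib.layers.optimize_loss(
    loss, global_step, learning_rate=0.1,
    optimizer=tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9),
    clip_gradients=1.0)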
A. Unique TensorFlower authored and tensorflower-gardener committed Apr 4, 2016
1 parent 3a6565b commit 3ab39c1
Showing 2 changed files with 72 additions and 25 deletions.
39 changes: 27 additions & 12 deletions tensorflow/contrib/layers/python/layers/optimizers.py
@@ -18,6 +18,8 @@
from __future__ import division
from __future__ import print_function

import six

from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
@@ -26,6 +28,7 @@
from tensorflow.python.ops import logging_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import variables as vars_
from tensorflow.python.training import optimizer as optimizer_
from tensorflow.python.training import training as train

OPTIMIZER_CLS_NAMES = {
@@ -52,7 +55,13 @@ def optimize_loss(loss,
loss: Tensor, 0 dimensional.
global_step: Tensor, step counter for each update.
learning_rate: float or Tensor, magnitude of update per each training step.
optimizer: string or function, used as optimizer for training.
optimizer: string, class or optimizer instance, used as trainer.
string should be name of optimizer, like 'SGD',
'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
class should be sub-class of tf.Optimizer that implements
`compute_gradients` and `apply_gradients` functions.
optimizer instance should be instantiation of tf.Optimizer sub-class
and have `compute_gradients` and `apply_gradients` functions.
clip_gradients: float or None, clips gradients by this value.
moving_average_decay: float or None, takes into account previous loss
to make learning smoother due to outliers.
@@ -77,14 +86,6 @@
logging_ops.scalar_summary("loss/mean", loss_averages.average(loss))
loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

# Convert optimizer into the optimizer class.
if isinstance(optimizer, str):
opt_cls = OPTIMIZER_CLS_NAMES[optimizer]
elif callable(optimizer):
opt_cls = optimizer
else:
raise ValueError("Unrecognized optimizer: should be string or function.")

# Learning rate variable, with possible decay.
lr = vs.get_variable("learning_rate",
[],
@@ -93,8 +94,21 @@
if learning_rate_decay_fn is not None:
lr = learning_rate_decay_fn(lr, global_step)

# Create optimizer.
opt = opt_cls(learning_rate=lr)
# Create optimizer, given specified parameters.
if isinstance(optimizer, six.string_types):
if optimizer not in OPTIMIZER_CLS_NAMES:
raise ValueError("Optimizer name should be one of [%s], you provided %s."
% (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
elif isinstance(optimizer, type) and issubclass(optimizer,
optimizer_.Optimizer):
opt = optimizer(learning_rate=lr)
elif isinstance(optimizer, optimizer_.Optimizer):
opt = optimizer
else:
raise ValueError("Unrecognized optimizer: should be string, "
"subclass of Optimizer or instance of "
"subclass of Optimizer. Got %s." % str(optimizer))

# All trainable variables, if specific variables are not specified.
if variables is None:
@@ -103,9 +117,10 @@
# Compute gradients and clip them if provided.
gradients = opt.compute_gradients(loss, variables)
if clip_gradients is not None:
gradients, variables = zip(*gradients)
clipped_gradients, _ = clip_ops.clip_by_global_norm(gradients,
clip_gradients)
gradients = zip(clipped_gradients, variables)
gradients = list(zip(clipped_gradients, variables))

# Add scalar summary for loss.
logging_ops.scalar_summary("loss", loss)
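The clipping fix above comes down to an unzip/clip/re-zip pattern: compute_gradients returns (gradient, variable) pairs, while clip_by_global_norm expects a flat list of tensors. A standalone sketch of the corrected flow (toy model and clip value are illustrative, not taken from the commit):

import tensorflow as tf

x = tf.placeholder(tf.float32, [])
var = tf.get_variable("test", [], initializer=tf.constant_initializer(10))
loss = tf.abs(var * x)
global_step = tf.get_variable("global_step", [], trainable=False,
                              initializer=tf.constant_initializer(0))

opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
grads_and_vars = opt.compute_gradients(loss)     # [(grad, var), ...]
grads, variables = zip(*grads_and_vars)          # split pairs into two tuples
clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=0.1)
train_op = opt.apply_gradients(list(zip(clipped_grads, variables)),
                               global_step=global_step)
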
58 changes: 45 additions & 13 deletions tensorflow/contrib/layers/python/layers/optimizers_test.py
@@ -21,28 +21,60 @@
import tensorflow as tf


def _setup_model():
x = tf.placeholder(tf.float32, [])
var = tf.get_variable("test", [], initializer=tf.constant_initializer(10))
loss = tf.abs(var * x)
global_step = tf.get_variable("global_step",
[],
trainable=False,
initializer=tf.constant_initializer(0))
return x, var, loss, global_step


class OptimizersTest(tf.test.TestCase):

def testSGDOptimizer(self):
optimizers = ["SGD", tf.train.GradientDescentOptimizer,
tf.train.GradientDescentOptimizer(learning_rate=0.1)]
for optimizer in optimizers:
with tf.Graph().as_default() as g:
with self.test_session(graph=g) as session:
x, var, loss, global_step = _setup_model()
train = tf.contrib.layers.optimize_loss(loss,
global_step,
learning_rate=0.1,
optimizer=optimizer)
tf.initialize_all_variables().run()
session.run(train, feed_dict={x: 5})
var_value, global_step_value = session.run([var, global_step])
self.assertEqual(var_value, 9.5)
self.assertEqual(global_step_value, 1)

def testWrongOptimizer(self):
optimizers = ["blah", tf.Variable, object()]
for optimizer in optimizers:
with tf.Graph().as_default() as g:
with self.test_session(graph=g):
_, _, loss, global_step = _setup_model()
with self.assertRaises(ValueError):
tf.contrib.layers.optimize_loss(loss,
global_step,
learning_rate=0.1,
optimizer=optimizer)

def testGradientClip(self):
with self.test_session() as session:
x = tf.placeholder(tf.float32, [])
var = tf.get_variable("test", [], initializer=tf.constant_initializer(10))
loss = tf.abs(var * x)
global_step = tf.get_variable("global_step",
[],
trainable=False,
initializer=tf.constant_initializer(0))
lr_decay = lambda lr, gs: tf.train.exponential_decay(lr, gs, 1, 0.5)
x, var, loss, global_step = _setup_model()
train = tf.contrib.layers.optimize_loss(loss,
global_step,
learning_rate=0.1,
learning_rate_decay_fn=lr_decay,
optimizer="SGD")
optimizer="SGD",
clip_gradients=0.1)
tf.initialize_all_variables().run()
session.run(train, feed_dict={x: 5})
var_value, global_step_value = session.run([
var, global_step])
self.assertEqual(var_value, 9.5)
var_value, global_step_value = session.run([var, global_step])
self.assertAlmostEqual(var_value, 9.98999, 4)
self.assertEqual(global_step_value, 1)


