Skip to content

Commit db72978

Browse files
Wapaul1 authored and pcmoritz committed
Added functionality for retrieving variables from control dependencies (ray-project#220)
* Added test for retrieving variables from an optimizer * Added comments to test * Addressed comments * Fixed Travis bug * Added fix to circular controls * Added set for explored operations and duplicate prefix stripping * Removed embedded IPython * Removed prefix, use separate graph for each network * Removed redundant imports * Addressed comments and added separate graph to initializer * fix typos * get rid of prefix in documentation
1 parent 6703f7b commit db72978

File tree

4 files changed

+84
-36
lines changed

4 files changed

+84
-36
lines changed

doc/using-ray-with-tensorflow.md

+8-15
Original file line number | Diff line number | Diff line change
@@ -72,21 +72,18 @@ b.assign(np.zeros(1)) # This adds a node to the graph every time you call it.
7272
## Complete Example
7373

7474
Putting this all together, we would first create the graph on each worker using
75-
environment variables. Within the environment variables, we would use the
75+
environment variables. Within the environment variables, we would use the
7676
`get_weights` and `set_weights` methods of the `TensorFlowVariables` class. We
77-
would then use those methods to ship the weights (as a dictionary of variable
77+
would then use those methods to ship the weights (as a dictionary of variable
7878
names mapping to tensorflow tensors) between the processes without shipping the
7979
actual TensorFlow graphs, which are much more complex Python objects. Note that
80-
to avoid namespace collision with already created variables on the workers, we
81-
use a variable_scope and a prefix in the environment variables and then pass
82-
true to the prefix in `TensorFlowVariables` so it can properly decode the variable
83-
names.
80+
to avoid namespace collision with already created variables on the workers, we
81+
use a separate graph for each network.
8482

8583
```python
8684
import tensorflow as tf
8785
import numpy as np
8886
import ray
89-
import uuid
9087

9188
ray.init(num_workers=5)
9289

@@ -95,11 +92,8 @@ NUM_BATCHES = 1
9592
NUM_ITERS = 201
9693

9794
def net_vars_initializer():
98-
# Prefix should be random so that there is no conflict with variable names in
99-
# the cluster setting.
100-
prefix = str(uuid.uuid1().hex)
101-
# Use the tensorflow variable_scope to prefix all of the variables
102-
with tf.variable_scope(prefix):
95+
# Use a separate graph for each network.
96+
with tf.Graph().as_default():
10397
# Seed TensorFlow to make the script deterministic.
10498
tf.set_random_seed(0)
10599
# Define the inputs.
@@ -116,9 +110,8 @@ def net_vars_initializer():
116110
# Define the weight initializer and session.
117111
init = tf.global_variables_initializer()
118112
sess = tf.Session()
119-
# Additional code for setting and getting the weights, and use a prefix
120-
# so that the variable names can be converted between workers.
121-
variables = ray.experimental.TensorFlowVariables(loss, sess, prefix=True)
113+
# Additional code for setting and getting the weights
114+
variables = ray.experimental.TensorFlowVariables(loss, sess)
122115
# Return all of the data needed to use the network.
123116
return variables, sess, train, loss, x_data, y_data, init
124117

examples/lbfgs/driver.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,8 @@ def grad(self, xs, ys):
6161
return self.sess.run(self.cross_entropy_grads, feed_dict={self.x: xs, self.y_: ys})
6262

6363
def net_initialization():
64-
return LinearModel([784,10])
64+
with tf.Graph().as_default():
65+
return LinearModel([784,10])
6566

6667
# By default, when an environment variable is used by a remote function, the
6768
# initialization code will be rerun at the end of the remote task to ensure

python/ray/experimental/tfutils.py

+22-9
Original file line number | Diff line number | Diff line change
@@ -28,28 +28,41 @@ class TensorFlowVariables(object):
2828
assignment_placeholders (List[tf.placeholders]): The nodes that weights get
2929
passed to.
3030
assignment_nodes (List[tf.Tensor]): The nodes that assign the weights.
31-
prefix (Bool): Boolean for if there is a prefix on the variable names.
3231
"""
33-
def __init__(self, loss, sess=None, prefix=False):
32+
def __init__(self, loss, sess=None):
3433
"""Creates a TensorFlowVariables instance."""
3534
import tensorflow as tf
3635
self.sess = sess
3736
self.loss = loss
38-
self.prefix = prefix
3937
queue = deque([loss])
4038
variable_names = []
39+
explored_inputs = set([loss])
4140

4241
# We do a BFS on the dependency graph of the input function to find
4342
# the variables.
4443
while len(queue) != 0:
45-
op = queue.popleft().op
46-
queue.extend(op.inputs)
47-
if op.node_def.op == "Variable":
48-
variable_names.append(op.node_def.name)
44+
tf_obj = queue.popleft()
45+
46+
# The object put into the queue is not necessarily an operation, so we
47+
# want the op attribute to get the operation underlying the object.
48+
# Only operations contain the inputs that we can explore.
49+
if hasattr(tf_obj, "op"):
50+
tf_obj = tf_obj.op
51+
for input_op in tf_obj.inputs:
52+
if input_op not in explored_inputs:
53+
queue.append(input_op)
54+
explored_inputs.add(input_op)
55+
# Tensorflow control inputs can be circular, so we keep track of
56+
# explored operations.
57+
for control in tf_obj.control_inputs:
58+
if control not in explored_inputs:
59+
queue.append(control)
60+
explored_inputs.add(control)
61+
if tf_obj.node_def.op == "Variable":
62+
variable_names.append(tf_obj.node_def.name)
4963
self.variables = OrderedDict()
5064
for v in [v for v in tf.global_variables() if v.op.node_def.name in variable_names]:
51-
name = v.op.node_def.name.split("/", 1 if prefix else 0)[-1]
52-
self.variables[name] = v
65+
self.variables[v.op.node_def.name] = v
5366
self.assignment_placeholders = dict()
5467
self.assignment_nodes = []
5568

test/tensorflow_test.py

+52-11
Original file line number | Diff line number | Diff line change
@@ -17,32 +17,39 @@ def make_linear_network(w_name=None, b_name=None):
1717
b = tf.Variable(tf.zeros([1]), name=b_name)
1818
y = w * x_data + b
1919
# Return the loss and weight initializer.
20-
return tf.reduce_mean(tf.square(y - y_data)), tf.global_variables_initializer()
20+
return tf.reduce_mean(tf.square(y - y_data)), tf.global_variables_initializer(), x_data, y_data
2121

2222
def net_vars_initializer():
23-
# Random prefix so variable names do not clash if we use nets with
24-
# the same name.
25-
prefix = str(uuid.uuid1().hex)
26-
# Use the tensorflow variable_scope to prefix all of the variables
27-
with tf.variable_scope(prefix):
23+
# Uses a separate graph for each network.
24+
with tf.Graph().as_default():
2825
# Create the network.
29-
loss, init = make_linear_network()
26+
loss, init, _, _ = make_linear_network()
3027
sess = tf.Session()
3128
# Additional code for setting and getting the weights.
32-
variables = ray.experimental.TensorFlowVariables(loss, sess, prefix=True)
29+
variables = ray.experimental.TensorFlowVariables(loss, sess)
3330
# Return all of the data needed to use the network.
3431
return variables, init, sess
3532

3633
def net_vars_reinitializer(net_vars):
3734
return net_vars
3835

36+
def train_vars_initializer():
37+
# Almost the same as above, but now returns the placeholders and gradient.
38+
with tf.Graph().as_default():
39+
loss, init, x_data, y_data = make_linear_network()
40+
sess = tf.Session()
41+
variables = ray.experimental.TensorFlowVariables(loss, sess)
42+
grad = tf.gradients(loss, list(variables.variables.values()))
43+
return variables, init, sess, grad, [x_data, y_data]
44+
45+
3946
class TensorFlowTest(unittest.TestCase):
4047

4148
def testTensorFlowVariables(self):
4249
ray.init(num_workers=2)
4350

4451
sess = tf.Session()
45-
loss, init = make_linear_network()
52+
loss, init, _, _ = make_linear_network()
4653
sess.run(init)
4754

4855
variables = ray.experimental.TensorFlowVariables(loss, sess)
@@ -54,7 +61,7 @@ def testTensorFlowVariables(self):
5461
variables.set_weights(weights)
5562
self.assertEqual(weights, variables.get_weights())
5663

57-
loss2, init2 = make_linear_network("w", "b")
64+
loss2, init2, _, _ = make_linear_network("w", "b")
5865
sess.run(init2)
5966

6067
variables2 = ray.experimental.TensorFlowVariables(loss2, sess)
@@ -148,7 +155,7 @@ def testNetworkDriverWorkerIndependent(self):
148155

149156
# Create a network on the driver locally.
150157
sess1 = tf.Session()
151-
loss1, init1 = make_linear_network()
158+
loss1, init1, _, _ = make_linear_network()
152159
net_vars1 = ray.experimental.TensorFlowVariables(loss1, sess1)
153160
sess1.run(init1)
154161

@@ -170,5 +177,39 @@ def set_and_get_weights(weights):
170177

171178
ray.worker.cleanup()
172179

180+
def testVariablesControlDependencies(self):
181+
ray.init(num_workers=1)
182+
183+
# Creates a network and appends a momentum optimizer.
184+
sess = tf.Session()
185+
loss, init, _, _ = make_linear_network()
186+
minimizer = tf.train.MomentumOptimizer(0.9, 0.9).minimize(loss)
187+
net_vars = ray.experimental.TensorFlowVariables(minimizer, sess)
188+
sess.run(init)
189+
190+
# Tests if all variables are properly retrieved, 2 variables and 2 momentum
191+
# variables.
192+
self.assertEqual(len(net_vars.variables.items()), 4)
193+
194+
ray.worker.cleanup()
195+
196+
def testRemoteTrainingStep(self):
197+
ray.init(num_workers=1)
198+
199+
ray.env.net = ray.EnvironmentVariable(train_vars_initializer, net_vars_reinitializer)
200+
201+
@ray.remote
202+
def training_step(weights):
203+
variables, _, sess, grad, placeholders = ray.env.net
204+
variables.set_weights(weights)
205+
return sess.run(grad, feed_dict=dict(zip(placeholders, [[1]*100]*2)))
206+
207+
variables, init, sess, _, _ = ray.env.net
208+
209+
sess.run(init)
210+
ray.get(training_step.remote(variables.get_weights()))
211+
212+
ray.worker.cleanup()
213+
173214
if __name__ == "__main__":
174215
unittest.main(verbosity=2)

0 commit comments

Comments
 (0)