Skip to content

Commit f644011

Browse files
committed
add assign_to_device fn for multi-gpu
1 parent d43c58c commit f644011

File tree

2 files changed

+44
-4
lines changed

2 files changed

+44
-4
lines changed

examples/6_MultiGPU/multigpu_cnn.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,22 @@ def average_gradients(tower_grads):
104104
return average_grads
105105

106106

107+
# By default, all variables will be placed on '/gpu:0'
# So we need a custom device function, to assign all variables to '/cpu:0'
# Note: If GPUs are peered, '/gpu:0' can be a faster option
# Op types that hold model state; these are pinned to the parameter-server device.
PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable']


def assign_to_device(device, ps_device='/cpu:0'):
    """Return a device function suitable for use with ``tf.device()``.

    Ops whose type appears in ``PS_OPS`` (i.e. variables) are pinned to
    ``ps_device``; every other op is placed on ``device``.

    Args:
        device: Device string for compute ops, e.g. '/gpu:0'.
        ps_device: Device string for variable (parameter) ops.
            Accepted with or without the leading '/'.

    Returns:
        A callable mapping an op (or NodeDef) to a device string.
    """
    def _assign(op):
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op in PS_OPS:
            # BUG FIX: the original returned "/" + ps_device, which turns the
            # default '/cpu:0' into the malformed device string '//cpu:0'.
            # Normalize the leading slash instead, so both 'cpu:0' and
            # '/cpu:0' arguments yield a valid '/cpu:0'.
            return ps_device if ps_device.startswith('/') else '/' + ps_device
        else:
            return device

    return _assign
121+
122+
107123
# Place all ops on CPU by default
108124
with tf.device('/cpu:0'):
109125
tower_grads = []
@@ -115,7 +131,7 @@ def average_gradients(tower_grads):
115131

116132
# Loop over all GPUs and construct their own computation graph
117133
for i in range(num_gpus):
118-
with tf.device('/gpu:%d' % i):
134+
with tf.device(assign_to_device('/gpu:{}'.format(i), ps_device='/cpu:0')):
119135

120136
# Split data between GPUs
121137
_x = X[i * batch_size: (i+1) * batch_size]

notebooks/6_MultiGPU/multigpu_cnn.ipynb

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,30 @@
167167
{
168168
"cell_type": "code",
169169
"execution_count": 4,
170+
"metadata": {
171+
"collapsed": true
172+
},
173+
"outputs": [],
174+
"source": [
175+
"# By default, all variables will be placed on '/gpu:0'\n",
176+
"# So we need a custom device function, to assign all variables to '/cpu:0'\n",
177+
"# Note: If GPUs are peered, '/gpu:0' can be a faster option\n",
178+
"PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable']\n",
179+
"\n",
180+
"def assign_to_device(device, ps_device='/cpu:0'):\n",
181+
" def _assign(op):\n",
182+
" node_def = op if isinstance(op, tf.NodeDef) else op.node_def\n",
183+
" if node_def.op in PS_OPS:\n",
184+
" return \"/\" + ps_device\n",
185+
" else:\n",
186+
" return device\n",
187+
"\n",
188+
" return _assign"
189+
]
190+
},
191+
{
192+
"cell_type": "code",
193+
"execution_count": 5,
170194
"metadata": {
171195
"collapsed": false,
172196
"scrolled": false
@@ -214,7 +238,7 @@
214238
"\n",
215239
" # Loop over all GPUs and construct their own computation graph\n",
216240
" for i in range(num_gpus):\n",
217-
" with tf.device('/gpu:%d' % i):\n",
241+
" with tf.device(assign_to_device('/gpu:{}'.format(i), ps_device='/cpu:0')):\n",
218242
"\n",
219243
" # Split data between GPUs\n",
220244
" _x = X[i * batch_size: (i+1) * batch_size]\n",
@@ -289,7 +313,7 @@
289313
"language_info": {
290314
"codemirror_mode": {
291315
"name": "ipython",
292-
"version": 2.0
316+
"version": 2
293317
},
294318
"file_extension": ".py",
295319
"mimetype": "text/x-python",
@@ -301,4 +325,4 @@
301325
},
302326
"nbformat": 4,
303327
"nbformat_minor": 0
304-
}
328+
}

0 commit comments

Comments (0)