 import unittest
 
 import numpy
+import itertools
 import paddle.v2.framework.core as core
 from paddle.v2.framework.op import Operator
 
 __all__ = ['get_numeric_gradient']
 
 
 def create_op(op_type):
+    # TODO need to set attrs
     kwargs = dict()
     for in_name in Operator.get_op_input_names(op_type):
         kwargs[in_name] = in_name
@@ -66,7 +68,6 @@ def get_numeric_gradient(op,
         local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace(
         ))
 
-    # TODO(yuyang18): Only CPU is support now.
     cpu_ctx = core.DeviceContext.create(core.CPUPlace())
 
     def get_output():
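As its name and the removed tests suggest, get_numeric_gradient builds a finite-difference gradient of one named output with respect to one named input, evaluated on a CPU device context as the hunk above shows. A usage sketch, with the call signature taken from the test case this patch deletes at the end of the file; it assumes the module's own imports (Operator, numpy):

# Sketch mirroring the removed GetNumericGradientTest.test_add_op below.
add_op = Operator('add_two', X="X", Y="Y", Out="Z")
x = numpy.random.random((10, 1)).astype("float32")
y = numpy.random.random((10, 1)).astype("float32")
# For Z = X + Y, dZ/dX is all ones, so the mean of the result should be ~1.0.
dx = get_numeric_gradient(add_op, {'X': x, 'Y': y}, 'Z', 'X')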
@@ -109,12 +110,110 @@ def product(dim):
 
 
 class GradientChecker(unittest.TestCase):
-    def assert_is_close(self, numeric_grads, scope, max_relative_error,
-                        msg_prefix):
-        for name in numeric_grads:
-            b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor())
-            a = numeric_grads[name]
+    def __get_gradient(self, forward_op, backward_op, input_value, grad_names,
+                       place):
+        """Get the input gradients after running the forward and backward
+        operators on the given place.
+
+        :param forward_op: forward operator
+        :type forward_op: Operator
+        :param backward_op: backward operator
+        :type backward_op: Operator
+        :param input_value: input values.
+        :type input_value: dict{string:numpy.array}
+        :param grad_names: the names of the returned input gradients.
+        :type grad_names: a list of string
+        :param place: the device type.
+        :type place: CPUPlace or GPUPlace
+        :return: the input gradients of the given grad_names.
+        :rtype: a list of numpy.array
+        """
+        scope = core.Scope()
+        ctx = core.DeviceContext.create(place)
+
+        inputs = forward_op.inputs()
+        in_names = [item for k in inputs for item in inputs[k]]
+        outputs = forward_op.outputs()
+        out_names = [item for k in outputs for item in outputs[k]]
+
+        # create input var and set value
+        for name, value in input_value.iteritems():
+            if name not in in_names:
+                raise ValueError(name + " does not exist in Op's inputs.")
+            var = scope.new_var(name).get_tensor()
+            var.set_dims(value.shape)
+            var.set(value, place)
+
+        # run forward op
+        for out_name in out_names:
+            scope.new_var(out_name)
+        forward_op.infer_shape(scope)
+        forward_op.run(scope, ctx)
+
+        # set each output grad var's shape to its output var's shape
+        # and fill the output grad with ones
+        for name in out_names:
+            out_tensor = scope.find_var(name).get_tensor()
+            grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
+            grad_tensor.set_dims(out_tensor.shape())
+            data = numpy.ones(out_tensor.shape(), dtype=numpy.float32)
+            grad_tensor.set(data, place)
+
+        # run backward op
+        for name in backward_op.outputs():
+            scope.new_var(name)
+        backward_op.infer_shape(scope)
+        backward_op.run(scope, ctx)
+
+        outs = [
+            numpy.array(scope.find_var(name).get_tensor())
+            for name in grad_names
+        ]
+        return outs
+
+    def compare_grad(self, forward_op, input_value):
+        """Compare the input gradients between CPU and GPU for the given
+        forward operator.
+
+        :param forward_op: forward operator
+        :type forward_op: Operator
+        :param input_value: input values.
+        :type input_value: dict{string:numpy.array}
+        :raises: AssertionError, if any gradient differs between CPU and GPU.
+        """
+        backward_op = core.Operator.backward(forward_op, set())
+        # return if not compiled with GPU or the GPU kernel is not implemented
+        if not (core.is_compile_gpu() and backward_op.support_gpu()):
+            return
 
+        outputs = backward_op.outputs()
+        out_names = [item for k in outputs for item in outputs[k]]
+        cpu_grads = self.__get_gradient(forward_op, backward_op, input_value,
+                                        out_names, core.CPUPlace())
+        gpu_grads = self.__get_gradient(forward_op, backward_op, input_value,
+                                        out_names, core.GPUPlace(0))
+
+        for c_grad, g_grad, name in itertools.izip(cpu_grads, gpu_grads,
+                                                   out_names):
+            self.assertTrue(
+                numpy.allclose(
+                    c_grad, g_grad, atol=1e-4),
+                "output name: " + name + " has diff")
+
+    def __assert_is_close(self, numeric_grads, analytic_grads, names,
+                          max_relative_error, msg_prefix):
+        """Use relative error for the comparison.
+
+        :param numeric_grads: the numerical gradients.
+        :type numeric_grads: a list of numpy.array
+        :param analytic_grads: the analytical gradients.
+        :type analytic_grads: a list of numpy.array
+        :param names: the names of the gradients, used for debug printing.
+        :type names: a list of string
+        :param msg_prefix: string info, used for debug printing.
+        :type msg_prefix: string
+        """
+        for a, b, name in itertools.izip(numeric_grads, analytic_grads, names):
             abs_a = numpy.abs(a)
             # if abs_a is nearly zero, then use abs error for a, not relative
             # error.
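The __assert_is_close loop that closes this hunk compares each numerical gradient a against its analytical counterpart b using a relative error, falling back to an absolute error where a is nearly zero (the comment above hints at this; the rest of the method lies outside the diff). A self-contained sketch of that criterion follows; the 1e-3 cut-off and the exact formula are assumptions, not taken from the patch.

import numpy

def max_relative_diff(a, b, abs_floor=1e-3):
    # use |a| as the denominator, but treat near-zero entries as 1.0 so the
    # comparison degrades to an absolute error there
    denom = numpy.abs(a)
    denom[denom < abs_floor] = 1.0
    return numpy.max(numpy.abs(a - b) / denom)

numeric = numpy.array([0.5, 1e-6, 2.0])
analytic = numpy.array([0.501, 0.0, 2.01])
assert max_relative_diff(numeric, analytic) < 0.05  # passes a 5% tolerance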
@@ -159,105 +258,26 @@ def check_grad(self,
 
         inputs = forward_op.inputs()
         in_names = [item for k in inputs for item in inputs[k]]
-        outputs = forward_op.outputs()
-        out_names = [item for k in outputs for item in outputs[k]]
-
         for no_grad in no_grad_set:
             if no_grad not in in_names:
                 raise ValueError("no_grad should be in in_names")
         backward_op = core.Operator.backward(forward_op, no_grad_set)
 
-        bwd_outputs = backward_op.outputs()
-        bwd_out_names = [item for k in bwd_outputs for item in bwd_outputs[k]]
-
         places = [core.CPUPlace()]
         if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu():
             places.append(core.GPUPlace(0))
 
-        numeric_grad = dict()
-        # get numeric gradient
-        for check_name in inputs_to_check:
-            numeric_grad[check_name] = \
-                get_numeric_gradient(forward_op, input_vars, output_name,
-                                     check_name)
+        # get numerical gradients
+        numeric_grads = [
+            get_numeric_gradient(forward_op, input_vars, output_name, name)
+            for name in inputs_to_check
+        ]
 
-        # get operator gradient according to different device
+        check_names = [grad_var_name(name) for name in inputs_to_check]
         for place in places:
-            scope = core.Scope()
-            ctx = core.DeviceContext.create(place)
-
-            # create input var and set value
-            for name, value in input_vars.iteritems():
-                if name not in in_names:
-                    raise ValueError(name + " not in op.inputs_")
-                var = scope.new_var(name).get_tensor()
-                var.set_dims(value.shape)
-                var.set(value, place)
-
-            # create output var
-            for out_name in out_names:
-                scope.new_var(out_name).get_tensor()
-
-            # infer the shape of output var and compute/set value of output var
-            forward_op.infer_shape(scope)
-            forward_op.run(scope, ctx)
-
-            # create output grad var
-            # set shape as the output var
-            # set value of this grad to ones
-            for name in out_names:
-                out_tensor = scope.find_var(name).get_tensor()
-                grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
-                grad_tensor.set_dims(out_tensor.shape())
-                data = 1.0 * numpy.ones(out_tensor.shape())
-                grad_tensor.set(data, place)
-
-            # create input grad var
-            for name in bwd_out_names:
-                scope.new_var(name).get_tensor()
-
-            # infer the shape of input gradient var and compute/set it's value
-            # with backward op
-            backward_op.infer_shape(scope)
-            backward_op.run(scope, ctx)
-
-            self.assert_is_close(numeric_grad, scope, max_relative_error,
-                                 "Gradient Check On %s" % str(place))
-
-
-if __name__ == '__main__':
-
-    class GetNumericGradientTest(unittest.TestCase):
-        def test_add_op(self):
-            add_op = Operator('add_two', X="X", Y="Y", Out="Z")
-            x = numpy.random.random((10, 1)).astype("float32")
-            y = numpy.random.random((10, 1)).astype("float32")
-
-            arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')
-            self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2)
-
-        def test_softmax_op(self):
-            def stable_softmax(x):
-                """Compute the softmax of vector x in a numerically stable way."""
-                shiftx = x - numpy.max(x)
-                exps = numpy.exp(shiftx)
-                return exps / numpy.sum(exps)
-
-            def label_softmax_grad(Y, dY):
-                dX = Y * 0.0
-                for i in range(Y.shape[0]):
-                    d = numpy.dot(Y[i, :], dY[i, :])
-                    dX[i, :] = Y[i, :] * (dY[i, :] - d)
-                return dX
-
-            softmax_op = Operator("softmax", X="X", Y="Y")
-
-            X = numpy.random.random((2, 2)).astype("float32")
-            Y = numpy.apply_along_axis(stable_softmax, 1, X)
-            dY = numpy.ones(Y.shape)
-            dX = label_softmax_grad(Y, dY)
-
-            arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X')
-            numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2)
-
-    unittest.main()
+            # get analytical gradients on each device
+            analytic_grads = self.__get_gradient(forward_op, backward_op,
+                                                 input_vars, check_names, place)
+            self.__assert_is_close(numeric_grads, analytic_grads, check_names,
+                                   max_relative_error,
+                                   "Gradient Check On %s" % str(place))
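With this change, an operator's gradient test would subclass GradientChecker and call check_grad. The full signature of check_grad sits outside the diff, so the parameter order below (forward op, input dict, inputs to check, output name) is inferred from the hunk body and should be read as a sketch rather than the definitive API; the operator and shapes mirror the deleted add_two test.

# Hypothetical test, assuming check_grad(forward_op, input_vars,
# inputs_to_check, output_name, ...) as the hunk above suggests.
class AddGradOpTest(GradientChecker):
    def test_add_two(self):
        op = Operator('add_two', X="X", Y="Y", Out="Z")
        inputs = {
            "X": numpy.random.random((10, 1)).astype("float32"),
            "Y": numpy.random.random((10, 1)).astype("float32"),
        }
        # compares numerical vs. analytical gradients of Z w.r.t. X and Y,
        # on CPU and (when available) GPU
        self.check_grad(op, inputs, ["X", "Y"], "Z")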