@@ -55,7 +55,10 @@ def __init__(self):
     '''
     def __call__(self, net, param_init_net, param, grad=None):
         if grad is None:
-            assert isinstance(param, parameter_info.ParameterInfo)
+            assert isinstance(param, parameter_info.ParameterInfo), (
+                "Expected parameter to be of type ParameterInfo, got {}".format(
+                    param
+                ))
             assert param.grad is not None
         else:
             if isinstance(param, basestring):
@@ -155,7 +158,8 @@ def add_lr_multiplier(self, lr_multiplier, is_gpu_blob=False):

     @staticmethod
     def dedup(net, sparse_dedup_aggregator, grad):
-        assert (isinstance(grad, core.GradientSlice))
+        assert isinstance(grad, core.GradientSlice), (
+            "Dedup only works for sparse gradient, got {}".format(grad))
         if sparse_dedup_aggregator:
             return net.DeduplicateGradientSlices(
                 grad, aggregator=sparse_dedup_aggregator)
@@ -213,11 +217,14 @@ def _run(self, net, param_init_net, param_info):
         grad = param_info.grad
         if self.base_learning_rate == 0:
             return
-        assert self.base_learning_rate > 0
+        assert self.base_learning_rate > 0, (
+            "Expect positive base learning rate, got {}".format(
+                self.base_learning_rate))

         # TODO(zqq): support LARS for sparse parameters
         if self.lars is not None and not isinstance(grad, core.GradientSlice):
-            assert self.lars >= 0, 'Lars offset must be nonnegative.'
+            assert self.lars >= 0, (
+                'Lars offset must be nonnegative, got {}'.format(self.lars))
             lr_lars_multiplier = net.Lars(
                 [param, grad],
                 self.make_unique_blob_name(str(param) + "_lars"),
@@ -317,7 +324,9 @@ def _run(self, net, param_init_net, param_info):
         grad = param_info.grad
         if self.base_learning_rate == 0:
             return
-        assert self.base_learning_rate > 0
+        assert self.base_learning_rate > 0, (
+            "Expect positive base learning rate, got {}".format(
+                self.base_learning_rate))

         lr, _ = self.build_lr(
             net, param_init_net,
@@ -330,8 +339,8 @@ def _run(self, net, param_init_net, param_info):
             param_fp32, str(param) + "_momentum", value=0.)
         self._aux_params.local.append(momentum_data)

-        assert not isinstance(grad, core.GradientSlice), \
-            "Doesn't support sparse gradients"
+        assert not isinstance(grad, core.GradientSlice), (
+            "MultiPrecisionSgd does not support sparse gradients")

         # Copy gradient to fp32
         grad_fp32 = net.HalfToFloat(grad, grad + "_fp32")
@@ -404,7 +413,9 @@ def _run(self, net, param_init_net, param_info, fp32_update=False):

         if self.base_learning_rate == 0:
             return
-        assert self.base_learning_rate > 0
+        assert self.base_learning_rate > 0, (
+            "Expect positive base learning rate, got {}".format(
+                self.base_learning_rate))

         lr, _ = self.build_lr(
             net, param_init_net,
@@ -421,8 +432,8 @@ def _run(self, net, param_init_net, param_info, fp32_update=False):

         self._aux_params.local.append(momentum_data)

-        assert not isinstance(grad, core.GradientSlice), \
-            "Doesn't support sparse gradients"
+        assert not isinstance(grad, core.GradientSlice), (
+            "FP16Sgd does not support sparse gradients")

         if fp32_update_flag == 0:
             net.FP16MomentumSGDUpdate(
@@ -462,7 +473,8 @@ def _run(self, net, param_init_net, param_info):
         )

         if isinstance(param_info.grad, core.GradientSlice):
-            assert "Weight decay does not yet support sparse gradients"
+            raise ValueError(
+                "Weight decay does not yet support sparse gradients")
         else:
             net.WeightedSum(
                 [param_info.grad, ONE, param_info.blob, WD],
@@ -493,7 +505,8 @@ def _run(self, net, param_init_net, param_info):
             return

         if self.lars is not None and not isinstance(grad, core.GradientSlice):
-            assert self.lars >= 0, 'Lars offset must be nonnegative.'
+            assert self.lars >= 0, (
+                'Lars offset must be nonnegative, got {}'.format(self.lars))
             lr_lars_multiplier = net.Lars(
                 [param, grad],
                 self.make_unique_blob_name(str(param) + "_lars"),
@@ -796,7 +809,7 @@ def _run(self, net, param_init_net, param_info):

         assert self.alpha > 0
         assert not isinstance(grad, core.GradientSlice), \
-            "Doesn't support sparse gradients"
+            "YellowFin does not support sparse gradients"

         if not param_init_net.BlobIsDefined(_OPTIMIZER_ITERATION_NAME):
             # Add training operators.
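Not part of the patch: a minimal, standalone sketch of the assert-message pattern these hunks apply, assuming plain Python and no Caffe2 imports. The message text mirrors the base learning rate check from the diff; the helper name and example value are illustrative only.

    # Illustrative sketch: pair each assert with a formatted message so the
    # offending value appears in the AssertionError, as in the diff above.
    def check_base_learning_rate(base_learning_rate):
        assert base_learning_rate > 0, (
            "Expect positive base learning rate, got {}".format(
                base_learning_rate))

    try:
        check_base_learning_rate(-0.01)
    except AssertionError as e:
        print(e)  # Expect positive base learning rate, got -0.01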