
Fixed previous weight handling for DCASGD optimizer. #5140

Merged · 9 commits · Mar 5, 2017
Simplified logic and moved the weight_bak initialization back to before
updating the weight; the original bug was in the momentum == 0.0 block, it seems.
sergeykolychev committed Feb 25, 2017
commit 259d2e7f4c972499b04420c9b54983975e493bfd
python/mxnet/optimizer.py (18 changes: 6 additions & 12 deletions)
@@ -318,7 +318,7 @@ def create_state(self, index, weight):

"""
if self.momentum == 0.0:
return (None, # momentum
return (zeros(weight.shape, weight.context, dtype=weight.dtype), # placeholder
weight.copy()) # previous weight
else:
return (zeros(weight.shape, weight.context, dtype=weight.dtype), # momentum
@@ -352,17 +352,11 @@ def update(self, index, weight, grad, state):
            grad = clip(grad, -self.clip_gradient, self.clip_gradient)

        mom, previous_weight = state
[Inline review comment, Contributor] This wastes memory and compute. Better to use different states and test for momentum == 0.
-        if mom:
-            mom[:] *= self.momentum
-            mom[:] += -lr * (grad + wd * weight + self.lamda \
-                             * grad * grad * (weight - previous_weight))
-            weight[:] += mom
-            previous_weight[:] = weight
-        else:
-            assert self.momentum == 0.0
-            weight[:] += -lr * (grad + wd * weight + self.lamda \
-                                * grad * grad * (weight - previous_weight))
-            previous_weight[:] = weight
+        mom[:] *= self.momentum
+        mom[:] += -lr * (grad + wd * weight + self.lamda \
+                         * grad * grad * (weight - previous_weight))
+        previous_weight[:] = weight
+        weight[:] += mom

@register
class NAG(SGD):
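
The commit message above notes that the previous-weight snapshot has to happen before the weight itself is updated. Below is a minimal standalone sketch of that corrected ordering, assuming NumPy arrays as a stand-in for mxnet.ndarray; the hyperparameter names follow the diff, and the helper name dcasgd_step is hypothetical.

import numpy as np

def dcasgd_step(weight, grad, mom, previous_weight, lr, wd, momentum, lamda):
    # Delay-compensated step, matching the added lines in the diff above.
    mom *= momentum
    mom += -lr * (grad + wd * weight
                  + lamda * grad * grad * (weight - previous_weight))
    previous_weight[:] = weight  # snapshot BEFORE the weight moves
    weight += mom

# Toy usage: all states are plain float arrays of the same shape.
w = np.ones(3)
g = np.full(3, 0.1)
dcasgd_step(w, g, mom=np.zeros(3), previous_weight=w.copy(),
            lr=0.01, wd=0.0, momentum=0.0, lamda=0.04)

With momentum == 0.0, mom holds exactly the current step and previous_weight keeps the value the weight had before the update, which is what the old momentum == 0.0 branch got wrong.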
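
The inline review comment above suggests not allocating the zeros placeholder when momentum == 0.0. A hedged sketch of that alternative follows (not what this commit does; same NumPy-style arrays, and the helper name is again hypothetical), keeping the corrected snapshot-before-update ordering.

def dcasgd_step_no_placeholder(weight, grad, mom, previous_weight, lr, wd, momentum, lamda):
    # Reviewer's suggestion: keep mom as None when momentum == 0.0 and branch,
    # instead of allocating a zeros buffer only to multiply it by zero.
    step = -lr * (grad + wd * weight
                  + lamda * grad * grad * (weight - previous_weight))
    if mom is None:
        previous_weight[:] = weight  # still snapshot before the weight moves
        weight += step
    else:
        mom *= momentum
        mom += step
        previous_weight[:] = weight
        weight += mom

Whether the saved allocation is worth the extra branch is exactly the trade-off the reviewer raises; a later revision could keep None as the momentum state in create_state and dispatch on it here.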