# -*- coding: utf-8 -*-
"""
Created on 2018/9/4 22:26
@author: mick.yi
Optimizers
"""
import numpy as np

from modules import Model


def _copy_weights_to_zeros(weights):
    """Return a new dict with the same keys as `weights`, each mapped to a zero array of matching shape."""
    result = {}
    for key in weights.keys():
        result[key] = np.zeros_like(weights[key])
    return result


class SGD(object):
    """
    Mini-batch stochastic gradient descent with momentum.
    """

    def __init__(self, weights, lr=0.01, momentum=0.9, decay=1e-5):
        """
        :param weights: weights, a dict mapping names to parameter arrays
        :param lr: initial learning rate
        :param momentum: momentum factor
        :param decay: learning-rate decay
        """
        self.v = _copy_weights_to_zeros(weights)  # accumulated momentum (velocity)
        self.iterations = 0  # number of iterations performed
        self.lr = self.init_lr = lr
        self.momentum = momentum
        self.decay = decay

    def iterate(self, m: Model):
        """
        Perform one update step.
        :param m: the model
        :return:
        """
        # Decay the learning rate
        self.lr = self.init_lr / (1 + self.iterations * self.decay)
        # Update the velocity, then the weights
        for layer in m.layers:
            for key in layer.weights.keys():
                self.v[key] = self.momentum * self.v[key] + self.lr * layer.gradients[key]
                layer.weights[key] -= self.v[key]
        # Count this iteration
        self.iterations += 1
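

# --- A minimal usage sketch (added for illustration; not in the original
# file). `modules.Model` is assumed to expose a `layers` list whose items
# carry matching `weights` and `gradients` dicts, which is exactly the
# interface `iterate` reads, so a tiny stand-in object is enough here.
def _demo_sgd_step():
    class _ToyLayer(object):
        def __init__(self):
            self.weights = {"w": np.array([1.0, 2.0])}
            self.gradients = {"w": np.array([0.5, -0.5])}

    class _ToyModel(object):
        def __init__(self):
            self.layers = [_ToyLayer()]

    model = _ToyModel()
    sgd = SGD(model.layers[0].weights, lr=0.1, momentum=0.9)
    sgd.iterate(model)  # first step: v = 0.9 * 0 + 0.1 * g, then w -= v
    print(model.layers[0].weights["w"])  # -> [0.95 2.05]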


class AdaGrad(object):
    """
    AdaGrad: scale each step by the accumulated sum of squared gradients.
    """

    def __init__(self, weights, lr=0.01, epsilon=1e-6, decay=0):
        """
        :param weights: weights, a dict mapping names to parameter arrays
        :param lr: learning rate
        :param epsilon: smoothing term to avoid division by zero
        :param decay: learning-rate decay
        """
        self.s = _copy_weights_to_zeros(weights)  # accumulated squared gradients
        self.iterations = 0  # number of iterations performed
        self.lr = self.init_lr = lr
        self.epsilon = epsilon
        self.decay = decay

    def iterate(self, m: Model):
        """
        Perform one update step.
        :param m: the model
        :return:
        """
        # Decay the learning rate
        self.lr = self.init_lr / (1 + self.iterations * self.decay)
        # Accumulate squared gradients, then update the weights
        for layer in m.layers:
            for key in layer.weights.keys():
                self.s[key] += np.square(layer.gradients[key])
                layer.weights[key] -= self.lr * layer.gradients[key] / np.sqrt(self.s[key] + self.epsilon)
        # Count this iteration
        self.iterations += 1
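

# --- Illustrative only (not in the original file): the first AdaGrad step
# written out in plain numpy. The effective step lr * g / sqrt(sum(g^2) + eps)
# is roughly lr in every coordinate on the first step, regardless of gradient
# scale, which is AdaGrad's per-parameter normalization at work.
def _demo_adagrad_scaling():
    g = np.array([1.0, 4.0])
    s = np.square(g)                     # accumulated squared gradients after one step
    step = 0.01 * g / np.sqrt(s + 1e-6)  # per-element step size
    print(step)                          # -> both components ~0.01, despite a 4x gradient gap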


class RmsProp(object):
    """
    RMSProp: scale each step by an exponential moving average of squared gradients.
    """

    def __init__(self, weights, gamma=0.9, lr=0.01, epsilon=1e-6, decay=0):
        """
        :param weights: weights, a dict mapping names to parameter arrays
        :param gamma: decay factor of the moving average
        :param lr: learning rate
        :param epsilon: smoothing term to avoid division by zero
        :param decay: learning-rate decay
        """
        self.s = _copy_weights_to_zeros(weights)  # moving average of squared gradients
        self.gamma = gamma
        self.iterations = 0  # number of iterations performed
        self.lr = self.init_lr = lr
        self.epsilon = epsilon
        self.decay = decay

    def iterate(self, m: Model):
        """
        Perform one update step.
        :param m: the model
        :return:
        """
        # Decay the learning rate
        self.lr = self.init_lr / (1 + self.iterations * self.decay)
        # Update the moving average of squared gradients, then the weights
        for layer in m.layers:
            for key in layer.weights.keys():
                self.s[key] = self.gamma * self.s[key] + (1 - self.gamma) * np.square(layer.gradients[key])
                layer.weights[key] -= self.lr * layer.gradients[key] / np.sqrt(self.s[key] + self.epsilon)
        # Count this iteration
        self.iterations += 1
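

# --- Illustrative only (not in the original file): contrast RmsProp's moving
# average of squared gradients with AdaGrad's monotone sum. Under a constant
# gradient the moving average converges to g^2, so RmsProp's effective step
# stays bounded away from zero, while AdaGrad's accumulator keeps growing and
# its step shrinks toward zero.
def _demo_rmsprop_vs_adagrad():
    g = 2.0
    s_ema = s_sum = 0.0
    for _ in range(100):
        s_ema = 0.9 * s_ema + 0.1 * g ** 2  # RmsProp-style accumulator
        s_sum += g ** 2                     # AdaGrad-style accumulator
    print(s_ema)  # ~4.0 (== g ** 2)
    print(s_sum)  # 400.0, and still growing with every step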