-
Notifications
You must be signed in to change notification settings - Fork 223
/
Copy pathutils.py
103 lines (85 loc) · 3.81 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import torch.nn.functional as F
from torch import nn
import torch
import math
def evaluate_policy(env, agent, seed, turns = 3):
    """Roll out `turns` evaluation episodes and return the mean episodic return.

    The Q-network is switched to eval mode for the rollout so that NoisyNet
    layers use their mean weights (no exploration noise), and switched back
    to train mode afterwards. The mean return is truncated to an int.
    """
    agent.q_net.eval()  # deterministic pass: NoisyNet noise is off in eval mode
    total_return = 0
    for _ in range(turns):
        state, _ = env.reset(seed=seed)
        episode_over = False
        while not episode_over:
            action = agent.select_action(state, evaluate=True)  # greedy-ish eval policy
            next_state, reward, terminated, truncated, _ = env.step(action)
            episode_over = terminated or truncated
            total_return += reward
            state = next_state
    agent.q_net.train()  # restore noisy/training behaviour
    return int(total_return / turns)
# You can just ignore this function. It is not related to the RL algorithm.
def str2bool(v):
    '''Convert a command-line string to a bool (workaround for the argparse bool bug).

    argparse's `type=bool` treats any non-empty string as True; use
    `type=str2bool` instead. Accepts common spellings in any case
    ('yes'/'no', 'true'/'false', 't'/'f', 'y'/'n', '1'/'0').

    Raises:
        ValueError: if `v` is not a recognised boolean string. argparse turns
            this into a proper "invalid value" error message. (The original
            printed a warning and returned None, which argparse would silently
            store as the option value.)
    '''
    if isinstance(v, bool):
        return v
    # Compare against lowercase forms only: after v.lower(), entries such as
    # 'True'/'TRUE'/'T' in the original tuples could never match (dead code).
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise ValueError(f'Boolean value expected, got {v!r}')
class LinearSchedule(object):
    """Linearly anneal a scalar from `initial_p` to `final_p`.

    After `schedule_timesteps` timesteps the schedule is saturated and
    `final_p` is returned for every later timestep.
    """

    def __init__(self, schedule_timesteps, final_p, initial_p=1.0):
        """Store the schedule endpoints.

        Parameters
        ----------
        schedule_timesteps: int
            Number of timesteps over which to anneal `initial_p` to `final_p`.
        final_p: float
            Output value once the schedule has finished.
        initial_p: float
            Output value at timestep 0.
        """
        self.schedule_timesteps = schedule_timesteps
        self.final_p = final_p
        self.initial_p = initial_p

    def value(self, t):
        """Return the interpolated value at timestep `t` (clamped past the end)."""
        progress = min(float(t) / self.schedule_timesteps, 1.0)
        span = self.final_p - self.initial_p
        return self.initial_p + progress * span
class NoisyLinear(nn.Module):
    '''Linear layer with factorised Gaussian parameter noise (NoisyNet).

    From https://github.com/Lizhi-sjtu/DRL-code-pytorch/blob/main/3.Rainbow_DQN/network.py

    In training mode, fresh noise is sampled on every forward pass and the
    effective parameters are mu + sigma * epsilon; in eval mode only the
    learned means (mu) are used, making the layer deterministic.
    '''

    def __init__(self, in_features, out_features, sigma_init=0.5):
        super(NoisyLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.sigma_init = sigma_init
        # Learnable mean and std-dev for the weight, plus a non-learnable
        # buffer holding the current noise sample.
        self.weight_mu = nn.Parameter(torch.FloatTensor(out_features, in_features))
        self.weight_sigma = nn.Parameter(torch.FloatTensor(out_features, in_features))
        self.register_buffer('weight_epsilon', torch.FloatTensor(out_features, in_features))
        # Same mu/sigma/epsilon triple for the bias.
        self.bias_mu = nn.Parameter(torch.FloatTensor(out_features))
        self.bias_sigma = nn.Parameter(torch.FloatTensor(out_features))
        self.register_buffer('bias_epsilon', torch.FloatTensor(out_features))
        self.reset_parameters()  # initialise mu and sigma
        self.reset_noise()       # draw the first epsilon sample

    def forward(self, x):
        if not self.training:
            # Evaluation: deterministic pass through the mean parameters only.
            return F.linear(x, self.weight_mu, self.bias_mu)
        # Training: resample noise each call and perturb the parameters
        # (elementwise multiply of sigma with the noise sample).
        self.reset_noise()
        noisy_weight = self.weight_mu + self.weight_sigma.mul(self.weight_epsilon)
        noisy_bias = self.bias_mu + self.bias_sigma.mul(self.bias_epsilon)
        return F.linear(x, noisy_weight, noisy_bias)

    def reset_parameters(self):
        # Initialisation from the NoisyNet paper: mu ~ U(-1/sqrt(p), 1/sqrt(p)),
        # sigma = sigma_init / sqrt(p).
        bound = 1 / math.sqrt(self.in_features)
        self.weight_mu.data.uniform_(-bound, bound)
        self.bias_mu.data.uniform_(-bound, bound)
        self.weight_sigma.data.fill_(self.sigma_init / math.sqrt(self.in_features))
        self.bias_sigma.data.fill_(self.sigma_init / math.sqrt(self.out_features))

    def reset_noise(self):
        # Factorised noise: one vector per side, combined by outer product,
        # instead of a full (out x in) matrix of independent samples.
        eps_in = self.scale_noise(self.in_features)
        eps_out = self.scale_noise(self.out_features)
        self.weight_epsilon.copy_(torch.outer(eps_out, eps_in))  # outer == deprecated torch.ger
        self.bias_epsilon.copy_(eps_out)

    def scale_noise(self, size):
        # f(x) = sign(x) * sqrt(|x|), the noise scaling from the NoisyNet paper.
        noise = torch.randn(size)
        return noise.sign() * noise.abs().sqrt()