# test_math_util.py
# Source: fork of hill-a/stable-baselines.
import tensorflow as tf
import numpy as np
from gym.spaces.box import Box
from stable_baselines.common.math_util import discount_with_boundaries, scale_action, unscale_action
def test_discount_with_boundaries():
    """
    Check discount_with_boundaries against hand-computed discounted sums.

    The expected values treat the first three rewards as one discounted
    sequence and the last reward as standing alone, matching the 1.0 flags
    at indices 0 and 3 of ``episode_starts``.
    """
    gamma = 0.9
    rewards = np.array([1.0, 2.0, 3.0, 4.0], 'float32')
    episode_starts = [1.0, 0.0, 0.0, 1.0]

    # Hand-computed targets: discounting accumulates over indices 0..2 only.
    expected = [
        1 + gamma * 2 + gamma ** 2 * 3,
        2 + gamma * 3,
        3,
        4,
    ]

    actual = discount_with_boundaries(rewards, episode_starts, gamma)
    assert np.allclose(actual, expected)
def test_scaling_action():
    """
    test scaling of scalar, 1d and 2d vectors of finite non-NaN real numbers to and from tanh co-domain (per component)

    Each (low, high) pair in ``test_ranges`` is checked three ways: as a plain
    scalar range, as a scalar wrapped into a 1d vector, and paired with every
    range (itself included) to form a 2d vector of bounds.
    """
    test_ranges = [(-1, 1), (-10, 10), (-10, 5), (-10, 0), (-10, -5), (0, 10), (5, 10)]

    # scalars
    for (range_low, range_high) in test_ranges:
        check_scaled_actions_from_range(range_low, range_high, scalar=True)

    # 1d vectors: wrapped scalars
    for test_range in test_ranges:
        check_scaled_actions_from_range(*test_range)

    # 2d vectors: all combinations of ranges above
    # NOTE: np.float64 replaces the `np.float` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
    for (r1_low, r1_high) in test_ranges:
        for (r2_low, r2_high) in test_ranges:
            check_scaled_actions_from_range(np.array([r1_low, r2_low], dtype=np.float64),
                                            np.array([r1_high, r2_high], dtype=np.float64))
def check_scaled_actions_from_range(low, high, scalar=False):
    """
    Build a dummy Box action space bounded by ``low`` and ``high`` and assert
    that scale_action / unscale_action map the low, middle and high points of
    that space to and from -1, 0 and 1 respectively (per component).

    :param low: (np.ndarray), (int) or (float)
    :param high: (np.ndarray), (int) or (float)
    :param scalar: (bool) Whether consider scalar range or wrap it into 1d vector
    """
    is_plain_number = isinstance(low, (float, int))

    if scalar and is_plain_number:
        # Keep the bounds as Python scalars; the Box is given an explicit shape.
        ones = 1.
        action_space = Box(low, high, shape=(1,))
    else:
        # Promote the bounds to arrays so the Box shape follows from them.
        low = np.atleast_1d(low)
        high = np.atleast_1d(high)
        ones = np.ones_like(low)
        action_space = Box(low, high)

    mid = 0.5 * (low + high)

    unscaled_points = (low, mid, high)
    scaled_points = (-ones, 0. * ones, ones)
    for not_scaled, scaled in zip(unscaled_points, scaled_points):
        # Forward and inverse mappings must agree at every reference point.
        assert np.allclose(scale_action(action_space, not_scaled), scaled)
        assert np.allclose(unscale_action(action_space, scaled), not_scaled)
def test_batch_shape_invariant_to_scaling():
    """
    Check that scale_action and unscale_action return a (2, 3) result when
    given a (2, 3) batch, both for a TensorFlow constant and a numpy matrix,
    with a 3-component Box action space.
    """
    lower_bounds = np.array([-10., -5., -1.])
    upper_bounds = np.array([10., 3., 2.])
    action_space = Box(lower_bounds, upper_bounds)

    tensor = tf.constant(1., shape=[2, 3])
    matrix = np.ones((2, 3))

    # Same order as spelled out by hand: scale (tensor, matrix), then unscale.
    for transform in (scale_action, unscale_action):
        for batch in (tensor, matrix):
            assert transform(action_space, batch).shape == (2, 3)