Add option on max weight

chainer · muupan · Aug 31, 2018 · Jun 1, 2018 · Jun 4, 2018 · Jun 4, 2018
commit bcc0b215453aef177f05438f4b538392dae35acf
diff --git a/chainerrl/replay_buffer.py b/chainerrl/replay_buffer.py
@@ -166,14 +166,20 @@ def stop_current_episode(self):
 class PriorityWeightError(object):
     """For propotional prioritization
 
+    alpha determines how much prioritization is used.
+
+    beta determines how much importance sampling weights are used. beta is
+    scheduled by ``beta0`` and ``betasteps``.
+
     Args:
-        alpha (float): A hyperparameter that determines how much
-            prioritization is used
-        beta0, betasteps (float): Schedule of beta.  beta determines how much
-            importance sampling weights are used.
+        alpha (float): Exponent of errors to compute probabilities to sample
+        beta0 (float): Initial value of beta
+        betasteps (float): Steps to anneal beta to 1
         eps (float): To revisit a step after its error becomes near zero
-        normalize_by_max (bool): normalize weights by maximum priority
-            of a batch.
+        normalize_by_max (str or bool): Method to normalize weights.
+            ``'batch'`` or ``True`` (default): divide by the maximum weight
+            in the sampled batch. ``'memory'``: divide by the maximum weight
+            in the memory. ``False``: do not normalize.
     """
 
     def __init__(self, alpha, beta0, betasteps, eps, normalize_by_max):
@@ -186,12 +192,18 @@ def __init__(self, alpha, beta0, betasteps, eps, normalize_by_max):
         else:
             self.beta_add = (1.0 - beta0) / betasteps
         self.eps = eps
+        if normalize_by_max is True:
+            normalize_by_max = 'batch'
+        assert normalize_by_max in [False, 'batch', 'memory']
         self.normalize_by_max = normalize_by_max
 
     def priority_from_errors(self, errors):
         return [d ** self.alpha + self.eps for d in errors]
 
     def weights_from_probabilities(self, probabilities, min_probability):
+        if self.normalize_by_max == 'batch':
+            # discard global min and compute batch min
+            min_probability = np.min(probabilities)
         if self.normalize_by_max:
             weights = [(p / min_probability) ** -self.beta
                        for p in probabilities]

diff --git a/tests/test_replay_buffer.py b/tests/test_replay_buffer.py
@@ -196,13 +196,16 @@ def test_save_and_load(self):
 @testing.parameterize(*testing.product(
     {
         'capacity': [100, None],
+        'normalize_by_max': ['batch', 'memory'],
     }
 ))
 class TestPrioritizedReplayBuffer(unittest.TestCase):
 
     def test_append_and_sample(self):
         capacity = self.capacity
-        rbuf = replay_buffer.PrioritizedReplayBuffer(capacity)
+        rbuf = replay_buffer.PrioritizedReplayBuffer(
+            capacity,
+            normalize_by_max=self.normalize_by_max)
 
         self.assertEqual(len(rbuf), 0)
 
@@ -317,13 +320,15 @@ def exp_return_of_episode(episode):
 @testing.parameterize(*(
     testing.product({
         'capacity': [100],
+        'normalize_by_max': ['batch', 'memory'],
         'wait_priority_after_sampling': [False],
         'default_priority_func': [exp_return_of_episode],
         'uniform_ratio': [0, 0.1, 1.0],
         'return_sample_weights': [True, False],
     }) +
     testing.product({
         'capacity': [100],
+        'normalize_by_max': ['batch', 'memory'],
         'wait_priority_after_sampling': [True],
         'default_priority_func': [None, exp_return_of_episode],
         'uniform_ratio': [0, 0.1, 1.0],
@@ -335,6 +340,7 @@ class TestPrioritizedEpisodicReplayBuffer(unittest.TestCase):
     def test_append_and_sample(self):
         rbuf = replay_buffer.PrioritizedEpisodicReplayBuffer(
             capacity=self.capacity,
+            normalize_by_max=self.normalize_by_max,
             default_priority_func=self.default_priority_func,
             uniform_ratio=self.uniform_ratio,
             wait_priority_after_sampling=self.wait_priority_after_sampling,