Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIFO prioritized replay buffer #277

Merged
merged 7 commits into from
Aug 31, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Add option on max weight
  • Loading branch information
toslunar committed Jul 3, 2018
commit bcc0b215453aef177f05438f4b538392dae35acf
24 changes: 18 additions & 6 deletions chainerrl/replay_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,20 @@ def stop_current_episode(self):
class PriorityWeightError(object):
"""For proportional prioritization

alpha determines how much prioritization is used.

beta determines how much importance sampling weights are used. beta is
scheduled by ``beta0`` and ``betasteps``.

Args:
alpha (float): A hyperparameter that determines how much
prioritization is used
beta0, betasteps (float): Schedule of beta. beta determines how much
importance sampling weights are used.
alpha (float): Exponent of errors to compute probabilities to sample
beta0 (float): Initial value of beta
betasteps (float): Steps to anneal beta to 1
eps (float): To revisit a step after its error becomes near zero
normalize_by_max (bool): normalize weights by maximum priority
of a batch.
normalize_by_max (str): Method to normalize weights. ``'batch'`` or
``True`` (default): divide by the maximum weight in the sampled
batch. ``'memory'``: divide by the maximum weight in the memory.
``False``: do not normalize.
"""

def __init__(self, alpha, beta0, betasteps, eps, normalize_by_max):
Expand All @@ -186,12 +192,18 @@ def __init__(self, alpha, beta0, betasteps, eps, normalize_by_max):
else:
self.beta_add = (1.0 - beta0) / betasteps
self.eps = eps
if normalize_by_max is True:
normalize_by_max = 'batch'
assert normalize_by_max in [False, 'batch', 'memory']
self.normalize_by_max = normalize_by_max

def priority_from_errors(self, errors):
    """Convert a sequence of errors into sampling priorities.

    Each error is raised to the power ``self.alpha`` and ``self.eps`` is
    then added to the result.

    Args:
        errors: Iterable of numeric error values.

    Returns:
        list: One priority per input error, in the same order.
    """
    priorities = []
    for err in errors:
        # Attributes are read per element, exactly as the one-line
        # comprehension form does.
        priorities.append(err ** self.alpha + self.eps)
    return priorities

def weights_from_probabilities(self, probabilities, min_probability):
if self.normalize_by_max == 'batch':
# discard global min and compute batch min
min_probability = np.min(min_probability)
if self.normalize_by_max:
weights = [(p / min_probability) ** -self.beta
for p in probabilities]
Expand Down
8 changes: 7 additions & 1 deletion tests/test_replay_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,13 +196,16 @@ def test_save_and_load(self):
@testing.parameterize(*testing.product(
{
'capacity': [100, None],
'normalize_by_max': ['batch', 'memory'],
}
))
class TestPrioritizedReplayBuffer(unittest.TestCase):

def test_append_and_sample(self):
capacity = self.capacity
rbuf = replay_buffer.PrioritizedReplayBuffer(capacity)
rbuf = replay_buffer.PrioritizedReplayBuffer(
capacity,
normalize_by_max=self.normalize_by_max)

self.assertEqual(len(rbuf), 0)

Expand Down Expand Up @@ -317,13 +320,15 @@ def exp_return_of_episode(episode):
@testing.parameterize(*(
testing.product({
'capacity': [100],
'normalize_by_max': ['batch', 'memory'],
'wait_priority_after_sampling': [False],
'default_priority_func': [exp_return_of_episode],
'uniform_ratio': [0, 0.1, 1.0],
'return_sample_weights': [True, False],
}) +
testing.product({
'capacity': [100],
'normalize_by_max': ['batch', 'memory'],
'wait_priority_after_sampling': [True],
'default_priority_func': [None, exp_return_of_episode],
'uniform_ratio': [0, 0.1, 1.0],
Expand All @@ -335,6 +340,7 @@ class TestPrioritizedEpisodicReplayBuffer(unittest.TestCase):
def test_append_and_sample(self):
rbuf = replay_buffer.PrioritizedEpisodicReplayBuffer(
capacity=self.capacity,
normalize_by_max=self.normalize_by_max,
default_priority_func=self.default_priority_func,
uniform_ratio=self.uniform_ratio,
wait_priority_after_sampling=self.wait_priority_after_sampling,
Expand Down