Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements Time-based evaluations #367

Merged
merged 45 commits into from
Jan 8, 2019
Merged
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
d09407e
preliminary progress on implementing time based evaluations
prabhatnagarajan Nov 27, 2018
9e936b3
adds some functionality to time based eval
prabhatnagarajan Dec 10, 2018
82f70ba
Merge branch 'master' into time_based_eval
prabhatnagarajan Dec 10, 2018
8c06d7a
modifies batch_eval to support steps, unfinished
prabhatnagarajan Dec 10, 2018
639991a
modifies calls to eval_performance
prabhatnagarajan Dec 10, 2018
235b89d
adds a comment for docs
prabhatnagarajan Dec 10, 2018
47edb15
changes arguments in examples
prabhatnagarajan Dec 12, 2018
e6b422f
resolves merge conflicts
prabhatnagarajan Dec 12, 2018
391e034
fixes misuse of completed_episode
prabhatnagarajan Dec 12, 2018
5672bf9
joins the step-based and episode-based evaluation into one
prabhatnagarajan Dec 12, 2018
5e5b138
address flakes
prabhatnagarajan Dec 12, 2018
dc37582
makes gym examples compatible with new evaluation
prabhatnagarajan Dec 12, 2018
0321632
makes minor fixes to tests and examples
prabhatnagarajan Dec 13, 2018
c96497f
fixes additional errors
prabhatnagarajan Dec 13, 2018
60f9d7c
fixes bugs in evaluation code
prabhatnagarajan Dec 17, 2018
c9ba47b
minor changes
prabhatnagarajan Dec 17, 2018
fe301a6
Merge branch 'master' into time_based_eval
prabhatnagarajan Dec 17, 2018
9e3cb48
makes changes to train_with_eval in several Gym-based agents
prabhatnagarajan Dec 17, 2018
0c0d27e
changes batch train with eval calls
prabhatnagarajan Dec 17, 2018
db9aa1e
additional fixes to tests and examples
prabhatnagarajan Dec 18, 2018
d6087f1
Merge branch 'master' into time_based_eval
prabhatnagarajan Dec 18, 2018
3c13db2
adds fixes to tests and quickstart
prabhatnagarajan Dec 18, 2018
e66ef78
rewrites run_evaluation_episodes to be consistent with tests
prabhatnagarajan Dec 18, 2018
8ff0541
makes batch_run_eval episodes compatible with batch eval test
prabhatnagarajan Dec 19, 2018
150cc03
Merge branch 'master' of https://github.com/prabhatnagarajan/chainerrl
prabhatnagarajan Dec 19, 2018
30f80bf
resolves local merge conflicts
prabhatnagarajan Dec 19, 2018
52f5d87
addresses flake errors
prabhatnagarajan Dec 19, 2018
308b641
adds some tests for the new timestep-based-evaluation protocols
prabhatnagarajan Dec 19, 2018
761f0c4
adds batch tests and does some renaming in order to pass flakes
prabhatnagarajan Dec 25, 2018
0459483
Merge remote-tracking branch 'upstream/master'
prabhatnagarajan Dec 25, 2018
464c7aa
Merge branch 'master' into time_based_eval
prabhatnagarajan Dec 25, 2018
6e03607
makes grasping example compatible with new eval
prabhatnagarajan Dec 25, 2018
af52482
removes noisy nets from basic dqn example
prabhatnagarajan Dec 25, 2018
caeade1
applies autopep
prabhatnagarajan Dec 25, 2018
def7ff6
renames a variable in batch train and eval
prabhatnagarajan Dec 25, 2018
64dc95f
fixes bug
prabhatnagarajan Dec 25, 2018
c35cce9
renames variables in test_evaluator
prabhatnagarajan Dec 26, 2018
e3c4457
tests timesteps with evaluate if necessary
prabhatnagarajan Dec 26, 2018
7ffad8f
fixes redundant logic
prabhatnagarajan Dec 26, 2018
c068816
applies autpep
prabhatnagarajan Dec 26, 2018
98088b4
refactors evaluation code to be more efficient
prabhatnagarajan Jan 7, 2019
87fa084
cleans up print statements from debugging
prabhatnagarajan Jan 7, 2019
707197e
removes more print statements
prabhatnagarajan Jan 7, 2019
f6ac362
refactors testing classes
prabhatnagarajan Jan 7, 2019
696c687
applies autopep
prabhatnagarajan Jan 8, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
adds batch tests and does some renaming in order to pass flakes
  • Loading branch information
prabhatnagarajan committed Dec 25, 2018
commit 761f0c46218ab09d3d72353f37311a807f272374
116 changes: 95 additions & 21 deletions tests/experiments_tests/test_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import mock

import chainerrl
from chainerrl.experiments import evaluator


@testing.parameterize(
Expand All @@ -34,7 +35,7 @@ def test_evaluate_if_necessary(self):
env.reset.return_value = 'obs'
env.step.return_value = ('obs', 0, True, {})

evaluator = chainerrl.experiments.evaluator.Evaluator(
agent_evaluator = evaluator.Evaluator(
agent=agent,
env=env,
n_steps=None,
Expand All @@ -46,14 +47,14 @@ def test_evaluate_if_necessary(self):
save_best_so_far_agent=self.save_best_so_far_agent,
)

evaluator.evaluate_if_necessary(t=1, episodes=1)
agent_evaluator.evaluate_if_necessary(t=1, episodes=1)
self.assertEqual(agent.act.call_count, 0)

evaluator.evaluate_if_necessary(t=2, episodes=2)
agent_evaluator.evaluate_if_necessary(t=2, episodes=2)
self.assertEqual(agent.act.call_count, 0)

# First evaluation
evaluator.evaluate_if_necessary(t=3, episodes=3)
agent_evaluator.evaluate_if_necessary(t=3, episodes=3)
self.assertEqual(agent.act.call_count, self.n_runs)
self.assertEqual(agent.stop_episode.call_count, self.n_runs)
if self.save_best_so_far_agent:
Expand All @@ -62,7 +63,7 @@ def test_evaluate_if_necessary(self):
self.assertEqual(agent.save.call_count, 0)

# Second evaluation with the same score
evaluator.evaluate_if_necessary(t=6, episodes=6)
agent_evaluator.evaluate_if_necessary(t=6, episodes=6)
self.assertEqual(agent.act.call_count, 2 * self.n_runs)
self.assertEqual(agent.stop_episode.call_count, 2 * self.n_runs)
if self.save_best_so_far_agent:
Expand All @@ -72,7 +73,7 @@ def test_evaluate_if_necessary(self):

# Third evaluation with a better score
env.step.return_value = ('obs', 1, True, {})
evaluator.evaluate_if_necessary(t=9, episodes=9)
agent_evaluator.evaluate_if_necessary(t=9, episodes=9)
self.assertEqual(agent.act.call_count, 3 * self.n_runs)
self.assertEqual(agent.stop_episode.call_count, 3 * self.n_runs)
if self.save_best_so_far_agent:
Expand Down Expand Up @@ -100,7 +101,7 @@ def test_evaluate_if_necessary(self):
env.reset.return_value = 'obs'
env.step.return_value = ('obs', 0, True, {})

evaluator = chainerrl.experiments.evaluator.AsyncEvaluator(
agent_evaluator = evaluator.AsyncEvaluator(
n_runs=self.n_runs,
eval_interval=3,
outdir=outdir,
Expand All @@ -109,14 +110,17 @@ def test_evaluate_if_necessary(self):
save_best_so_far_agent=self.save_best_so_far_agent,
)

evaluator.evaluate_if_necessary(t=1, episodes=1, env=env, agent=agent)
agent_evaluator.evaluate_if_necessary(
t=1, episodes=1, env=env, agent=agent)
self.assertEqual(agent.act.call_count, 0)

evaluator.evaluate_if_necessary(t=2, episodes=2, env=env, agent=agent)
agent_evaluator.evaluate_if_necessary(
t=2, episodes=2, env=env, agent=agent)
self.assertEqual(agent.act.call_count, 0)

# First evaluation
evaluator.evaluate_if_necessary(t=3, episodes=3, env=env, agent=agent)
agent_evaluator.evaluate_if_necessary(
t=3, episodes=3, env=env, agent=agent)
self.assertEqual(agent.act.call_count, self.n_runs)
self.assertEqual(agent.stop_episode.call_count, self.n_runs)
if self.save_best_so_far_agent:
Expand All @@ -125,7 +129,8 @@ def test_evaluate_if_necessary(self):
self.assertEqual(agent.save.call_count, 0)

# Second evaluation with the same score
evaluator.evaluate_if_necessary(t=6, episodes=6, env=env, agent=agent)
agent_evaluator.evaluate_if_necessary(
t=6, episodes=6, env=env, agent=agent)
self.assertEqual(agent.act.call_count, 2 * self.n_runs)
self.assertEqual(agent.stop_episode.call_count, 2 * self.n_runs)
if self.save_best_so_far_agent:
Expand All @@ -135,14 +140,16 @@ def test_evaluate_if_necessary(self):

# Third evaluation with a better score
env.step.return_value = ('obs', 1, True, {})
evaluator.evaluate_if_necessary(t=9, episodes=9, env=env, agent=agent)
agent_evaluator.evaluate_if_necessary(
t=9, episodes=9, env=env, agent=agent)
self.assertEqual(agent.act.call_count, 3 * self.n_runs)
self.assertEqual(agent.stop_episode.call_count, 3 * self.n_runs)
if self.save_best_so_far_agent:
self.assertEqual(agent.save.call_count, 2)
else:
self.assertEqual(agent.save.call_count, 0)


@testing.parameterize(
*testing.product({
'n_episodes': [None, 1],
Expand All @@ -167,13 +174,13 @@ def test_timesteps(self):
]

if self.n_episodes:
with self.assertRaises(AssertionError) as _:
scores = chainerrl.experiments.evaluator.run_evaluation_episodes(
env, agent,
n_steps=self.n_timesteps,
n_episodes=self.n_episodes)
with self.assertRaises(AssertionError):
scores = evaluator.run_evaluation_episodes(
env, agent,
n_steps=self.n_timesteps,
n_episodes=self.n_episodes)
else:
scores = chainerrl.experiments.evaluator.run_evaluation_episodes(
scores = evaluator.run_evaluation_episodes(
env, agent,
n_steps=self.n_timesteps,
n_episodes=self.n_episodes)
Expand All @@ -188,7 +195,6 @@ def test_timesteps(self):
self.assertAlmostEqual(scores[0], 0.6)
self.assertAlmostEqual(scores[1], 0.5)


def test_needs_reset(self):
agent = mock.Mock()
env = mock.Mock()
Expand All @@ -203,15 +209,83 @@ def test_needs_reset(self):
(('state', 6), 0, False, {}),
(('state', 7), 1, True, {}),
]
scores = chainerrl.experiments.evaluator.run_evaluation_episodes(
scores = evaluator.run_evaluation_episodes(
env, agent, n_steps=None, n_episodes=2)
self.assertAlmostEqual(len(scores), 2)
self.assertAlmostEqual(scores[0], 0)
self.assertAlmostEqual(scores[1], 0.5)


@testing.parameterize(
*testing.product({
'n_episodes': [None, 1],
'n_timesteps': [2, 5, 6],
})
)
muupan marked this conversation as resolved.
Show resolved Hide resolved
class TestBatchRunEvaluationEpisode(unittest.TestCase):

def test_timesteps(self):
    """Check that batch evaluation respects the ``n_steps`` cutoff.

    Runs two mock envs inside a ``SerialVectorEnv`` and asserts that
    ``batch_run_evaluation_episodes`` returns the scores expected for
    each ``self.n_timesteps`` budget, and that supplying both
    ``n_steps`` and ``n_episodes`` raises ``AssertionError``.
    """
    agent = mock.Mock()
    # The agent always emits action 1 for both envs in the batch.
    agent.batch_act.side_effect = [[1, 1]] * 5

    def make_env(idx):
        # Build one scripted mock env per batch slot.
        env = mock.Mock()
        if idx == 0:
            # First episode: 0 -> 1 -> 2 -> 3 (reset)
            # Second episode: 4 -> 5 -> 6 -> 7 (done)
            env.reset.side_effect = [('state', 0), ('state', 4)]
            env.step.side_effect = [
                (('state', 1), 0, False, {}),
                (('state', 2), 0.1, False, {}),
                (('state', 3), 0.2, False, {'needs_reset': True}),
                (('state', 5), -0.5, False, {}),
                (('state', 6), 0, False, {}),
                (('state', 7), 1, True, {}),
            ]
        else:
            # First episode: 0 -> 1 (reset)
            # Second episode: 2 -> 3 (reset)
            # Third episode: 4 -> 5 -> 6 -> 7 (done)
            env.reset.side_effect = [
                ('state', 0), ('state', 2), ('state', 4)]
            env.step.side_effect = [
                (('state', 1), 2, False, {'needs_reset': True}),
                (('state', 3), 3, False, {'needs_reset': True}),
                (('state', 5), -0.6, False, {}),
                (('state', 6), 0, False, {}),
                (('state', 7), 1, True, {}),
            ]
        return env

    vec_env = chainerrl.envs.SerialVectorEnv(
        [make_env(i) for i in range(2)])
    if self.n_episodes:
        # Supplying both budgets at once is a usage error.
        with self.assertRaises(AssertionError):
            evaluator.batch_run_evaluation_episodes(
                vec_env, agent,
                n_steps=self.n_timesteps,
                n_episodes=self.n_episodes)
    else:
        # First Env: [1 2 (3_a) 5 6 (7_a)]
        # Second Env: [(1)(3_b) 5 6 (7_b)]
        scores = evaluator.batch_run_evaluation_episodes(
            vec_env, agent,
            n_steps=self.n_timesteps,
            n_episodes=self.n_episodes)
        if self.n_timesteps == 2:
            self.assertAlmostEqual(len(scores), 1)
            self.assertAlmostEqual(scores[0], 0.1)
        else:
            # n_timesteps in (5, 6) yields the same three finished
            # episodes; the two cases previously had byte-identical
            # elif/else branches, merged here.
            self.assertAlmostEqual(len(scores), 3)
            self.assertAlmostEqual(scores[0], 0.3)
            self.assertAlmostEqual(scores[1], 2.0)
            self.assertAlmostEqual(scores[2], 3.0)

def test_needs_reset(self):
agent = mock.Mock()
agent.batch_act.side_effect = [[1, 1]] * 5
Expand Down Expand Up @@ -251,7 +325,7 @@ def make_env(idx):
# First Env: [1 2 (3_a) 5 6 (7_a)]
# Second Env: [(1) (3_b) 5 6 (7_b)]
# Results: (1), (3a), (3b), (7b)
scores = chainerrl.experiments.evaluator.batch_run_evaluation_episodes(
scores = evaluator.batch_run_evaluation_episodes(
vec_env, agent, n_steps=None, n_episodes=4)
self.assertAlmostEqual(len(scores), 4)
self.assertAlmostEqual(scores[0], 0)
Expand Down