Use pytest #209

Merged · 7 commits · Jan 26, 2018
8 changes: 7 additions & 1 deletion .travis.yml
@@ -14,6 +14,7 @@ install:
      pip install "chainer<3.0.0"
    else
      pip install "chainer>=3.0.0rc1"
+     pip install pytest-cov
    fi
  - pip install -r requirements.txt --only-binary=numpy,scipy
  - pip install jupyter
@@ -30,7 +31,12 @@ script:
  - flake8 chainerrl
  - flake8 tests
  - flake8 examples
- - nosetests -a '!gpu,!slow' -x tests --with-coverage --cover-package chainerrl
+ - |
+   if [ $CHAINER_VERSION -eq 2 ]; then
+     nosetests -a '!gpu,!slow' -x tests --with-coverage --cover-package chainerrl
+   else
+     pytest -m "not gpu and not slow" -x tests --cov=chainerrl
+   fi
- ./test_examples.sh -1
- if [[ $TRAVIS_PYTHON_VERSION == 3.5.1 ]]; then jupyter nbconvert --to notebook --execute examples/quickstart/quickstart.ipynb --ExecutePreprocessor.timeout=600; fi
after_success:
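Note on the CI change above: pytest selects tests by marker expression rather than by nose attributes, and pytest-cov's --cov=chainerrl replaces nose's --with-coverage --cover-package chainerrl. A minimal sketch of how the -m "not gpu and not slow" filter behaves, assuming the suite applies markers named gpu and slow (e.g. via chainer.testing.attr):

# Minimal illustrative sketch; the marker names are assumptions inferred from the
# command line above, not code from this PR.
import pytest


@pytest.mark.slow
def test_long_training():
    # Deselected by: pytest -m "not gpu and not slow"
    assert True


@pytest.mark.gpu
def test_needs_cuda():
    # Also deselected on CPU-only CI.
    assert True


def test_fast_cpu():
    # Unmarked, so always collected and run.
    assert 1 + 1 == 2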
2 changes: 1 addition & 1 deletion setup.py
@@ -13,7 +13,7 @@
]

test_requires = [
-    'nose',
+    'pytest',
]

if sys.version_info < (3, 2):
53 changes: 53 additions & 0 deletions tests/agents_tests/basetest_agents.py
@@ -0,0 +1,53 @@
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
import os
import tempfile
import unittest

from chainer import testing

from chainerrl.envs.abc import ABC
from chainerrl.experiments.train_agent import train_agent


class _TestAgentInterface(unittest.TestCase):

def setUp(self):
self.env = ABC(discrete=self.discrete,
partially_observable=self.partially_observable,
episodic=self.episodic)

def create_agent(self, env):
raise NotImplementedError()

def test_save_load(self):
a = self.create_agent(self.env)
dirname = tempfile.mkdtemp()
a.save(dirname)
self.assertTrue(os.path.exists(dirname))
b = self.create_agent(self.env)
b.load(dirname)

def test_run_episode(self):
agent = self.create_agent(self.env)
done = False
obs = self.env.reset()
t = 0
while t < 10 and not done:
a = agent.act(obs)
obs, r, done, info = self.env.step(a)
t += 1

@testing.attr.slow
def test_train(self):
agent = self.create_agent(self.env)
train_agent(
agent=agent,
env=self.env,
steps=2000,
outdir=tempfile.mkdtemp(),
max_episode_len=10)
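For reference, a concrete test case built on _TestAgentInterface only has to fix the environment flags read in setUp and implement create_agent. A hypothetical sketch (SomeAgent is a placeholder, not an agent from this PR):

# Hypothetical usage sketch; SomeAgent stands in for whatever agent is under test.
class TestSomeAgentOnDiscreteABC(_TestAgentInterface):

    discrete = True                 # read by setUp() above
    partially_observable = False
    episodic = False

    def create_agent(self, env):
        return SomeAgent(
            n_obs=env.observation_space.low.size,
            n_actions=env.action_space.n)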
111 changes: 111 additions & 0 deletions tests/agents_tests/basetest_ddpg.py
@@ -0,0 +1,111 @@
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from builtins import * # NOQA
from future import standard_library
standard_library.install_aliases()

from chainer import optimizers
import numpy as np

from chainerrl.agents.ddpg import DDPGModel
from chainerrl.envs.abc import ABC
from chainerrl.explorers.epsilon_greedy import LinearDecayEpsilonGreedy
from chainerrl.policy import FCBNDeterministicPolicy
from chainerrl.policy import FCLSTMDeterministicPolicy
from chainerrl.q_functions import FCBNLateActionSAQFunction
from chainerrl.q_functions import FCLSTMSAQFunction
from chainerrl import replay_buffer

from basetest_training import _TestTraining


class _TestDDPGOnABC(_TestTraining):

def make_agent(self, env, gpu):
model = self.make_model(env)
policy = model['policy']
q_func = model['q_function']

actor_opt = optimizers.Adam(alpha=1e-4)
actor_opt.setup(policy)

critic_opt = optimizers.Adam(alpha=1e-3)
critic_opt.setup(q_func)

explorer = self.make_explorer(env)
rbuf = self.make_replay_buffer(env)
return self.make_ddpg_agent(env=env, model=model,
actor_opt=actor_opt, critic_opt=critic_opt,
explorer=explorer, rbuf=rbuf, gpu=gpu)

def make_ddpg_agent(self, env, model, actor_opt, critic_opt, explorer,
rbuf, gpu):
raise NotImplementedError()

def make_explorer(self, env):
def random_action_func():
a = env.action_space.sample()
if isinstance(a, np.ndarray):
return a.astype(np.float32)
else:
return a
return LinearDecayEpsilonGreedy(1.0, 0.2, 1000, random_action_func)

def make_replay_buffer(self, env):
return replay_buffer.ReplayBuffer(10 ** 5)


class _TestDDPGOnContinuousPOABC(_TestDDPGOnABC):

def make_model(self, env):
n_dim_obs = env.observation_space.low.size
n_dim_action = env.action_space.low.size
n_hidden_channels = 50
policy = FCLSTMDeterministicPolicy(n_input_channels=n_dim_obs,
n_hidden_layers=2,
n_hidden_channels=n_hidden_channels,
action_size=n_dim_action,
min_action=env.action_space.low,
max_action=env.action_space.high,
bound_action=True)

q_func = FCLSTMSAQFunction(n_dim_obs=n_dim_obs,
n_dim_action=n_dim_action,
n_hidden_layers=2,
n_hidden_channels=n_hidden_channels)

return DDPGModel(policy=policy, q_func=q_func)

def make_env_and_successful_return(self, test):
return ABC(discrete=False, partially_observable=True,
deterministic=test), 1

def make_replay_buffer(self, env):
return replay_buffer.EpisodicReplayBuffer(10 ** 5)


class _TestDDPGOnContinuousABC(_TestDDPGOnABC):

def make_model(self, env):
n_dim_obs = env.observation_space.low.size
n_dim_action = env.action_space.low.size
n_hidden_channels = 50
policy = FCBNDeterministicPolicy(n_input_channels=n_dim_obs,
n_hidden_layers=2,
n_hidden_channels=n_hidden_channels,
action_size=n_dim_action,
min_action=env.action_space.low,
max_action=env.action_space.high,
bound_action=True)

q_func = FCBNLateActionSAQFunction(n_dim_obs=n_dim_obs,
n_dim_action=n_dim_action,
n_hidden_layers=2,
n_hidden_channels=n_hidden_channels)

return DDPGModel(policy=policy, q_func=q_func)

def make_env_and_successful_return(self, test):
return ABC(discrete=False, deterministic=test), 1
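The concrete DDPG test cases that implement make_ddpg_agent are not part of this diff. As a rough sketch only, and assuming a constructor of the form DDPG(model, actor_optimizer, critic_optimizer, replay_buffer, gamma, explorer, ...), such an implementation might look like:

# Rough sketch; the DDPG constructor arguments here are assumptions, not code from this PR.
from chainerrl.agents.ddpg import DDPG


class TestDDPGOnContinuousABC(_TestDDPGOnContinuousABC):

    def make_ddpg_agent(self, env, model, actor_opt, critic_opt, explorer,
                        rbuf, gpu):
        return DDPG(model, actor_opt, critic_opt, rbuf,
                    gamma=0.99, explorer=explorer, gpu=gpu,
                    replay_start_size=100, minibatch_size=32)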
@@ -14,7 +14,7 @@
from chainerrl import q_functions
from chainerrl import replay_buffer

-from test_training import _TestTraining
+from basetest_training import _TestTraining


class _TestDQNLike(_TestTraining):
120 changes: 120 additions & 0 deletions tests/agents_tests/basetest_pgt.py
@@ -0,0 +1,120 @@
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from builtins import * # NOQA
from future import standard_library
standard_library.install_aliases()

import chainer
from chainer import functions as F
from chainer import links as L
from chainer import optimizers
import numpy as np

from chainerrl.envs.abc import ABC
from chainerrl.explorers.epsilon_greedy import LinearDecayEpsilonGreedy
from chainerrl.links import Sequence
from chainerrl import policies
from chainerrl import q_function
from chainerrl import replay_buffer

from basetest_training import _TestTraining


class _TestPGTOnABC(_TestTraining):

def make_agent(self, env, gpu):
model = self.make_model(env)
policy = model['policy']
q_func = model['q_function']

actor_opt = optimizers.Adam(alpha=1e-4)
actor_opt.setup(policy)

critic_opt = optimizers.Adam(alpha=1e-3)
critic_opt.setup(q_func)

explorer = self.make_explorer(env)
rbuf = self.make_replay_buffer(env)
return self.make_pgt_agent(env=env, model=model,
actor_opt=actor_opt, critic_opt=critic_opt,
explorer=explorer, rbuf=rbuf, gpu=gpu)

def make_pgt_agent(self, env, model, actor_opt, critic_opt, explorer,
rbuf, gpu):
raise NotImplementedError()

def make_explorer(self, env):
def random_action_func():
a = env.action_space.sample()
if isinstance(a, np.ndarray):
return a.astype(np.float32)
else:
return a
return LinearDecayEpsilonGreedy(1.0, 0.2, 1000, random_action_func)

def make_replay_buffer(self, env):
return replay_buffer.ReplayBuffer(10 ** 5)


class _TestPGTOnContinuousPOABC(_TestPGTOnABC):

def make_model(self, env):
n_dim_obs = env.observation_space.low.size
n_dim_action = env.action_space.low.size
n_hidden_channels = 50
policy = Sequence(
L.Linear(n_dim_obs, n_hidden_channels),
F.relu,
L.Linear(n_hidden_channels, n_hidden_channels),
F.relu,
L.LSTM(n_hidden_channels, n_hidden_channels),
policies.FCGaussianPolicy(
n_input_channels=n_hidden_channels,
action_size=n_dim_action,
min_action=env.action_space.low,
max_action=env.action_space.high)
)

q_func = q_function.FCLSTMSAQFunction(
n_dim_obs=n_dim_obs,
n_dim_action=n_dim_action,
n_hidden_layers=2,
n_hidden_channels=n_hidden_channels)

return chainer.Chain(policy=policy, q_function=q_func)

def make_env_and_successful_return(self, test):
return ABC(discrete=False, partially_observable=True,
deterministic=test), 1

def make_replay_buffer(self, env):
return replay_buffer.EpisodicReplayBuffer(10 ** 5)


class _TestPGTOnContinuousABC(_TestPGTOnABC):

def make_model(self, env):
n_dim_obs = env.observation_space.low.size
n_dim_action = env.action_space.low.size
n_hidden_channels = 50

policy = policies.FCGaussianPolicy(
n_input_channels=n_dim_obs,
n_hidden_layers=2,
n_hidden_channels=n_hidden_channels,
action_size=n_dim_action,
min_action=env.action_space.low,
max_action=env.action_space.high)

q_func = q_function.FCSAQFunction(
n_dim_obs=n_dim_obs,
n_dim_action=n_dim_action,
n_hidden_layers=2,
n_hidden_channels=n_hidden_channels)

return chainer.Chain(policy=policy, q_function=q_func)

def make_env_and_successful_return(self, test):
return ABC(discrete=False, deterministic=test), 1