Docopt #38

Merged on Jan 7, 2019 (29 commits)

Changes from 1 commit
new args, gpu oom
jtatusko committed Jan 1, 2019
commit 2958faaba1c525fb650b22b93696c8e7cd20211c
adept/environments/_env.py (33 changes: 0 additions & 33 deletions)
@@ -13,7 +13,6 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import abc
import json


class HasEnvMetaData(metaclass=abc.ABCMeta):
@@ -39,14 +38,6 @@ def gpu_preprocessor(self):


class EnvBase(HasEnvMetaData, metaclass=abc.ABCMeta):
@property
@abc.abstractmethod
def defaults(self):
"""
:return: Dictionary of defaults.
"""
raise NotImplementedError

@abc.abstractmethod
def step(self, action):
raise NotImplementedError
@@ -59,30 +50,6 @@ def reset(self, **kwargs):
def close(self):
raise NotImplementedError

def prompt(self):
"""
Display defaults as JSON, prompt user for changes.

:return: Dict[str, Any] Updated config dictionary.
"""
if not self.defaults:
return self.defaults

user_input = input(
'\n{} Defaults:\n{}\nPress ENTER to use defaults. Otherwise, '
'modify JSON keys then press ENTER.\n'.format(
self.__class__.__name__,
json.dumps(self.defaults, indent=2, sort_keys=True)
)
)

# use defaults if no changes specified
if user_input == '':
return self.defaults

updates = json.loads(user_input)
return {**self.defaults, **updates}


def reward_normalizer_by_env_id(env_id):
from adept.utils.normalizers import Clip, Scale
adept/environments/_env_plugin.py (37 changes: 36 additions & 1 deletion)
@@ -13,14 +13,24 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import abc
import json

from adept.environments._env import EnvBase


class EnvPlugin(EnvBase, metaclass=abc.ABCMeta):
"""
Implement this class to add your custom environment.
Implement this class to add your custom environment. Don't forget to
implement defaults.
"""
defaults = None

@classmethod
def check_defaults(cls):
if cls.defaults is None:
raise NotImplementedError(
'Subclass must define class attribute: defaults'
)

def __init__(self, action_space, cpu_preprocessor, gpu_preprocessor):
"""
@@ -68,3 +78,28 @@ def cpu_preprocessor(self):
@property
def gpu_preprocessor(self):
return self._gpu_preprocessor

@classmethod
def prompt(cls):
"""
Display defaults as JSON, prompt user for changes.

:return: Dict[str, Any] Updated config dictionary.
"""
if not cls.defaults:
return cls.defaults

user_input = input(
'\n{} Defaults:\n{}\nPress ENTER to use defaults. Otherwise, '
'modify JSON keys then press ENTER.\n'.format(
cls.__name__,
json.dumps(cls.defaults, indent=2, sort_keys=True)
)
)

# use defaults if no changes specified
if user_input == '':
return cls.defaults

updates = json.loads(user_input)
return {**cls.defaults, **updates}
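
A minimal standalone sketch (plain Python, not adept code) of the merge semantics this new prompt() classmethod implements: the defaults are printed as JSON, and any JSON the user types back is merged over them, with the user's keys winning. The defaults dict and the simulated reply below are hypothetical.

import json

# Hypothetical defaults, in the shape an EnvPlugin subclass would declare.
defaults = {"max_episode_length": 10000, "skip_rate": 4, "noop_max": 30}

# Simulated user reply; the real prompt() reads this from stdin via input().
user_input = '{"noop_max": 10}'

if user_input == '':
    config = defaults                  # plain ENTER keeps the defaults
else:
    updates = json.loads(user_input)   # keys typed by the user win the merge
    config = {**defaults, **updates}

print(json.dumps(config, indent=2, sort_keys=True))
# noop_max is overridden to 10; the other keys keep their default values
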
adept/environments/_metadata.py (2 changes: 1 addition & 1 deletion)
@@ -42,7 +42,7 @@ def from_args(cls, args, registry=EnvPluginRegistry()):
:param registry: Optionally provide to avoid recreating.
:return: EnvMetaData
"""
plugin_class = registry.lookup_env_class(args.env_id)
plugin_class = registry.lookup_env_class(args.env)
Inline review comment from the Contributor Author on this line: module_class

return cls(plugin_class, args)

@property
adept/environments/deepmind_sc2.py (4 changes: 3 additions & 1 deletion)
@@ -33,6 +33,8 @@


class AdeptSC2Env(EnvPlugin):
defaults = {}

def __init__(self, env):
self.sc2_env = env
self._max_num_actions = len(FUNCTIONS)
@@ -98,7 +100,7 @@ def from_args(
feature_screen=84, feature_minimap=84, action_space='FEATURES'
)
env = SC2Env(
map_name=args.env_id,
map_name=args.env,
step_mul=8,
game_steps_per_episode=0,
discount=0.99,
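
Since an empty dict is falsy in Python, declaring defaults = {} means the prompt() classmethod added in _env_plugin.py hits its early return and never asks the user for overrides. A tiny illustration:

defaults = {}
if not defaults:        # {} is falsy, so prompt() returns immediately
    config = defaults   # AdeptSC2Env is never prompted for JSON overrides
print(config)           # {}
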
adept/environments/managers/_manager.py (4 changes: 2 additions & 2 deletions)
@@ -49,8 +49,8 @@ def from_args(
if nb_env is None:
nb_env = args.nb_env

engine = registry.lookup_engine(args.env_id)
env_class = registry.lookup_env_class(args.env_id)
engine = registry.lookup_engine(args.env)
env_class = registry.lookup_env_class(args.env)

env_fns = []
for i in range(nb_env):
adept/environments/openai_gym.py (20 changes: 15 additions & 5 deletions)
@@ -28,6 +28,17 @@


class AdeptGymEnv(EnvPlugin):
"""
Converts gym observations to dictionaries and reads actions from
dictionaries instead of numpy arrays. This allows the Gym Env to
communicate properly with an EnvManager.
"""
defaults = {
"max_episode_length": 10000,
"skip_rate": 4,
"noop_max": 30
}

def __init__(self, env, do_frame_stack):
# Define the preprocessing operations to be performed on observations
# CPU Ops
@@ -55,15 +66,14 @@ def __init__(self, env, do_frame_stack):
@classmethod
def from_args(cls, args, seed, **kwargs):
# TODO fix this hack
do_frame_stack = 'Linear' in args.network_body
env = gym.make(args.env_id)
do_frame_stack = 'Linear' in args.netbody
env = gym.make(args.env)
if hasattr(env.unwrapped, 'ale'):
if 'FIRE' in env.unwrapped.get_action_meanings():
env = FireResetEnv(env)
env = NoopResetEnv(env, noop_max=30)
env = NoopResetEnv(env, noop_max=args.noop_max)
env = EpisodicLifeEnv(env)
if 'NoFrameskip' in args.env_id:
assert 'NoFrameskip' in env.spec.id
if 'NoFrameskip' in args.env:
env._max_episode_steps = args.max_episode_length * \
args.skip_rate
env = MaxAndSkipEnv(env, skip=args.skip_rate)
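
As a quick sanity check on the defaults above: for 'NoFrameskip' Gym envs, from_args now caps episodes at max_episode_length * skip_rate emulator steps, and NoopResetEnv reads its cap from args.noop_max instead of the previous hard-coded 30. With the default values this works out to:

max_episode_length = 10000   # default
skip_rate = 4                # default
noop_max = 30                # default, same value the old hard-coded wrapper used

max_episode_steps = max_episode_length * skip_rate
print(max_episode_steps)     # 40000 emulator steps per episode
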
adept/registries/environment.py (3 changes: 3 additions & 0 deletions)
@@ -13,6 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from enum import IntEnum
from adept.environments._env_plugin import EnvPlugin


class Engines(IntEnum):
@@ -83,6 +84,8 @@ def __init__(self):

def register_env(self, engine_id, env_plugin_class, env_id_set):
# TODO assert no duplicate env_ids
assert issubclass(env_plugin_class, EnvPlugin)
env_plugin_class.check_defaults()
self.engine_ids_by_env_id_set[frozenset(env_id_set)] = engine_id
self.plugin_class_by_engine_id[engine_id] = env_plugin_class

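
A simplified, self-contained sketch of what the new register_env guard enforces (stand-in classes for illustration, not the real adept imports): a plugin must subclass EnvPlugin and override the class attribute defaults, otherwise check_defaults raises before registration completes.

# Stand-ins that mirror the check added above; names and keys are illustrative.
class EnvPlugin:
    defaults = None

    @classmethod
    def check_defaults(cls):
        if cls.defaults is None:
            raise NotImplementedError(
                'Subclass must define class attribute: defaults'
            )


class GoodEnv(EnvPlugin):
    defaults = {"frame_skip": 4}   # hypothetical key


class BadEnv(EnvPlugin):
    pass                           # forgot to define defaults


def register_env(env_plugin_class):
    assert issubclass(env_plugin_class, EnvPlugin)
    env_plugin_class.check_defaults()


register_env(GoodEnv)              # passes
try:
    register_env(BadEnv)
except NotImplementedError as err:
    print(err)                     # Subclass must define class attribute: defaults
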
adept/scripts/benchmark_atari.py (2 changes: 1 addition & 1 deletion)
@@ -81,5 +81,5 @@
args.mode_name = 'Local'

for env_id in ATARI_6_ENVS:
args.env_id = env_id
args.env = env_id
main(args)
adept/scripts/evaluate.py (2 changes: 1 addition & 1 deletion)
@@ -93,7 +93,7 @@ def main(args, env_registry=EnvPluginRegistry()):
# construct agent
agent = make_agent(
network, device, env.gpu_preprocessor,
env_registry.lookup_engine(train_args.env_id), env.action_space,
env_registry.lookup_engine(train_args.env), env.action_space,
train_args
)
# container
adept/scripts/impala.py (24 changes: 12 additions & 12 deletions)
@@ -47,9 +47,9 @@ def main(args, env_registry=EnvPluginRegistry()):
timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
log_id = make_log_id_from_timestamp(
args.tag, args.mode_name, args.agent,
args.network_vision + args.network_body, timestamp
args.net3d + args.netbody, timestamp
)
log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)
log_id_dir = os.path.join(args.logdir, args.env, log_id)
os.makedirs(log_id_dir)
saver = SimpleModelSaver(log_id_dir)
print_ascii_logo()
@@ -60,9 +60,9 @@
if rank != 0:
log_id = make_log_id_from_timestamp(
args.tag, args.mode_name, args.agent,
args.network_vision + args.network_body, timestamp
args.net3d + args.netbody, timestamp
)
log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)
log_id_dir = os.path.join(args.logdir, args.env, log_id)

comm.Barrier()

@@ -130,7 +130,7 @@ def main(args, env_registry=EnvPluginRegistry()):
torch.backends.cudnn.benchmark = cudnn
agent = make_agent(
network, device, env.gpu_preprocessor,
env_registry.lookup_engine(args.env_id), env.action_space, args
env_registry.lookup_engine(args.env), env.action_space, args
)

# workers
@@ -181,7 +181,7 @@ def main(args, env_registry=EnvPluginRegistry()):
# Construct the optimizer
def make_optimizer(params):
opt = torch.optim.RMSprop(
params, lr=args.learning_rate, eps=1e-5, alpha=0.99
params, lr=args.lr, eps=1e-5, alpha=0.99
)
if args.load_optimizer:
opt.load_state_dict(
Expand All @@ -197,7 +197,7 @@ def make_optimizer(params):
comm,
make_optimizer,
summary_writer,
args.summary_frequency,
args.summary_freq,
saver,
args.epoch_len,
args.host_training_info_interval,
Expand All @@ -218,14 +218,14 @@ def make_optimizer(params):
container.run(
args.max_dynamic_batch,
args.max_queue_length,
args.max_train_steps,
args.nb_train_frame,
dynamic=True,
min_dynamic_batch=args.min_dynamic_batch
)
else:
container.run(
args.num_rollouts_in_batch, args.max_queue_length,
args.max_train_steps
args.nb_train_frame
)
profiler.stop()
print(profiler.output_text(unicode=True, color=True))
Expand All @@ -234,14 +234,14 @@ def make_optimizer(params):
container.run(
args.max_dynamic_batch,
args.max_queue_length,
args.max_train_steps,
args.nb_train_frame,
dynamic=True,
min_dynamic_batch=args.min_dynamic_batch
)
else:
container.run(
args.num_rollouts_in_batch, args.max_queue_length,
args.max_train_steps
args.nb_train_frame
)


Expand Down Expand Up @@ -321,7 +321,7 @@ def add_args(parser):

if args.debug:
args.nb_env = 3
args.log_dir = '/tmp/'
args.logdir = '/tmp/'

args.mode_name = 'IMPALA'
main(args)