Switch the main experiment to HyperState
+ minor fixes
+ now possible to use CLI arguments for hyperparams and other options
ntoxeg committed Sep 7, 2022
1 parent 2f2b78b commit cd91360
Showing 4 changed files with 47 additions and 34 deletions.
experiments/drunk_dwarf/extended_curriculum/drunk_dwarf.py (3 changes: 1 addition & 2 deletions)
```diff
@@ -62,7 +62,7 @@ def key_check(_, info) -> bool:
     # f"<(<$obj --> [leftward]> &/ ^move_forwards &/ ^rotate_left) =/> <$obj --> [ahead]>>.",
     # f"<(<$obj --> [rightward]> &/ ^move_forwards &/ ^rotate_right) =/> <$obj --> [ahead]>>.",
 ]
-background_knowledge = []
+background_knowledge: list[str] = []

 key_goal_sym = "GOT_KEY"
 reach_key = [f"<({ext('key')} --> [reached]) =/> {key_goal_sym}>."]
```
```diff
@@ -82,7 +82,6 @@ def key_check(_, info) -> bool:
     ENV_NAME,
     player_observer_type=gd.ObserverType.VECTOR,
     level=0,
-    new_step_api=True,
 )
 env.enable_history(True)  # type: ignore

```
experiments/drunk_dwarf/main/drunk_dwarf.py (72 changes: 43 additions & 29 deletions)
```diff
@@ -1,9 +1,10 @@
+import argparse
 import logging
 import os
-from functools import partial

 import griddly  # noqa
 import gym
+import hyperstate
 import neptune.new as neptune
 from griddly import gd
 from icecream import ic
```
```diff
@@ -17,16 +18,9 @@
 logging.basicConfig(filename="nars_drunk_dwarf.log", filemode="w", level=logging.DEBUG)
 logger = logging.getLogger("nars")

-NUM_EPISODES = 50
-MAX_ITERATIONS = 100
-
 ENV_NAME = "GDY-Drunk-Dwarf-v0"
 MAIN_TAG = "main"
-DIFFICULTY_LEVEL = 1

-THINK_TICKS = 3
-VIEW_RADIUS = 1
-MOTOR_BABBLING = 0.2
-DECISION_THRESHOLD = 0.6
+

 def key_check(_, info) -> bool:
```
```diff
@@ -41,18 +35,38 @@ def key_check(_, info) -> bool:


 if __name__ == "__main__":
-    try:
-        neprun = neptune.init(
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", type=str, default=None, help="Path to config file")
+    parser.add_argument("--hps", nargs="+", help="Override hyperparameter value")
+    parser.add_argument(
+        "--log_neptune",
+        action="store_true",
+        default=False,
+        help="Enable logging to Neptune",
+    )
+    parser.add_argument(
+        "--log_tensorboard",
+        action="store_true",
+        default=False,
+        help="Enable logging to TensorBoard",
+    )
+    args = parser.parse_args()
+    config = hyperstate.load(Config, file=args.config, overrides=args.hps)
+    logger.info("Run configuration: %s", config)
+
+    neprun = (
+        neptune.init(
             project=os.environ["NEPTUNE_PROJECT"],
-            tags=[ENV_NAME, MAIN_TAG, f"difficulty:{DIFFICULTY_LEVEL}"],
+            tags=[ENV_NAME, MAIN_TAG, f"difficulty:{config.difficulty_level}"],
         )
-    except KeyError:
-        neprun = None
+        if args.log_neptune
+        else None
+    )

     env = gym.make(
         ENV_NAME,
         player_observer_type=gd.ObserverType.VECTOR,
-        level=DIFFICULTY_LEVEL - 1,
+        level=config.difficulty_level - 1,
     )
     env.enable_history(True)  # type: ignore

```
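The `Config` class passed to `hyperstate.load` is not defined anywhere in this diff; presumably it lives in another module. As a point of reference, here is a minimal sketch of a schema consistent with the attribute accesses in this commit (`config.agent.*`, `config.nars.*`, `config.num_episodes`, `config.max_steps`, `config.difficulty_level`), assuming HyperState's plain-dataclass configs, with the deleted module-level constants as defaults:

```python
# Hypothetical Config schema; field names are taken from this diff,
# defaults from the constants the commit deletes. Not the repo's actual code.
from dataclasses import dataclass, field


@dataclass
class AgentConfig:
    think_ticks: int = 3
    view_radius: int = 1


@dataclass
class NarsConfig:
    motor_babbling: float = 0.2
    decision_threshold: float = 0.6


@dataclass
class Config:
    agent: AgentConfig = field(default_factory=AgentConfig)
    nars: NarsConfig = field(default_factory=NarsConfig)
    num_episodes: int = 50
    max_steps: int = 100
    difficulty_level: int = 1
```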
```diff
@@ -79,11 +93,11 @@ def key_check(_, info) -> bool:

     agent = DrunkDwarfAgent(
         env,
-        think_ticks=THINK_TICKS,
-        view_radius=VIEW_RADIUS,
+        think_ticks=config.agent.think_ticks,
+        view_radius=config.agent.view_radius,
         background_knowledge=background_knowledge,
-        motor_babbling=MOTOR_BABBLING,
-        decision_threshold=DECISION_THRESHOLD,
+        motor_babbling=config.nars.motor_babbling,
+        decision_threshold=config.nars.decision_threshold,
     )

     KEY_GOAL = Goal(
```
```diff
@@ -116,12 +130,12 @@ def key_check(_, info) -> bool:
     if neprun is not None:
         neprun["parameters"] = {
             "goals": [g.symbol for g in goals],
-            "think_ticks": THINK_TICKS,
-            "view_radius": agent.view_radius,
-            "num_episodes": NUM_EPISODES,
-            "max_iterations": MAX_ITERATIONS,
-            "motor_babbling": MOTOR_BABBLING,
-            "decision_threshold": DECISION_THRESHOLD,
+            "think_ticks": config.agent.think_ticks,
+            "view_radius": config.agent.view_radius,
+            "num_episodes": config.num_episodes,
+            "max_iterations": config.max_steps,
+            "motor_babbling": config.nars.motor_babbling,
+            "decision_threshold": config.nars.decision_threshold,
         }

     def nep_ep_callback(run_info: dict):
```
```diff
@@ -138,10 +152,10 @@ def nep_run_callback(run_info: dict):

     # Run the agent
     runner.run(
-        NUM_EPISODES,
-        MAX_ITERATIONS,
-        log_tb=True,
-        tb_comment_suffix=f"drunk_dwarf-{MAIN_TAG}:{DIFFICULTY_LEVEL}",
+        config.num_episodes,
+        config.max_steps,
+        log_tb=args.log_tensorboard,
+        tb_comment_suffix=f"drunk_dwarf-{MAIN_TAG}:{config.difficulty_level}",
         callbacks=callbacks,
     )
     if neprun is not None:
```
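Taken together, the script can now be launched with config overrides from the command line. A sketch of an invocation (the flag names come from the diff above; the config file name and the dotted `field=value` override syntax are assumptions based on HyperState's conventions):

```sh
# Hypothetical invocation; --config, --hps, and --log_* are from this commit,
# the file name and override syntax are assumptions.
python experiments/drunk_dwarf/main/drunk_dwarf.py \
    --config config.ron \
    --hps nars.decision_threshold=0.7 agent.think_ticks=5 \
    --log_tensorboard
```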
narca/agent.py (2 changes: 1 addition & 1 deletion)
```diff
@@ -29,7 +29,7 @@ def reset(self, level_string: Optional[str] = None):
         else:
             return self.env.reset(level_string=level_string)  # type: ignore

-    def step(self) -> tuple[Any, float, float, bool, Any]:
+    def step(self, observation: np.ndarray) -> tuple[Any, float, float, bool, Any]:
         actions = self.plan()
         obs = []
         reward = 0.0
```
narca/drunk_dwarf.py (4 changes: 2 additions & 2 deletions)
```diff
@@ -194,7 +194,7 @@ def where_obj_type(obj_type: str, obs: np.ndarray) -> list[tuple[int, int]]:
             return list(zip(xs, ys))

         # no threats in the environment, can assume avatar exists.
-        avatar_loc = where_obj_type(__class__.AVATAR_LABEL, observation)[0]
+        avatar_loc = where_obj_type(self.__class__.AVATAR_LABEL, observation)[0]

         def obj_label(obj_type: str, obloc: tuple[int, int]) -> Optional[str]:
             x, y = obloc
@@ -212,7 +212,7 @@ def obj_label(obj_type: str, obloc: tuple[int, int]) -> Optional[str]:
         visible_objects = {
             obj_type: where_obj_type(obj_type, observation)
             for obj_type in self.obj_names
-            if obj_type != __class__.AVATAR_LABEL
+            if obj_type != self.__class__.AVATAR_LABEL
         }
         obj_type_labels = [
             [obj_label(obj_type, obloc) for obloc in visible_objects[obj_type]]
```
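A plausible motivation for the `__class__` to `self.__class__` change (an illustration, not code from the repo): the implicit `__class__` cell is fixed to the class in which the method is defined, while `self.__class__` resolves to the instance's runtime type, so a subclass overriding `AVATAR_LABEL` is picked up correctly:

```python
# Self-contained illustration; class names here are hypothetical.
class BaseAgent:
    AVATAR_LABEL = "avatar"

    def labels(self) -> tuple[str, str]:
        # __class__ is always BaseAgent here; self.__class__ may be a subclass.
        return __class__.AVATAR_LABEL, self.__class__.AVATAR_LABEL


class DwarfAgent(BaseAgent):
    AVATAR_LABEL = "drunk_dwarf"


print(DwarfAgent().labels())  # ('avatar', 'drunk_dwarf')
```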
