Switch the main experiment to HyperState
+ minor fixes
+ now possible to use CLI arguments for hyperparams and other options
ntoxeg committed Sep 7, 2022
1 parent 2f2b78b commit cd91360
Showing 4 changed files with 47 additions and 34 deletions.
experiments/drunk_dwarf/extended_curriculum/drunk_dwarf.py (3 changes: 1 addition & 2 deletions)
```diff
@@ -62,7 +62,7 @@ def key_check(_, info) -> bool:
     # f"<(<$obj --> [leftward]> &/ ^move_forwards &/ ^rotate_left) =/> <$obj --> [ahead]>>.",
     # f"<(<$obj --> [rightward]> &/ ^move_forwards &/ ^rotate_right) =/> <$obj --> [ahead]>>.",
 ]
-background_knowledge = []
+background_knowledge: list[str] = []

 key_goal_sym = "GOT_KEY"
 reach_key = [f"<({ext('key')} --> [reached]) =/> {key_goal_sym}>."]
```
```diff
@@ -82,7 +82,6 @@ def key_check(_, info) -> bool:
     ENV_NAME,
     player_observer_type=gd.ObserverType.VECTOR,
     level=0,
-    new_step_api=True,
 )
 env.enable_history(True)  # type: ignore

```
experiments/drunk_dwarf/main/drunk_dwarf.py (72 changes: 43 additions & 29 deletions)
```diff
@@ -1,9 +1,10 @@
+import argparse
 import logging
 import os
-from functools import partial

 import griddly  # noqa
 import gym
+import hyperstate
 import neptune.new as neptune
 from griddly import gd
 from icecream import ic
```
```diff
@@ -17,16 +18,9 @@
 logging.basicConfig(filename="nars_drunk_dwarf.log", filemode="w", level=logging.DEBUG)
 logger = logging.getLogger("nars")

-NUM_EPISODES = 50
-MAX_ITERATIONS = 100
-
 ENV_NAME = "GDY-Drunk-Dwarf-v0"
 MAIN_TAG = "main"
-DIFFICULTY_LEVEL = 1

-THINK_TICKS = 3
-VIEW_RADIUS = 1
-MOTOR_BABBLING = 0.2
-DECISION_THRESHOLD = 0.6
+

 def key_check(_, info) -> bool:
```
```diff
@@ -41,18 +35,38 @@ def key_check(_, info) -> bool:


 if __name__ == "__main__":
-    try:
-        neprun = neptune.init(
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", type=str, default=None, help="Path to config file")
+    parser.add_argument("--hps", nargs="+", help="Override hyperparameter value")
+    parser.add_argument(
+        "--log_neptune",
+        action="store_true",
+        default=False,
+        help="Enable logging to Neptune",
+    )
+    parser.add_argument(
+        "--log_tensorboard",
+        action="store_true",
+        default=False,
+        help="Enable logging to TensorBoard",
+    )
+    args = parser.parse_args()
+    config = hyperstate.load(Config, file=args.config, overrides=args.hps)
+    logger.info("Run configuration: %s", config)
+
+    neprun = (
+        neptune.init(
             project=os.environ["NEPTUNE_PROJECT"],
-            tags=[ENV_NAME, MAIN_TAG, f"difficulty:{DIFFICULTY_LEVEL}"],
+            tags=[ENV_NAME, MAIN_TAG, f"difficulty:{config.difficulty_level}"],
         )
-    except KeyError:
-        neprun = None
+        if args.log_neptune
+        else None
+    )

     env = gym.make(
         ENV_NAME,
         player_observer_type=gd.ObserverType.VECTOR,
-        level=DIFFICULTY_LEVEL - 1,
+        level=config.difficulty_level - 1,
     )
     env.enable_history(True)  # type: ignore

```
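The `Config` class passed to `hyperstate.load` is not defined anywhere in this diff; presumably it lives in another module. As a point of reference, here is a minimal sketch of a schema consistent with the attribute accesses in this commit (`config.agent.*`, `config.nars.*`, `config.num_episodes`, `config.max_steps`, `config.difficulty_level`), assuming HyperState's plain-dataclass configs, with the deleted module-level constants as defaults:

```python
# Hypothetical Config schema; field names are taken from this diff,
# defaults from the constants the commit deletes. Not the repo's actual code.
from dataclasses import dataclass, field


@dataclass
class AgentConfig:
    think_ticks: int = 3
    view_radius: int = 1


@dataclass
class NarsConfig:
    motor_babbling: float = 0.2
    decision_threshold: float = 0.6


@dataclass
class Config:
    agent: AgentConfig = field(default_factory=AgentConfig)
    nars: NarsConfig = field(default_factory=NarsConfig)
    num_episodes: int = 50
    max_steps: int = 100
    difficulty_level: int = 1
```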
```diff
@@ -79,11 +93,11 @@ def key_check(_, info) -> bool:

     agent = DrunkDwarfAgent(
         env,
-        think_ticks=THINK_TICKS,
-        view_radius=VIEW_RADIUS,
+        think_ticks=config.agent.think_ticks,
+        view_radius=config.agent.view_radius,
         background_knowledge=background_knowledge,
-        motor_babbling=MOTOR_BABBLING,
-        decision_threshold=DECISION_THRESHOLD,
+        motor_babbling=config.nars.motor_babbling,
+        decision_threshold=config.nars.decision_threshold,
     )

     KEY_GOAL = Goal(
```
```diff
@@ -116,12 +130,12 @@ def key_check(_, info) -> bool:
     if neprun is not None:
         neprun["parameters"] = {
             "goals": [g.symbol for g in goals],
-            "think_ticks": THINK_TICKS,
-            "view_radius": agent.view_radius,
-            "num_episodes": NUM_EPISODES,
-            "max_iterations": MAX_ITERATIONS,
-            "motor_babbling": MOTOR_BABBLING,
-            "decision_threshold": DECISION_THRESHOLD,
+            "think_ticks": config.agent.think_ticks,
+            "view_radius": config.agent.view_radius,
+            "num_episodes": config.num_episodes,
+            "max_iterations": config.max_steps,
+            "motor_babbling": config.nars.motor_babbling,
+            "decision_threshold": config.nars.decision_threshold,
         }

     def nep_ep_callback(run_info: dict):
```
```diff
@@ -138,10 +152,10 @@ def nep_run_callback(run_info: dict):

     # Run the agent
     runner.run(
-        NUM_EPISODES,
-        MAX_ITERATIONS,
-        log_tb=True,
-        tb_comment_suffix=f"drunk_dwarf-{MAIN_TAG}:{DIFFICULTY_LEVEL}",
+        config.num_episodes,
+        config.max_steps,
+        log_tb=args.log_tensorboard,
+        tb_comment_suffix=f"drunk_dwarf-{MAIN_TAG}:{config.difficulty_level}",
         callbacks=callbacks,
     )
     if neprun is not None:
```
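Taken together, the script can now be launched with config overrides from the command line. A sketch of an invocation (the flag names come from the diff above; the config file name and the dotted `field=value` override syntax are assumptions based on HyperState's conventions):

```sh
# Hypothetical invocation; --config, --hps, and --log_* are from this commit,
# the file name and override syntax are assumptions.
python experiments/drunk_dwarf/main/drunk_dwarf.py \
    --config config.ron \
    --hps nars.decision_threshold=0.7 agent.think_ticks=5 \
    --log_tensorboard
```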
narca/agent.py (2 changes: 1 addition & 1 deletion)
```diff
@@ -29,7 +29,7 @@ def reset(self, level_string: Optional[str] = None):
         else:
             return self.env.reset(level_string=level_string)  # type: ignore

-    def step(self) -> tuple[Any, float, float, bool, Any]:
+    def step(self, observation: np.ndarray) -> tuple[Any, float, float, bool, Any]:
         actions = self.plan()
         obs = []
         reward = 0.0
```
narca/drunk_dwarf.py (4 changes: 2 additions & 2 deletions)
```diff
@@ -194,7 +194,7 @@ def where_obj_type(obj_type: str, obs: np.ndarray) -> list[tuple[int, int]]:
             return list(zip(xs, ys))

         # no threats in the environment, can assume avatar exists.
-        avatar_loc = where_obj_type(__class__.AVATAR_LABEL, observation)[0]
+        avatar_loc = where_obj_type(self.__class__.AVATAR_LABEL, observation)[0]

         def obj_label(obj_type: str, obloc: tuple[int, int]) -> Optional[str]:
             x, y = obloc
@@ -212,7 +212,7 @@ def obj_label(obj_type: str, obloc: tuple[int, int]) -> Optional[str]:
         visible_objects = {
             obj_type: where_obj_type(obj_type, observation)
             for obj_type in self.obj_names
-            if obj_type != __class__.AVATAR_LABEL
+            if obj_type != self.__class__.AVATAR_LABEL
         }
         obj_type_labels = [
             [obj_label(obj_type, obloc) for obloc in visible_objects[obj_type]]
```
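A plausible motivation for the `__class__` to `self.__class__` change (an illustration, not code from the repo): the implicit `__class__` cell is fixed to the class in which the method is defined, while `self.__class__` resolves to the instance's runtime type, so a subclass overriding `AVATAR_LABEL` is picked up correctly:

```python
# Self-contained illustration; class names here are hypothetical.
class BaseAgent:
    AVATAR_LABEL = "avatar"

    def labels(self) -> tuple[str, str]:
        # __class__ is always BaseAgent here; self.__class__ may be a subclass.
        return __class__.AVATAR_LABEL, self.__class__.AVATAR_LABEL


class DwarfAgent(BaseAgent):
    AVATAR_LABEL = "drunk_dwarf"


print(DwarfAgent().labels())  # ('avatar', 'drunk_dwarf')
```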
