Factor running the agent out into a class
This also fixes the problem of goals not being checked correctly during demonstrations.
ntoxeg committed May 31, 2022
1 parent 25bda0c commit 6c5c9bf
Showing 2 changed files with 117 additions and 74 deletions.
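
The goal-checking fix comes from Runner.demo_goal iterating over every goal registered with the runner (self.goals) rather than over a single passed-in goal, as the old free-standing demo_goal did. A stripped-down sketch of that per-step check, using a hypothetical minimal Goal stand-in that exposes satisfied(env_state, info) as in the diff below:

from typing import Any, Callable

# Hypothetical stand-in for narca's Goal, reduced to what the check needs.
class Goal:
    def __init__(self, symbol: str, satisfied: Callable[[dict[str, Any], dict], bool]):
        self.symbol = symbol
        self.satisfied = satisfied

def check_goals(goals: list[Goal], env_state: dict[str, Any], info: dict) -> list[str]:
    """Return the symbols of all goals satisfied in the current state, as Runner now checks each step."""
    return [g.symbol for g in goals if g.satisfied(env_state, info)]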
57 changes: 11 additions & 46 deletions experiments/zelda/demonstrate/nars_zelda.py
@@ -11,7 +11,7 @@

from narca.nar import *
from narca.utils import *
from narca.zelda import ZeldaAgent, ZeldaLevelGenerator, demo_goal
from narca.zelda import Runner, ZeldaAgent, ZeldaLevelGenerator

# setup a logger for nars output
logging.basicConfig(filename="nars_zelda.log", filemode="w", level=logging.DEBUG)
@@ -128,6 +128,7 @@ def key_check(_, info) -> bool:
think_ticks=10,
background_knowledge=background_knowledge,
)
runner = Runner(agent, goals, levelgen)

# DEMONSTRATE
for _ in range(NUM_DEMOS):
@@ -139,49 +140,13 @@ def key_check(_, info) -> bool:
"^move_forwards",
"^move_forwards",
]
agent.reset(level_string=levelgen.generate_for_plan(plan))
print("Demonstration: completing a level...")
demo_goal(COMPLETE_GOAL, agent, plan)

total_reward = 0.0
episode_reward = 0.0
tb_writer = SummaryWriter(comment="-nars-zelda-demonstrate")
done = False
# TRAINING LOOP
for episode in range(NUM_EPISODES):
agent.reset(level_string=levelgen.generate())

for i in range(MAX_ITERATIONS):
agent.observe(complete=i % 10 == 0)

obs, reward, cumr, done, info = agent.step()
episode_reward += cumr

env_state = agent.env.get_state() # type: ignore
env_state["reward"] = reward

satisfied_goals = [g.satisfied(env_state, info) for g in goals]
for g, sat in zip(goals, satisfied_goals):
if sat:
print(f"{g.symbol} satisfied.")
send_input(agent.process, nal_now(g.symbol))
get_raw_output(agent.process)

if g.symbol == key_goal_sym:
agent.has_key = True

env.render(observer="global") # type: ignore # Renders the entire environment
# sleep(1)

if done:
break

print(f"Episode {episode+1} finished with reward {episode_reward}.")
total_reward += episode_reward
tb_writer.add_scalar("train/episode_reward", episode_reward, episode)
tb_writer.add_scalar("train/total_reward", total_reward, episode)
episode_reward = 0.0
send_input(agent.process, nal_now("RESET"))

print(f"Average total reward per episode: {total_reward / NUM_EPISODES}.")
env.close() # Call explicitly to avoid exception on quit
runner.demo_goal(plan)

# Run the agent
runner.run(
NUM_EPISODES,
MAX_ITERATIONS,
log_tb=True,
comment_suffix="-demonstrate",
)
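
Taken together, the demonstration script now delegates both the demonstration and the training loop to Runner. A condensed sketch of the resulting flow; agent, goals, levelgen, plan and the NUM_DEMOS / NUM_EPISODES / MAX_ITERATIONS constants are assumed to be defined earlier in the script, as in the hunks above:

runner = Runner(agent, goals, levelgen)

# Walk the agent through a plan-fitting level a few times before training.
for _ in range(NUM_DEMOS):
    runner.demo_goal(plan)

# Then run the regular interaction episodes, logging to TensorBoard.
runner.run(
    NUM_EPISODES,
    MAX_ITERATIONS,
    log_tb=True,
    comment_suffix="-demonstrate",
)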
134 changes: 106 additions & 28 deletions narca/zelda.py
@@ -5,6 +5,7 @@
import gym
import numpy as np
from griddly.util.rllib.environment.level_generator import LevelGenerator
from tensorboardX import SummaryWriter

from .agent import Agent
from .astar import pathfind
@@ -260,9 +261,14 @@ def narsify_from_state(env_state: dict[str, Any]) -> list[str]:
if obj["Name"] == "wall"
]

avatar = next(obj for obj in env_state["Objects"] if obj["Name"] == "avatar")
avatar_loc = f"<({ext('SELF')} * {loc(avatar['Location'])}) --> at>. :|:"
avatar_orient = f"<{ext('SELF')} --> [orient-{avatar['Orientation'].lower()}]>. :|:"
try:
avatar = next(obj for obj in env_state["Objects"] if obj["Name"] == "avatar")
avatar_loc = f"<({ext('SELF')} * {loc(avatar['Location'])}) --> at>. :|:"
avatar_orient = (
f"<{ext('SELF')} --> [orient-{avatar['Orientation'].lower()}]>. :|:"
)
except StopIteration:
return []
avatar_beliefs = [avatar_loc, avatar_orient]

object_beliefs = [
@@ -274,7 +280,6 @@ def narsify_from_state(env_state: dict[str, Any]) -> list[str]:
f"<({ext('wall' + str(i))} * {loc(pos)}) --> at>. :|:" for i, pos in walls
]

# NEXT: remove absolute position beliefs
return relative_beliefs(env_state)


@@ -561,32 +566,105 @@ def demo_reach_key(symbol: str, agent: ZeldaAgent) -> None:
demo_reach_key(symbol, agent)


def demo_goal(goal: Goal, agent: ZeldaAgent, plan: list[str]) -> None:
"""Demonstrate reaching the key"""
goals = [goal]
for action in plan:
send_input(agent.process, f"{action}. :|:")
gym_actions = agent.determine_actions(
{"executions": [{"operator": action, "arguments": []}]}
class Runner:
"""Functionality for running agent interaction episodes"""

def __init__(
self,
agent: ZeldaAgent,
goals: list[Goal],
levelgen: Optional[ZeldaLevelGenerator] = None,
):
self.agent = agent
self.goals = goals
self.levelgen = levelgen

def run(
self,
num_episodes: int,
max_iterations: int,
log_tb: bool = False,
comment_suffix: str = "",
) -> None:
"""Run agent interaction episodes"""
total_reward = 0.0
episode_reward = 0.0
done = False
tb_writer = (
SummaryWriter(comment=f"-nars-zelda{comment_suffix}") if log_tb else None
)
_, reward, done, info = agent.env.step(gym_actions[0])
agent.observe()

env_state = agent.env.get_state() # type: ignore
env_state["reward"] = reward
for episode in range(num_episodes):
lvl_str = self.levelgen.generate() if self.levelgen is not None else None
self.agent.reset(level_string=lvl_str)

satisfied_goals = [g.satisfied(env_state, info) for g in goals]
for g, sat in zip(goals, satisfied_goals):
if sat:
print(f"{g.symbol} satisfied.")
send_input(agent.process, nal_now(g.symbol))
get_raw_output(agent.process)
for i in range(max_iterations):
self.agent.observe(complete=i % 10 == 0)

if g.symbol == "GOT_KEY":
agent.has_key = True
_, reward, cumr, done, info = self.agent.step()
episode_reward += cumr

agent.env.render(observer="global") # type: ignore
sleep(1)
if done:
agent.reset() # TODO: track level string in agent
demo_goal(goal, agent, plan)
env_state = self.agent.env.get_state() # type: ignore
env_state["reward"] = reward

satisfied_goals = [g.satisfied(env_state, info) for g in self.goals]
for g, sat in zip(self.goals, satisfied_goals):
if sat:
print(f"{g.symbol} satisfied.")
send_input(self.agent.process, nal_now(g.symbol))
get_raw_output(self.agent.process)

if g.symbol == "GOT_KEY":
self.agent.has_key = True

self.agent.env.render(observer="global") # type: ignore # Renders the entire environment

if done:
break

print(f"Episode {episode+1} finished with reward {episode_reward}.")
total_reward += episode_reward
if tb_writer is not None:
tb_writer.add_scalar("train/episode_reward", episode_reward, episode)
tb_writer.add_scalar("train/total_reward", total_reward, episode)
episode_reward = 0.0
send_input(self.agent.process, nal_now("RESET"))

print(f"Average total reward per episode: {total_reward / num_episodes}.")
self.agent.env.close() # Call explicitly to avoid exception on quit

def demo_goal(self, plan: list[str]) -> None:
"""Demonstrate reaching the goal
Generates levels that fit a given plan.
"""
lvl_str = (
self.levelgen.generate_for_plan(plan) if self.levelgen is not None else None
)

self.agent.reset(level_string=lvl_str)
for action in plan:
send_input(self.agent.process, f"{action}. :|:")
gym_actions = self.agent.determine_actions(
{"executions": [{"operator": action, "arguments": []}]}
)
_, reward, done, info = self.agent.env.step(gym_actions[0])
self.agent.observe()

env_state = self.agent.env.get_state() # type: ignore
env_state["reward"] = reward

satisfied_goals = [g.satisfied(env_state, info) for g in self.goals]
for g, sat in zip(self.goals, satisfied_goals):
if sat:
print(f"{g.symbol} satisfied.")
send_input(self.agent.process, nal_now(g.symbol))
get_raw_output(self.agent.process)

if g.symbol == "GOT_KEY":
self.agent.has_key = True

self.agent.env.render(observer="global") # type: ignore
sleep(1)
if done:
self.demo_goal(plan)
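
For reference, a minimal way to drive the new class from another experiment script might look like the sketch below; the agent, goal list, and level generator are assumed to be set up as in experiments/zelda/demonstrate/nars_zelda.py, and the episode counts are placeholders:

from narca.zelda import Runner

runner = Runner(agent, goals, levelgen)  # levelgen is optional; with None, reset() is called with level_string=None
runner.demo_goal(["^move_forwards", "^move_forwards"])  # optional: demonstrate a short hand-written plan first
runner.run(
    num_episodes=50,
    max_iterations=100,
    log_tb=True,             # omit (or pass False) to skip creating a tensorboardX SummaryWriter entirely
    comment_suffix="-demo",  # appended to the "-nars-zelda" TensorBoard run comment
)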
