From c96d7428c86bb227ce1323f27f2195985b7e3295 Mon Sep 17 00:00:00 2001
From: Adrian Borucki <ab@synthillect.ai>
Date: Tue, 16 Jul 2024 13:40:54 +0200
Subject: [PATCH] Add some documentation

---
 README.md                   | 16 +++++++
 experiments/experiment01.jl |  2 +-
 experiments/experiment02.jl |  3 +-
 src/NACE.jl                 | 95 +++++++++++++++++++++++++++++++++++--
 4 files changed, 110 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index 22f8343..ca44c2a 100644
--- a/README.md
+++ b/README.md
@@ -17,3 +17,19 @@
         - [ ] `oldest_observed`
 - [ ] Remove old code (everything currently outside `NACE.jl`), preserving useful parts.
 - [ ] Lastly, refactor `NACE.jl` into multiple files to have a clean library structure.
+
+## Install
+I recommend using Poetry to make a Python virtual env.
+Spawn the env's shell and run the REPL with `julia --project`. Before installing deps, make sure
+to execute `ENV["PYTHON"] = Sys.which("python")` -- that will set PyCall to use your env.
+
+In the REPL invoke `] instantiate` to install Julia deps.
+
+## Run
+In the REPL, `using NACE` should be enough. Some things may not be exported (the API is not yet stable, needless
+to say) -- those you have to access under the package's namespace (`NACE`).
+
+There are experiment files that you can run with `julia --project experiments/<exp name>.jl`.
+
+Under the hood, environments are provided by the Farama Foundation's Minigrid library, which
+relies on the Gymnasium package, also maintained by the Foundation.
diff --git a/experiments/experiment01.jl b/experiments/experiment01.jl
index 3554a11..1d288b3 100644
--- a/experiments/experiment01.jl
+++ b/experiments/experiment01.jl
@@ -2,4 +2,4 @@ using NACE
 env = NACE.gym.make("MiniGrid-LavaCrossingS11N5-v0", render_mode="human")
 obs, info = env.reset()
 
-runthis_random(env)
+run_example_random(env)
diff --git a/experiments/experiment02.jl b/experiments/experiment02.jl
index a1cc29c..83a8824 100644
--- a/experiments/experiment02.jl
+++ b/experiments/experiment02.jl
@@ -2,4 +2,5 @@ using NACE
 env = NACE.gym.make("MiniGrid-LavaCrossingS11N5-v0", render_mode="human")
 obs, info = env.reset()
 
-runthis(env)
+# TODO: move code from NACE.jl here.
+run_example(env)
diff --git a/src/NACE.jl b/src/NACE.jl
index 96d6e29..3f93b19 100644
--- a/src/NACE.jl
+++ b/src/NACE.jl
@@ -1,5 +1,5 @@
 module NACE
-export make_random_policy, runthis, runthis_random
+export make_random_policy, run_example, run_example_random
 
 using PyCall
 
@@ -8,13 +8,18 @@ function __init__()
     global miniwrap = pyimport("minigrid.wrappers")
 end
 
+# TODO: refactor into those files
 include("envs.jl")
 include("rule.jl")
 include("agent.jl")
 
+# example of running in the REPL
 # env = NACE.gym.make("MiniGrid-LavaCrossingS11N5-v0", render_mode="human");
 # obs, info = env.reset();
 
+"""
+Actions available in minigrid environments
+"""
 IDX_TO_ACTION = Dict(
     0 => "Turn left",
     1 => "Turn right",
@@ -26,6 +31,11 @@ IDX_TO_ACTION = Dict(
 )
 ACTION_TO_IDX = Dict(value => key for (key, value) ∈ IDX_TO_ACTION)
 
+"""
+    make_random_policy(env)
+
+Return a function that generates a random policy for the given environment.
+"""
 function make_random_policy(env)
     n = convert(Int, env.action_space.n)
 
@@ -36,7 +46,12 @@ function make_random_policy(env)
     random_policy
 end
 
-function runthis_random(env)
+"""
+    run_example_random(env)
+
+@deprecated Run a random policy on an environment.
+"""
+function run_example_random(env)
     obs, info = env.reset()
     agent = Agent(nothing, make_random_policy(env))
     for _ ∈ 1:10
@@ -47,6 +62,20 @@ function runthis_random(env)
     end
 end
 
+"""
+    NaceState(t, focus, perceived_externals, per_ext_ante, act_ante, rules)
+
+Agent state structure
+
+# Arguments
+
+  - `t` :: Int: Current time step.
+  - `focus` :: Set: Set of objects the agent is currently focused on.
+  - `perceived_externals` :: Dict: Perceived external state, including objects, walls, and agents.
+  - `per_ext_ante` :: Dict: Previous perceived external state from the previous time step.
+  - `act_ante` :: String: Action taken in the previous time step.
+  - `rules` :: Set: Set of rules that the agent currently believes.
+"""
 struct NaceState
     t::Int
     focus::Set
@@ -56,8 +85,30 @@ struct NaceState
     rules::Set
 end
 
-empty_state() = NaceState(0, Set(), Dict(), Dict(), "", Set())
+"""
+    init_state()
+
+Create an empty state with time step zero.
+"""
+init_state() = NaceState(0, Set(), Dict(), Dict(), "", Set())
+
+"""
+    NaceAgent(state, policy, perceptor, effector)
+
+Non-Axiomatic Causal Explorer agent.
+
+Holds the top-level structure of the agent.
+This is what you need to instantiate in order to run NACE.
 
+# Arguments
+
+  - `state` :: NaceState: Current state of the agent.
+  - `policy` :: Function: Policy function that generates an action based on the current state.
+  - `perceptor` :: Function: Perceptor function that generates perceived external state based on the
+    received environment observation.
+  - `effector` :: Function: Effector function that takes an action as input and returns data usable
+    for executing the action via the environment's API.
+"""
 mutable struct NaceAgent
     state::NaceState
     policy::Function
@@ -65,6 +116,15 @@ mutable struct NaceAgent
     effector::Function
 end
 
+"""
+    (agent::NaceAgent)(obs)
+
+Run a step.
+
+Run the complete pipeline, from perceiving the environment observation to
+determining the next action to take.
+Return the chosen action; there are no side effects other than updating the agent's state.
+"""
 function (agent::NaceAgent)(obs)
     percept_state = agent.perceptor(obs)
     per_ext_ante =
@@ -81,6 +141,15 @@ function (agent::NaceAgent)(obs)
     agent.effector(agent.state.act_ante)
 end
 
+"""
+    nace_perceptor(obs)
+
+Run the perceptor.
+
+Run the perceptor -- the function that consumes an environment observation
+data structure and returns a representation usable within the agent's internal
+logic.
+"""
 function nace_perceptor(obs)
     objects = map(i -> IDX_TO_OBJECT[i], obs["image"][:, :, 1])
     Dict(
@@ -91,6 +160,11 @@ function nace_perceptor(obs)
     )
 end
 
+"""
+    nace_policy(state)
+
+Run the policy.
+"""
 function nace_policy(state)
     rules, action, focus = cycle(state)
     NaceState(
@@ -104,11 +178,24 @@ function nace_policy(state)
     # IDX_TO_ACTION[rand(0:3)],
 end
 
+"""
+    nace_effector(action)
+
+Run the effector.
+
+Run the effector -- the function that takes an action as input and returns its representation
+usable with the environment's API.
+"""
 function nace_effector(action)
     ACTION_TO_IDX[action]
 end
 
-function runthis(env)
+"""
+    run_example(env)
+
+Run an example on an environment.
+"""
+function run_example(env)
     obs, info = env.reset()
     agent = NaceAgent(
         NaceState(0, Set(), Dict(), Dict(), "Unused", Set()),