Use SimpleVelocityBot (simple_velocity_bot) in the MPC config and the environment constructor

AOS55 · Oct 26, 2022 · a29bf56
1 parent a19aab8
commit a29bf56
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 12 deletions.
diff --git a/configs/mpc.yaml b/configs/mpc.yaml
@@ -4,8 +4,8 @@ defaults:
 
 
 # Task Settings
-task: SimplePointBot_goal
-env: SimplePointBot
+task: SimpleVelocityBot_goal
+env: SimpleVelocityBot
 obs_type: states  # [states, pixels]
 frame_stack: 1
 action_repeat: 1
@@ -97,7 +97,7 @@ constr_hidden_size: 200
 constr_lr: 1e-4
 
 # Replay Buffer
-data_dirs: datasets/states/SimplePointBot/controller/prioritized_sampling_1000_1200_50000
+data_dirs: datasets/states/SimpleVelocityBot/controller/prioritized_sampling_1200
 data_counts: 600
 buffer_size: 35000
 

diff --git a/libraries/safe/simple_velocity_bot.py b/libraries/safe/simple_velocity_bot.py
@@ -36,14 +36,16 @@ def __init__(self, from_pixels=True,
                  horizon=150,
                  constr_penalty=-100,
                  goal_thresh=10.0,
-                 noise_scale=0.125):
+                 noise_scale=0.125,
+                 random_reset=False):
         utils.EzPickle.__init__(self)
         self.done = self.state = None
         self.horizon = horizon
         self.start_pos = start_pos
         self.end_pos = end_pos
         self.goal_thresh = goal_thresh
         self.noise_scale = noise_scale
+        self.random_reset = random_reset
         self.constr_penalty = constr_penalty
         self.action_space = Box(-np.ones(2) * MAX_ACCEL,
                                 np.ones(2) * MAX_ACCEL)
@@ -74,18 +76,19 @@ def step(self, a):
         if self._from_pixels:
             obs = self._state_to_image(self.state)
         else:
-            obs = self.state
+            obs = self.state.astype(dtype=np.float32)
         return obs, cur_reward, self.done, {
             "constraint": constr,
             "reward": cur_reward,
             "state": old_state,
             "next_state": next_state,
-            "action": a
+            "action": a, 
+            "done": self.done
         }
 
     def reset(self, random_start=False):
         self.state = np.ones(4)  # Preallocate with 4 values
-        if random_start:
+        if random_start or self.random_reset:
             self.state[:2] = np.random.random(2) * (WINDOW_WIDTH, WINDOW_HEIGHT)
             self.state[2:] = np.random.uniform(low=-MAX_START_VEL, high=MAX_START_VEL, size=2)
             if self.obstacle(self.state[:2]):
@@ -98,11 +101,11 @@ def reset(self, random_start=False):
         if self._from_pixels:
             obs = self._state_to_image(self.state)
         else:
-            obs = self.state
+            obs = self.state.astype(dtype=np.float32)
         return obs
 
     def render(self, mode='human'):
-        return self._draw_state(self.state)
+        return self._draw_state(self.state[:2])
 
     def _draw_state(self, state):
         BCKGRND_COLOR = (0, 0, 0)

diff --git a/utils/env_constructor.py b/utils/env_constructor.py
@@ -9,7 +9,7 @@
 from dm_env import StepType, specs
 
 import libraries.dmc as cdmc
-from libraries.safe import SimplePointBot as SPB
+from libraries.safe import SimplePointBot, SimpleVelocityBot
 from .wrappers import GymWrapper
 from .wrappers import FrameStack
 
@@ -25,6 +25,11 @@
     'SimpleVelocityBot': 'safe'
 }
 
+SAFE_ENVS = {
+    'SimplePointBot': SimplePointBot,
+    'SimpleVelocityBot': SimpleVelocityBot
+}
+
 class ExtendedTimeStep(NamedTuple):
     step_type: Any
     reward: Any
@@ -323,11 +328,11 @@ def _make_gym(obs_type, domain, task, frame_stack, action_repeat, seed, random_s
 def _make_custom(obs_type, domain, task, frame_stack, action_repeat, seed, random_start=False):
     if obs_type == 'states':
         from_pixels = False
-        env = SPB(from_pixels=from_pixels, random_reset=random_start)
+        env = SAFE_ENVS[domain](from_pixels=from_pixels, random_reset=random_start)
         env = GymWrapper(env)
     else:
         from_pixels = True
-        env = SPB(from_pixels=from_pixels, random_reset=random_start)
+        env = SAFE_ENVS[domain](from_pixels=from_pixels, random_reset=random_start)
         env = FrameStack(env, num_stack=frame_stack)
         env = GymWrapper(env)
     env = ActionDTypeWrapper(env, np.float32)