sampling prioritized replay
AOS55 committed Oct 31, 2022
1 parent ef8983f commit c07c727
Showing 3 changed files with 23 additions and 24 deletions.
agents/unsupervised_learning/smm.py (3 changes: 1 addition & 2 deletions)
@@ -279,8 +279,7 @@ def update(self, replay_iter, step):

if self.obs_type=='pixels':
# p^*(s) is ignored, as state space dimension is inaccessible from pixel input
- intr_reward = pred_log_ratios + self.latent_ent_coef * h_z + self.latent_cond_ent_coef * h_z_s.detach(
- )
+ intr_reward = pred_log_ratios + self.latent_ent_coef * h_z + self.latent_cond_ent_coef * h_z_s.detach()
reward = intr_reward
else:
# p^*(s) is based on the goal hitting time
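
The change above just joins a split statement; the intrinsic reward itself is the SMM-style combination of a density-ratio term with the latent entropy and conditional entropy estimates. A minimal illustrative sketch (not part of this commit), assuming the three terms are torch tensors and the coefficients are plain floats:

import torch

# Illustrative placeholders standing in for the quantities used above.
batch = 64
pred_log_ratios = torch.randn(batch, 1)            # per-transition density-ratio term
h_z = torch.tensor(1.386)                          # latent entropy H(z), e.g. log(4) for 4 uniform skills
h_z_s = torch.randn(batch, 1, requires_grad=True)  # conditional entropy estimate H(z|s) from a trained network

latent_ent_coef = 1.0
latent_cond_ent_coef = 1.0

# detach() stops reward gradients from flowing back into the network that produced h_z_s,
# so the intrinsic reward is treated as a fixed target by the downstream RL update.
intr_reward = pred_log_ratios + latent_ent_coef * h_z + latent_cond_ent_coef * h_z_s.detach()
reward = intr_reward
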
prioritized_sampling.py (39 changes: 20 additions & 19 deletions)
@@ -129,7 +129,24 @@ def replay_iter(self):
if self._replay_iter is None:
self._replay_iter = iter(self.replay_loader)
return self._replay_iter


+ def sample(self):
+ sample_until_step = utils.Until(self.cfg.num_sample_episodes)
+ prioritize_sample_until_step = utils.Until(self.cfg.num_prioritize_sample_episodes)
+ seed_until_step = utils.Until(self.cfg.num_seed_frames, self.cfg.action_repeat)
+
+ # random start samples
+ random_start_path = self.generate_samples(self.random_sample_env, sample_until_step=sample_until_step, seed_until_step=seed_until_step, sampling_name='random_sample')
+ constraint_path = self.make_constraint_dir(random_start_path, 'constraints') # make constraint dir
+ os.makedirs(os.path.join(self.work_dir, 'buffer'))
+ start_path = self.generate_samples(self.sample_env, sample_until_step=sample_until_step, seed_until_step=seed_until_step, sampling_name='sample')
+ norm_skill_reward = np.array(self.skill_reward_sum(start_path))
+ print(f'normalized_skill_reward: {norm_skill_reward}')
+ reward_skill_set = np.where(norm_skill_reward > -0.985)[0]
+ os.makedirs(os.path.join(self.work_dir, 'buffer'))
+ reward_path = self.generate_samples(self.sample_env, sample_until_step=prioritize_sample_until_step, seed_until_step=seed_until_step, sampling_name='rewards', skill_set=reward_skill_set)
+ self.make_training_set(reward_path, constraint_path)

def generate_samples(self, env, sample_until_step, seed_until_step, sampling_name=None, skill_set=None):
# Sample based on input and mode

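Before the prioritized 'rewards' pass, the new sample() keeps only the skills whose normalized return clears the -0.985 threshold. A standalone numpy sketch of that selection step, with made-up per-skill values:

import numpy as np

# Hypothetical per-skill normalized returns, one entry per skill,
# shaped like the output of skill_reward_sum.
norm_skill_reward = np.array([-0.99, -0.97, -1.02, -0.50])

# Only skills above the threshold are passed to generate_samples via skill_set.
reward_skill_set = np.where(norm_skill_reward > -0.985)[0]
print(reward_skill_set)  # -> [1 3]
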
@@ -144,7 +161,7 @@ def generate_samples(self, env, sample_until_step, seed_until_step, sampling_nam
skill = np.zeros(self.cfg.skill_dim, dtype=np.float32)
skill[np.random.choice(skill_set)] = 1.0
meta = OrderedDict()
- meta['skill'] = skill
+ meta[self.skill_key] = skill
else:
meta = self.agent.init_meta()

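When a skill_set is supplied, the branch above draws one of the allowed skills uniformly and stores it as a one-hot vector under whatever key self.skill_key holds. A small self-contained sketch of that construction (the concrete values are illustrative only):

import numpy as np
from collections import OrderedDict

skill_dim = 8                    # stand-in for cfg.skill_dim
skill_set = np.array([1, 3])     # e.g. the skills that survived the reward threshold
skill_key = 'skill'              # stand-in for self.skill_key

skill = np.zeros(skill_dim, dtype=np.float32)
skill[np.random.choice(skill_set)] = 1.0   # one-hot over the allowed skills only

meta = OrderedDict()
meta[skill_key] = skill
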
@@ -200,22 +217,6 @@ def generate_samples(self, env, sample_until_step, seed_until_step, sampling_nam

return source_path

- def sample(self):
- sample_until_step = utils.Until(self.cfg.num_sample_episodes)
- prioritize_sample_until_step = utils.Until(self.cfg.num_prioritize_sample_episodes)
- seed_until_step = utils.Until(self.cfg.num_seed_frames, self.cfg.action_repeat)
-
- # random start samples
- random_start_path = self.generate_samples(self.random_sample_env, sample_until_step=sample_until_step, seed_until_step=seed_until_step, sampling_name='random_sample')
- constraint_path = self.make_constraint_dir(random_start_path, 'constraints') # make constraint dir
- os.makedirs(os.path.join(self.work_dir, 'buffer'))
- start_path = self.generate_samples(self.sample_env, sample_until_step=sample_until_step, seed_until_step=seed_until_step, sampling_name='sample')
- norm_skill_reward = np.array(self.skill_reward_sum(start_path))
- reward_skill_set = np.where(norm_skill_reward > -100.0)[0]
- os.makedirs(os.path.join(self.work_dir, 'buffer'))
- reward_path = self.generate_samples(self.sample_env, sample_until_step=prioritize_sample_until_step, seed_until_step=seed_until_step, sampling_name='rewards', skill_set=reward_skill_set)
- self.make_training_set(reward_path, constraint_path)

def make_training_set(self, reward_source_path, constraint_source_path, target_dir='mpc_train'):
idfile = 0
target_path = os.path.join(self.work_dir, target_dir)
@@ -267,7 +268,7 @@ def skill_reward_sum(self, source_path):
ep = np.load(path)
skill = np.where(ep[self.skill_key][0] == 1)
reward = np.sum(ep['reward'])
- skill_sum[skill[0][0]] += reward
+ skill_sum[skill[0][0]] += reward/len(ep['reward'])
skill_count[skill[0][0]] += 1

def _divide(sum, count):
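
The change above divides each episode's return by its length before accumulating, so the per-skill figure that feeds the -0.985 threshold is a mean per-step reward rather than a raw sum. A minimal sketch of the accumulation and the final division by episode count (illustrative data; the repo's own _divide helper presumably handles never-sampled skills, here np.divide's where= argument is used instead):

import numpy as np

skill_dim = 4
skill_sum = np.zeros(skill_dim)
skill_count = np.zeros(skill_dim)

# Illustrative episodes: (one-hot skill vector, per-step reward array).
episodes = [
    (np.eye(skill_dim)[0], np.full(100, -1.0)),  # skill 0: 100 steps of -1.0
    (np.eye(skill_dim)[1], np.full(50, -0.5)),   # skill 1: 50 steps of -0.5
]

for skill_onehot, rewards in episodes:
    skill = np.where(skill_onehot == 1)
    skill_sum[skill[0][0]] += np.sum(rewards) / len(rewards)  # length-normalized return
    skill_count[skill[0][0]] += 1

# Average over episodes, leaving never-sampled skills at zero.
norm_skill_reward = np.divide(skill_sum, skill_count,
                              out=np.zeros_like(skill_sum), where=skill_count > 0)
print(norm_skill_reward)  # -> [-1.  -0.5  0.   0. ]
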
test_obstacle.py (5 changes: 2 additions & 3 deletions)
@@ -1,4 +1,4 @@
- from libraries.safe.dmc import ant
+ from libraries.safe.dmc import ant_obstacle

import os
import numpy as np
@@ -16,14 +16,13 @@ def cameras(env):

def main():

- env = ant.make(task='navigate')
+ env = ant_obstacle.make(task='navigate')

out = env.reset()
while out.step_type != 2:
- action = np.random.random(8)
+ action = np.zeros(8)
out = env.step(action)
print(out)
pixel_image = cameras(env)
pixel_image.save("obstacle_image.jpg")

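For reference, the raw check out.step_type != 2 leans on dm_env's StepType enum, in which LAST has integer value 2. An equivalent, slightly more explicit zero-action rollout (a sketch, assuming the environment follows the standard dm_env TimeStep interface):

import numpy as np
import dm_env

def zero_action_rollout(env, action_dim=8):
    timestep = env.reset()
    while timestep.step_type != dm_env.StepType.LAST:  # LAST == 2
        timestep = env.step(np.zeros(action_dim))      # hold the ant's 8-dim action at zero
    return timestep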
