feature(rjy): add crowdsim env and related configs #208

Closed · wants to merge 5 commits
88 changes: 65 additions & 23 deletions lzero/worker/muzero_collector.py
@@ -571,12 +571,28 @@ def collect(self,
self._env_info[env_id]['time'] += self._timer.value + interaction_duration
if timestep.done:
reward = timestep.info['eval_episode_return']
info = {
'reward': reward,
'time': self._env_info[env_id]['time'],
'step': self._env_info[env_id]['step'],
'visit_entropy': visit_entropies_lst[env_id] / eps_steps_lst[env_id],
}
if timestep.info.get('performance_info') is not None:
Collaborator comment (resolved): Please add an introduction of this PR's functionality, as well as the experimental benchmark results, to the description.

mean_aoi = timestep.info['performance_info']['mean_aoi']
mean_energy_consumption = timestep.info['performance_info']['mean_energy_consumption']
collected_data_amount = timestep.info['performance_info']['collected_data_amount']
human_coverage = timestep.info['performance_info']['human_coverage']
info = {
'reward': reward,
'time': self._env_info[env_id]['time'],
'step': self._env_info[env_id]['step'],
'visit_entropy': visit_entropies_lst[env_id] / eps_steps_lst[env_id],
'mean_aoi': mean_aoi,
'mean_energy_consumption': mean_energy_consumption,
'collected_data_amount': collected_data_amount,
'human_coverage': human_coverage,
}
else:
info = {
'reward': reward,
'time': self._env_info[env_id]['time'],
'step': self._env_info[env_id]['step'],
'visit_entropy': visit_entropies_lst[env_id] / eps_steps_lst[env_id],
}
if self.policy_config.gumbel_algo:
info['completed_value'] = completed_value_lst[env_id] / eps_steps_lst[env_id]
collected_episode += 1
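The two branches above differ only in the four CrowdSim-specific keys. A minimal deduplicated sketch (hypothetical refactor, not part of this PR; build_collect_info is an illustrative name):

from typing import Any, Dict, Optional


def build_collect_info(reward: float, time_: float, step: int, visit_entropy: float,
                       performance_info: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    # Build the base per-episode log dict once.
    info = {
        'reward': reward,
        'time': time_,
        'step': step,
        'visit_entropy': visit_entropy,
    }
    # Extend it with the CrowdSim-specific metrics only when the env reports them
    # (mean_aoi, mean_energy_consumption, collected_data_amount, human_coverage).
    if performance_info is not None:
        info.update(performance_info)
    return info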
@@ -711,23 +727,49 @@ def _output_log(self, train_iter: int) -> None:
if self.policy_config.gumbel_algo:
completed_value = [d['completed_value'] for d in self._episode_info]
self._total_duration += duration
info = {
'episode_count': episode_count,
'envstep_count': envstep_count,
'avg_envstep_per_episode': envstep_count / episode_count,
'avg_envstep_per_sec': envstep_count / duration,
'avg_episode_per_sec': episode_count / duration,
'collect_time': duration,
'reward_mean': np.mean(episode_reward),
'reward_std': np.std(episode_reward),
'reward_max': np.max(episode_reward),
'reward_min': np.min(episode_reward),
'total_envstep_count': self._total_envstep_count,
'total_episode_count': self._total_episode_count,
'total_duration': self._total_duration,
'visit_entropy': np.mean(visit_entropy),
# 'each_reward': episode_reward,
}
if self._episode_info[0].get('mean_aoi') is not None:
episode_aoi = [d['mean_aoi'] for d in self._episode_info]
episode_energy_consumption = [d['mean_energy_consumption'] for d in self._episode_info]
episode_collected_data_amount = [d['collected_data_amount'] for d in self._episode_info]
episode_human_coverage = [d['human_coverage'] for d in self._episode_info]
info = {
'episode_count': episode_count,
'envstep_count': envstep_count,
'avg_envstep_per_episode': envstep_count / episode_count,
'avg_envstep_per_sec': envstep_count / duration,
'avg_episode_per_sec': episode_count / duration,
'collect_time': duration,
'reward_mean': np.mean(episode_reward),
'reward_std': np.std(episode_reward),
'reward_max': np.max(episode_reward),
'reward_min': np.min(episode_reward),
'total_envstep_count': self._total_envstep_count,
'total_episode_count': self._total_episode_count,
'total_duration': self._total_duration,
'visit_entropy': np.mean(visit_entropy),
'episode_mean_aoi': np.mean(episode_aoi),
'episode_mean_energy_consumption': np.mean(episode_energy_consumption),
'episode_mean_collected_data_amount': np.mean(episode_collected_data_amount),
'episode_mean_human_coverage': np.mean(episode_human_coverage),
}
else:
info = {
'episode_count': episode_count,
'envstep_count': envstep_count,
'avg_envstep_per_episode': envstep_count / episode_count,
'avg_envstep_per_sec': envstep_count / duration,
'avg_episode_per_sec': episode_count / duration,
'collect_time': duration,
'reward_mean': np.mean(episode_reward),
'reward_std': np.std(episode_reward),
'reward_max': np.max(episode_reward),
'reward_min': np.min(episode_reward),
'total_envstep_count': self._total_envstep_count,
'total_episode_count': self._total_episode_count,
'total_duration': self._total_duration,
'visit_entropy': np.mean(visit_entropy),
# 'each_reward': episode_reward,
}
if self.policy_config.gumbel_algo:
info['completed_value'] = np.mean(completed_value)
self._episode_info.clear()
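As in collect(), the two dict literals above differ only in the four aggregated CrowdSim keys. A minimal sketch of an alternative that keeps a single base dict (hypothetical helper, not part of this PR; names are illustrative):

from typing import Any, Dict, List

import numpy as np

# Mapping from the per-episode keys written in collect() to the aggregated log keys.
CROWDSIM_METRIC_KEYS = {
    'mean_aoi': 'episode_mean_aoi',
    'mean_energy_consumption': 'episode_mean_energy_consumption',
    'collected_data_amount': 'episode_mean_collected_data_amount',
    'human_coverage': 'episode_mean_human_coverage',
}


def extend_output_log(info: Dict[str, Any], episode_info: List[Dict[str, Any]]) -> Dict[str, Any]:
    # Add the mean of each optional CrowdSim metric when at least one episode reported it.
    for src_key, dst_key in CROWDSIM_METRIC_KEYS.items():
        values = [d[src_key] for d in episode_info if src_key in d]
        if values:
            info[dst_key] = float(np.mean(values))
    return info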
29 changes: 15 additions & 14 deletions lzero/worker/muzero_evaluator.py
@@ -337,21 +337,22 @@ def eval(
action_mask_dict[env_id] = to_ndarray(obs['action_mask'])
to_play_dict[env_id] = to_ndarray(obs['to_play'])

dones[env_id] = done
if t.done:
# Env reset is done by env_manager automatically.
self._policy.reset([env_id])
reward = t.info['eval_episode_return']
saved_info = {'eval_episode_return': t.info['eval_episode_return']}
if 'episode_info' in t.info:
saved_info.update(t.info['episode_info'])
eval_monitor.update_info(env_id, saved_info)
eval_monitor.update_reward(env_id, reward)
self._logger.info(
"[EVALUATOR]env {} finish episode, final reward: {}, current episode: {}".format(
env_id, eval_monitor.get_latest_reward(env_id), eval_monitor.get_current_episode()
)
dones[env_id] = done
if t.done:
# Env reset is done by env_manager automatically.
self._policy.reset([env_id])
reward = t.info['eval_episode_return']
# Only one of 'performance_info' and 'episode_info' is expected to be present.
if 'performance_info' in t.info:
eval_monitor.update_info(env_id, t.info['performance_info'])
elif 'episode_info' in t.info:
eval_monitor.update_info(env_id, t.info['episode_info'])
eval_monitor.update_reward(env_id, reward)
self._logger.info(
"[EVALUATOR]env {} finish episode, final reward: {}, current episode: {}".format(
env_id, eval_monitor.get_latest_reward(env_id), eval_monitor.get_current_episode()
)
)

# reset the finished env and init game_segments
if n_episode > self._env_num:
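For reference, a minimal sketch (an assumption for illustration only, with placeholder values) of the final-step info dict a CrowdSim-style env would need to return so that the performance_info branches in the collector and evaluator above are taken:

final_step_info = {
    'eval_episode_return': 123.4,         # placeholder cumulative episode return
    'performance_info': {
        'mean_aoi': 2.1,                  # mean age of information (placeholder value)
        'mean_energy_consumption': 0.35,  # mean UAV energy consumption (placeholder value)
        'collected_data_amount': 870.0,   # amount of data collected (placeholder value)
        'human_coverage': 0.62,           # fraction of humans covered (placeholder value)
    },
}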
Empty file added zoo/CrowdSim/__init__.py
Empty file.
103 changes: 103 additions & 0 deletions zoo/CrowdSim/config/CrowdSim_efficientzero_config.py
@@ -0,0 +1,103 @@
from easydict import EasyDict
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '2'

# ==============================================================
# begin of the most frequently changed config specified by the user
# ==============================================================
collector_env_num = 8
n_episode = 8
evaluator_env_num = 3
num_simulations = 25
update_per_collect = 100
batch_size = 256
max_env_step = int(3e5)
reanalyze_ratio = 0.
robot_num = 2
human_num = 10 # purdue
# human_num = 33 # NCSU
# human_num = 92 # KAIST
one_uav_action_space = [[0, 0], [30, 0], [-30, 0], [0, 30], [0, -30]]
# ==============================================================
# end of the most frequently changed config specified by the user
# ==============================================================

CrowdSim_efficientzero_config = dict(
exp_name=
Collaborator comment: The code needs to be reformatted.
f'result/crowd_num_human/CrowdSim_efficientzero_step{max_env_step}_uav{robot_num}_human{human_num}_upc{update_per_collect}_rr{reanalyze_ratio}_seed0',
env=dict(
env_name='CrowdSim-v0',
robot_num=robot_num,
human_num=human_num,
one_uav_action_space=one_uav_action_space,
continuous=False,
manually_discretization=False,
collector_env_num=collector_env_num,
evaluator_env_num=evaluator_env_num,
n_evaluator_episode=evaluator_env_num,
manager=dict(shared_memory=False, ),
),
policy=dict(
model=dict(
observation_shape=(robot_num+human_num)*4,
action_space_size=(len(one_uav_action_space))**robot_num,
model_type='mlp',
lstm_hidden_size=256,
latent_state_dim=256,
discrete_action_encoding_type='one_hot',
# res_connection_in_dynamics=True,
norm_type='BN',
),
cuda=True,
env_type='not_board_games',
game_segment_length=200,
update_per_collect=update_per_collect,
batch_size=batch_size,
optim_type='Adam',
lr_piecewise_constant_decay=False,
learning_rate=0.003,
num_simulations=num_simulations,
reanalyze_ratio=reanalyze_ratio,
n_episode=n_episode,
eval_freq=int(1e3),
replay_buffer_size=int(1e6),  # the size/capacity of the replay buffer, in terms of transitions.
collector_env_num=collector_env_num,
evaluator_env_num=evaluator_env_num,
),
)

CrowdSim_efficientzero_config = EasyDict(CrowdSim_efficientzero_config)
main_config = CrowdSim_efficientzero_config

CrowdSim_efficientzero_create_config = dict(
env=dict(
type='crowdsim_lightzero',
import_names=['zoo.CrowdSim.envs.CrowdSim_env'],
),
env_manager=dict(type='subprocess'),
policy=dict(
type='efficientzero',
import_names=['lzero.policy.efficientzero'],
),
collector=dict(
type='episode_muzero',
import_names=['lzero.worker.muzero_collector'],
)
)
CrowdSim_efficientzero_create_config = EasyDict(CrowdSim_efficientzero_create_config)
create_config = CrowdSim_efficientzero_create_config

if __name__ == "__main__":
# Users can use different train entry by specifying the entry_type.
entry_type = "train_muzero" # options={"train_muzero", "train_muzero_with_gym_env"}

if entry_type == "train_muzero":
from lzero.entry import train_muzero
elif entry_type == "train_muzero_with_gym_env":
"""
The ``train_muzero_with_gym_env`` entry means that the environment used in the training process is generated by wrapping the original gym environment with LightZeroEnvWrapper.
Users can refer to lzero/envs/wrappers for more details.
"""
from lzero.entry import train_muzero_with_gym_env as train_muzero

train_muzero([main_config, create_config], seed=0, max_env_step=max_env_step)
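With the values set above (robot_num=2, human_num=10, five single-UAV actions), the derived model sizes can be checked directly. A small sketch of the arithmetic, assuming each robot and human contributes a 4-dimensional feature as the observation_shape formula suggests:

robot_num = 2
human_num = 10
one_uav_action_space = [[0, 0], [30, 0], [-30, 0], [0, 30], [0, -30]]

# Flat observation vector: 4 features per entity (robots plus humans).
observation_shape = (robot_num + human_num) * 4               # (2 + 10) * 4 = 48
# Joint action space: Cartesian product of the per-UAV action spaces.
action_space_size = len(one_uav_action_space) ** robot_num    # 5 ** 2 = 25

assert observation_shape == 48 and action_space_size == 25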
105 changes: 105 additions & 0 deletions zoo/CrowdSim/config/CrowdSim_muzero_config.py
@@ -0,0 +1,105 @@
from easydict import EasyDict
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '2'
# ==============================================================
# begin of the most frequently changed config specified by the user
# ==============================================================
collector_env_num = 8
n_episode = 8
evaluator_env_num = 3
num_simulations = 25
update_per_collect = 100
batch_size = 256
max_env_step = int(3e5)
reanalyze_ratio = 0.
robot_num = 2
human_num = 10 # purdue
# human_num = 33 # NCSU
# human_num = 92 # KAIST
one_uav_action_space = [[0, 0], [30, 0], [-30, 0], [0, 30], [0, -30]]
# ==============================================================
# end of the most frequently changed config specified by the user
# ==============================================================

CrowdSim_muzero_config = dict(
exp_name=
f'result/crowd_num_human/CrowdSim_muzero_ssl_step{max_env_step}_uav{robot_num}_human{human_num}_upc{update_per_collect}_rr{reanalyze_ratio}_seed0',
env=dict(
env_name='CrowdSim-v0',
robot_num=robot_num,
human_num=human_num,
one_uav_action_space=one_uav_action_space,
continuous=False,
manually_discretization=False,
collector_env_num=collector_env_num,
evaluator_env_num=evaluator_env_num,
n_evaluator_episode=evaluator_env_num,
manager=dict(shared_memory=False, ),
),
policy=dict(
model=dict(
observation_shape=(robot_num+human_num)*4,
action_space_size=(len(one_uav_action_space))**robot_num,
model_type='mlp',
lstm_hidden_size=256,
latent_state_dim=256,
self_supervised_learning_loss=True, # NOTE: default is False.
discrete_action_encoding_type='one_hot',
res_connection_in_dynamics=True,
norm_type='BN',
),
cuda=True,
env_type='not_board_games',
game_segment_length=200,
update_per_collect=update_per_collect,
batch_size=batch_size,
optim_type='Adam',
lr_piecewise_constant_decay=False,
learning_rate=0.003,
ssl_loss_weight=2, # NOTE: default is 0.
grad_clip_value=0.5,
num_simulations=num_simulations,
reanalyze_ratio=reanalyze_ratio,
n_episode=n_episode,
eval_freq=int(1e3),
replay_buffer_size=int(1e6),  # the size/capacity of the replay buffer, in terms of transitions.
collector_env_num=collector_env_num,
evaluator_env_num=evaluator_env_num,
),
)

CrowdSim_muzero_config = EasyDict(CrowdSim_muzero_config)
main_config = CrowdSim_muzero_config

CrowdSim_muzero_create_config = dict(
env=dict(
type='crowdsim_lightzero',
import_names=['zoo.CrowdSim.envs.CrowdSim_env'],
),
env_manager=dict(type='subprocess'),
policy=dict(
type='muzero',
import_names=['lzero.policy.muzero'],
),
collector=dict(
type='episode_muzero',
import_names=['lzero.worker.muzero_collector'],
)
)
CrowdSim_muzero_create_config = EasyDict(CrowdSim_muzero_create_config)
create_config = CrowdSim_muzero_create_config

if __name__ == "__main__":
# Users can use different train entry by specifying the entry_type.
entry_type = "train_muzero" # options={"train_muzero", "train_muzero_with_gym_env"}

if entry_type == "train_muzero":
from lzero.entry import train_muzero
elif entry_type == "train_muzero_with_gym_env":
"""
The ``train_muzero_with_gym_env`` entry means that the environment used in the training process is generated by wrapping the original gym environment with LightZeroEnvWrapper.
Users can refer to lzero/envs/wrappers for more details.
"""
from lzero.entry import train_muzero_with_gym_env as train_muzero

train_muzero([main_config, create_config], seed=0, max_env_step=max_env_step)
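Because action_space_size is len(one_uav_action_space) ** robot_num, the policy outputs one flat index over the joint action space. One plausible decoding into per-UAV actions (an assumption about the env's enumeration order, shown for illustration only; the actual CrowdSim convention may differ):

from itertools import product
from typing import List

one_uav_action_space = [[0, 0], [30, 0], [-30, 0], [0, 30], [0, -30]]
robot_num = 2


def decode_joint_action(action_index: int) -> List[List[int]]:
    # Enumerate the Cartesian product of per-UAV actions and pick the indexed tuple.
    joint_actions = list(product(one_uav_action_space, repeat=robot_num))
    return [list(a) for a in joint_actions[action_index]]


# Example: under this enumeration, index 7 maps to [[30, 0], [-30, 0]].
print(decode_joint_action(7))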
Empty file added zoo/CrowdSim/config/__init__.py
Empty file.