From d3f3948ce71dab24ca8d39cb9680737c0045da01 Mon Sep 17 00:00:00 2001
From: Jackory <1579823519@qq.com>
Date: Fri, 1 Jul 2022 16:52:14 +0800
Subject: [PATCH] update artillery

---
 envs/JSBSim/tasks/singlecombat_task.py | 21 +++++++++++----------
 render_2v2_indepent.py                 |  3 ++-
 scripts/train_selfplay.sh              |  8 ++++----
 scripts/train_share_selfplay.sh        |  8 ++++----
 4 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/envs/JSBSim/tasks/singlecombat_task.py b/envs/JSBSim/tasks/singlecombat_task.py
index a7fa210..6fede55 100755
--- a/envs/JSBSim/tasks/singlecombat_task.py
+++ b/envs/JSBSim/tasks/singlecombat_task.py
@@ -15,7 +15,7 @@ class SingleCombatTask(BaseTask):
     def __init__(self, config):
         super().__init__(config)
         self.use_baseline = getattr(self.config, 'use_baseline', False)
-        self.use_artllery = getattr(self.config, 'use_artillery', False)
+        self.use_artillery = getattr(self.config, 'use_artillery', False)
         if self.use_baseline:
             self.baseline_agent = self.load_agent(self.config.baseline_type)
 
@@ -155,19 +155,19 @@ def reset(self, env):
 
     def step(self, env):
         def _orientation_fn(AO):
-            if AO >= 0 and AO <= 1.0472: # [0, pi/3]
-                return 1 - AO / 1.0472
-            elif AO >= -1.0472 and AO <= 0: # [-pi/3, 0]
-                return 1 + AO / 1.0472
+            if AO >= 0 and AO <= 0.5236: # [0, pi/6]
+                return 1 - AO / 0.5236
+            elif AO >= -0.5236 and AO <= 0: # [-pi/6, 0]
+                return 1 + AO / 0.5236
             return 0
         def _distance_fn(R):
-            if R <=3: # [0, 3km]
+            if R <=1: # [0, 1km]
                 return 1
-            elif R > 3 and R <= 10: # [3km, 10km]
-                return (10 - R) / 7.
+            elif R > 1 and R <= 3: # [1km, 3km]
+                return (3 - R) / 2.
             else:
                 return 0
-        if self.use_artllery:
+        if self.use_artillery:
             for agent_id in env.agents.keys():
                 ego_feature = np.hstack([env.agents[agent_id].get_position(),
                                          env.agents[agent_id].get_velocity()])
@@ -176,8 +176,9 @@ def _distance_fn(R):
                     enm_feature = np.hstack([enm.get_position(),
                                              enm.get_velocity()])
                     AO, _, R = get_AO_TA_R(ego_feature, enm_feature)
-                    enm.bloods -= _orientation_fn(AO) * _distance_fn(R/1000)
+                    # if agent_id == 'A0100' and enm.uid == 'B0100':
+                    #     print(f"AO: {AO * 180 / np.pi}, {_orientation_fn(AO)}, dis:{R/1000}, {_distance_fn(R/1000)}")
 
     def get_reward(self, env, agent_id, info=...):
         if self._agent_die_flag.get(agent_id, False):
diff --git a/render_2v2_indepent.py b/render_2v2_indepent.py
index 6445dcc..50bf596 100644
--- a/render_2v2_indepent.py
+++ b/render_2v2_indepent.py
@@ -4,6 +4,7 @@
 from envs.env_wrappers import SubprocVecEnv, DummyVecEnv
 from envs.JSBSim.core.catalog import Catalog as c
 from algorithms.ppo.ppo_actor import PPOActor
+from gym import spaces
 import time
 import logging
 logging.basicConfig(level=logging.DEBUG)
@@ -49,7 +50,7 @@ def convert(obs):
 
 args = Args()
 ego_policy = PPOActor(args, env.observation_space, env.action_space, device=torch.device("cuda"))
-enm_policy = PPOActor(args, SingleCombatEnv("1v1/NoWeapon/HierarchySelfplay").observation_space, env.action_space, device=torch.device("cuda"))
+enm_policy = PPOActor(args, spaces.Box(low=-10, high=10., shape=(15,)), env.action_space, device=torch.device("cuda"))
 ego_policy.eval()
 enm_policy.eval()
 ego_policy.load_state_dict(torch.load(ego_run_dir + f"/actor_{ego_policy_index}.pt"))
diff --git a/scripts/train_selfplay.sh b/scripts/train_selfplay.sh
index 9a82d3b..35858f1 100644
--- a/scripts/train_selfplay.sh
+++ b/scripts/train_selfplay.sh
@@ -1,14 +1,14 @@
 #!/bin/sh
-env="MultipleCombat"
-scenario="2v2/NoWeapon/HierarchySelfplay"
+env="SingleCombat"
+scenario="1v1/NoWeapon/HierarchySelfplay"
 algo="ppo"
-exp="artillery_check"
+exp="artillery_update"
 seed=1
 
 echo "env is ${env}, scenario is ${scenario}, algo is ${algo}, exp is ${exp}, seed is ${seed}"
 CUDA_VISIBLE_DEVICES=1 python train/train_jsbsim.py \
     --env-name ${env} --algorithm-name ${algo} --scenario-name ${scenario} --experiment-name ${exp} \
-    --seed ${seed} --n-training-threads 1 --n-rollout-threads 64 --cuda --log-interval 1 --save-interval 1 \
+    --seed ${seed} --n-training-threads 1 --n-rollout-threads 32 --cuda --log-interval 1 --save-interval 1 \
     --use-selfplay --selfplay-algorithm "fsp" --n-choose-opponents 4 \
     --use-eval --n-eval-rollout-threads 1 --eval-interval 1 --eval-episodes 4 \
     --num-mini-batch 5 --buffer-size 3000 --num-env-steps 1e8 \
diff --git a/scripts/train_share_selfplay.sh b/scripts/train_share_selfplay.sh
index c8ad7dc..ad76ef5 100644
--- a/scripts/train_share_selfplay.sh
+++ b/scripts/train_share_selfplay.sh
@@ -2,16 +2,16 @@
 env="MultipleCombat"
 scenario="2v2/NoWeapon/HierarchySelfplay"
 algo="mappo"
-exp="cooperate"
+exp="artillery_update"
 seed=0
 
 echo "env is ${env}, scenario is ${scenario}, algo is ${algo}, exp is ${exp}, seed is ${seed}"
 CUDA_VISIBLE_DEVICES=0 python train/train_jsbsim.py \
     --env-name ${env} --algorithm-name ${algo} --scenario-name ${scenario} --experiment-name ${exp} \
-    --seed 1 --n-training-threads 1 --n-rollout-threads 32 --cuda --log-interval 1 --save-interval 10 \
+    --seed 1 --n-training-threads 1 --n-rollout-threads 32 --cuda --log-interval 1 --save-interval 1 \
     --num-mini-batch 5 --buffer-size 3000 --num-env-steps 1e8 \
     --lr 3e-4 --gamma 0.99 --ppo-epoch 4 --clip-params 0.2 --max-grad-norm 2 --entropy-coef 1e-3 \
     --hidden-size "128 128" --act-hidden-size "128 128" --recurrent-hidden-size 128 --recurrent-hidden-layers 1 --data-chunk-length 8 \
-    --use-selfplay --selfplay-algorithm "fsp" --n-choose-opponents 1 \
-    --use-eval --n-eval-rollout-threads 1 --eval-interval 20 --eval-episodes 2 \
+    --use-selfplay --selfplay-algorithm "fsp" --n-choose-opponents 4 \
+    --use-eval --n-eval-rollout-threads 1 --eval-interval 1 --eval-episodes 4 \
     --user-name "jyh" --use-wandb --wandb-name "jyh" \