From d3f3948ce71dab24ca8d39cb9680737c0045da01 Mon Sep 17 00:00:00 2001
From: Jackory <1579823519@qq.com>
Date: Fri, 1 Jul 2022 16:52:14 +0800
Subject: [PATCH] update artillery

---
 envs/JSBSim/tasks/singlecombat_task.py | 21 +++++++++++----------
 render_2v2_indepent.py                 |  3 ++-
 scripts/train_selfplay.sh              |  8 ++++----
 scripts/train_share_selfplay.sh        |  8 ++++----
 4 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/envs/JSBSim/tasks/singlecombat_task.py b/envs/JSBSim/tasks/singlecombat_task.py
index a7fa210..6fede55 100755
--- a/envs/JSBSim/tasks/singlecombat_task.py
+++ b/envs/JSBSim/tasks/singlecombat_task.py
@@ -15,7 +15,7 @@ class SingleCombatTask(BaseTask):
     def __init__(self, config):
         super().__init__(config)
         self.use_baseline = getattr(self.config, 'use_baseline', False)
-        self.use_artllery = getattr(self.config, 'use_artillery', False)
+        self.use_artillery = getattr(self.config, 'use_artillery', False)
         if self.use_baseline:
             self.baseline_agent = self.load_agent(self.config.baseline_type)
 
@@ -155,19 +155,19 @@ def reset(self, env):
 
     def step(self, env):
         def _orientation_fn(AO):
-            if AO >= 0 and AO <= 1.0472: # [0, pi/3]
-                return 1 - AO / 1.0472
-            elif AO >= -1.0472 and AO <= 0: # [-pi/3, 0]
-                return 1 + AO / 1.0472
+            if AO >= 0 and AO <= 0.5236: # [0, pi/6]
+                return 1 - AO / 0.5236
+            elif AO >= -0.5236 and AO <= 0: # [-pi/6, 0]
+                return 1 + AO / 0.5236
             return 0
         def _distance_fn(R):
-            if R <=3: # [0, 3km]
+            if R <=1: # [0, 1km]
                 return 1
-            elif R > 3 and R <= 10: # [3km, 10km]
-                return (10 - R) / 7.
+            elif R > 1 and R <= 3: # [1km, 3km]
+                return (3 - R) / 2.
             else:
                 return 0
-        if self.use_artllery:
+        if self.use_artillery:
             for agent_id in env.agents.keys():
                 ego_feature = np.hstack([env.agents[agent_id].get_position(),
                                          env.agents[agent_id].get_velocity()])
@@ -176,8 +176,9 @@ def _distance_fn(R):
                     enm_feature = np.hstack([enm.get_position(),
                                              enm.get_velocity()])
                     AO, _, R = get_AO_TA_R(ego_feature, enm_feature)
-                    enm.bloods -= _orientation_fn(AO) * _distance_fn(R/1000)
+                    # if agent_id == 'A0100' and enm.uid == 'B0100':
+                    #     print(f"AO: {AO * 180 / np.pi}, {_orientation_fn(AO)}, dis:{R/1000}, {_distance_fn(R/1000)}")
 
     def get_reward(self, env, agent_id, info=...):
         if self._agent_die_flag.get(agent_id, False):
diff --git a/render_2v2_indepent.py b/render_2v2_indepent.py
index 6445dcc..50bf596 100644
--- a/render_2v2_indepent.py
+++ b/render_2v2_indepent.py
@@ -4,6 +4,7 @@
 from envs.env_wrappers import SubprocVecEnv, DummyVecEnv
 from envs.JSBSim.core.catalog import Catalog as c
 from algorithms.ppo.ppo_actor import PPOActor
+from gym import spaces
 import time
 import logging
 logging.basicConfig(level=logging.DEBUG)
@@ -49,7 +50,7 @@ def convert(obs):
 
 args = Args()
 ego_policy = PPOActor(args, env.observation_space, env.action_space, device=torch.device("cuda"))
-enm_policy = PPOActor(args, SingleCombatEnv("1v1/NoWeapon/HierarchySelfplay").observation_space, env.action_space, device=torch.device("cuda"))
+enm_policy = PPOActor(args, spaces.Box(low=-10, high=10., shape=(15,)), env.action_space, device=torch.device("cuda"))
 ego_policy.eval()
 enm_policy.eval()
 ego_policy.load_state_dict(torch.load(ego_run_dir + f"/actor_{ego_policy_index}.pt"))
diff --git a/scripts/train_selfplay.sh b/scripts/train_selfplay.sh
index 9a82d3b..35858f1 100644
--- a/scripts/train_selfplay.sh
+++ b/scripts/train_selfplay.sh
@@ -1,14 +1,14 @@
 #!/bin/sh
-env="MultipleCombat"
-scenario="2v2/NoWeapon/HierarchySelfplay"
+env="SingleCombat"
+scenario="1v1/NoWeapon/HierarchySelfplay"
 algo="ppo"
-exp="artillery_check"
+exp="artillery_update"
 seed=1
 
 echo "env is ${env}, scenario is ${scenario}, algo is ${algo}, exp is ${exp}, seed is ${seed}"
 CUDA_VISIBLE_DEVICES=1 python train/train_jsbsim.py \
     --env-name ${env} --algorithm-name ${algo} --scenario-name ${scenario} --experiment-name ${exp} \
-    --seed ${seed} --n-training-threads 1 --n-rollout-threads 64 --cuda --log-interval 1 --save-interval 1 \
+    --seed ${seed} --n-training-threads 1 --n-rollout-threads 32 --cuda --log-interval 1 --save-interval 1 \
     --use-selfplay --selfplay-algorithm "fsp" --n-choose-opponents 4 \
     --use-eval --n-eval-rollout-threads 1 --eval-interval 1 --eval-episodes 4 \
     --num-mini-batch 5 --buffer-size 3000 --num-env-steps 1e8 \
diff --git a/scripts/train_share_selfplay.sh b/scripts/train_share_selfplay.sh
index c8ad7dc..ad76ef5 100644
--- a/scripts/train_share_selfplay.sh
+++ b/scripts/train_share_selfplay.sh
@@ -2,16 +2,16 @@
 env="MultipleCombat"
 scenario="2v2/NoWeapon/HierarchySelfplay"
 algo="mappo"
-exp="cooperate"
+exp="artillery_update"
 seed=0
 
 echo "env is ${env}, scenario is ${scenario}, algo is ${algo}, exp is ${exp}, seed is ${seed}"
 CUDA_VISIBLE_DEVICES=0 python train/train_jsbsim.py \
     --env-name ${env} --algorithm-name ${algo} --scenario-name ${scenario} --experiment-name ${exp} \
-    --seed 1 --n-training-threads 1 --n-rollout-threads 32 --cuda --log-interval 1 --save-interval 10 \
+    --seed 1 --n-training-threads 1 --n-rollout-threads 32 --cuda --log-interval 1 --save-interval 1 \
     --num-mini-batch 5 --buffer-size 3000 --num-env-steps 1e8 \
     --lr 3e-4 --gamma 0.99 --ppo-epoch 4 --clip-params 0.2 --max-grad-norm 2 --entropy-coef 1e-3 \
     --hidden-size "128 128" --act-hidden-size "128 128" --recurrent-hidden-size 128 --recurrent-hidden-layers 1 --data-chunk-length 8 \
-    --use-selfplay --selfplay-algorithm "fsp" --n-choose-opponents 1 \
-    --use-eval --n-eval-rollout-threads 1 --eval-interval 20 --eval-episodes 2 \
+    --use-selfplay --selfplay-algorithm "fsp" --n-choose-opponents 4 \
+    --use-eval --n-eval-rollout-threads 1 --eval-interval 1 --eval-episodes 4 \
     --user-name "jyh" --use-wandb --wandb-name "jyh" \