Skip to content

Commit

Permalink
update artillery
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jackory committed Jul 5, 2022
1 parent 351cca4 commit d3f3948
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 19 deletions.
21 changes: 11 additions & 10 deletions envs/JSBSim/tasks/singlecombat_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class SingleCombatTask(BaseTask):
def __init__(self, config):
super().__init__(config)
self.use_baseline = getattr(self.config, 'use_baseline', False)
self.use_artllery = getattr(self.config, 'use_artillery', False)
self.use_artillery = getattr(self.config, 'use_artillery', False)
if self.use_baseline:
self.baseline_agent = self.load_agent(self.config.baseline_type)

Expand Down Expand Up @@ -155,19 +155,19 @@ def reset(self, env):

def step(self, env):
def _orientation_fn(AO):
if AO >= 0 and AO <= 1.0472: # [0, pi/3]
return 1 - AO / 1.0472
elif AO >= -1.0472 and AO <= 0: # [-pi/3, 0]
return 1 + AO / 1.0472
if AO >= 0 and AO <= 0.5236: # [0, pi/6]
return 1 - AO / 0.5236
elif AO >= -0.5236 and AO <= 0: # [-pi/6, 0]
return 1 + AO / 0.5236
return 0
def _distance_fn(R):
if R <=3: # [0, 3km]
if R <=1: # [0, 1km]
return 1
elif R > 3 and R <= 10: # [3km, 10km]
return (10 - R) / 7.
elif R > 1 and R <= 3: # [1km, 3km]
return (3 - R) / 2.
else:
return 0
if self.use_artllery:
if self.use_artillery:
for agent_id in env.agents.keys():
ego_feature = np.hstack([env.agents[agent_id].get_position(),
env.agents[agent_id].get_velocity()])
Expand All @@ -176,8 +176,9 @@ def _distance_fn(R):
enm_feature = np.hstack([enm.get_position(),
enm.get_velocity()])
AO, _, R = get_AO_TA_R(ego_feature, enm_feature)

enm.bloods -= _orientation_fn(AO) * _distance_fn(R/1000)
# if agent_id == 'A0100' and enm.uid == 'B0100':
# print(f"AO: {AO * 180 / np.pi}, {_orientation_fn(AO)}, dis:{R/1000}, {_distance_fn(R/1000)}")

def get_reward(self, env, agent_id, info=...):
if self._agent_die_flag.get(agent_id, False):
Expand Down
3 changes: 2 additions & 1 deletion render_2v2_indepent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from envs.env_wrappers import SubprocVecEnv, DummyVecEnv
from envs.JSBSim.core.catalog import Catalog as c
from algorithms.ppo.ppo_actor import PPOActor
from gym import spaces
import time
import logging
logging.basicConfig(level=logging.DEBUG)
Expand Down Expand Up @@ -49,7 +50,7 @@ def convert(obs):
args = Args()

ego_policy = PPOActor(args, env.observation_space, env.action_space, device=torch.device("cuda"))
enm_policy = PPOActor(args, SingleCombatEnv("1v1/NoWeapon/HierarchySelfplay").observation_space, env.action_space, device=torch.device("cuda"))
enm_policy = PPOActor(args, spaces.Box(low=-10, high=10., shape=(15,)), env.action_space, device=torch.device("cuda"))
ego_policy.eval()
enm_policy.eval()
ego_policy.load_state_dict(torch.load(ego_run_dir + f"/actor_{ego_policy_index}.pt"))
Expand Down
8 changes: 4 additions & 4 deletions scripts/train_selfplay.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#!/bin/sh
env="MultipleCombat"
scenario="2v2/NoWeapon/HierarchySelfplay"
env="SingleCombat"
scenario="1v1/NoWeapon/HierarchySelfplay"
algo="ppo"
exp="artillery_check"
exp="artillery_update"
seed=1

echo "env is ${env}, scenario is ${scenario}, algo is ${algo}, exp is ${exp}, seed is ${seed}"
CUDA_VISIBLE_DEVICES=1 python train/train_jsbsim.py \
--env-name ${env} --algorithm-name ${algo} --scenario-name ${scenario} --experiment-name ${exp} \
--seed ${seed} --n-training-threads 1 --n-rollout-threads 64 --cuda --log-interval 1 --save-interval 1 \
--seed ${seed} --n-training-threads 1 --n-rollout-threads 32 --cuda --log-interval 1 --save-interval 1 \
--use-selfplay --selfplay-algorithm "fsp" --n-choose-opponents 4 \
--use-eval --n-eval-rollout-threads 1 --eval-interval 1 --eval-episodes 4 \
--num-mini-batch 5 --buffer-size 3000 --num-env-steps 1e8 \
Expand Down
8 changes: 4 additions & 4 deletions scripts/train_share_selfplay.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
env="MultipleCombat"
scenario="2v2/NoWeapon/HierarchySelfplay"
algo="mappo"
exp="cooperate"
exp="artillery_update"
seed=0

echo "env is ${env}, scenario is ${scenario}, algo is ${algo}, exp is ${exp}, seed is ${seed}"
CUDA_VISIBLE_DEVICES=0 python train/train_jsbsim.py \
--env-name ${env} --algorithm-name ${algo} --scenario-name ${scenario} --experiment-name ${exp} \
--seed 1 --n-training-threads 1 --n-rollout-threads 32 --cuda --log-interval 1 --save-interval 10 \
--seed 1 --n-training-threads 1 --n-rollout-threads 32 --cuda --log-interval 1 --save-interval 1 \
--num-mini-batch 5 --buffer-size 3000 --num-env-steps 1e8 \
--lr 3e-4 --gamma 0.99 --ppo-epoch 4 --clip-params 0.2 --max-grad-norm 2 --entropy-coef 1e-3 \
--hidden-size "128 128" --act-hidden-size "128 128" --recurrent-hidden-size 128 --recurrent-hidden-layers 1 --data-chunk-length 8 \
--use-selfplay --selfplay-algorithm "fsp" --n-choose-opponents 1 \
--use-eval --n-eval-rollout-threads 1 --eval-interval 20 --eval-episodes 2 \
--use-selfplay --selfplay-algorithm "fsp" --n-choose-opponents 4 \
--use-eval --n-eval-rollout-threads 1 --eval-interval 1 --eval-episodes 4 \
--user-name "jyh" --use-wandb --wandb-name "jyh" \

0 comments on commit d3f3948

Please sign in to comment.