Skip to content

Commit

Permalink
add pomdp-solve example
Browse files Browse the repository at this point in the history
  • Loading branch information
Limmen committed May 7, 2024
1 parent d0ab5ac commit c50aefd
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 41 deletions.
11 changes: 0 additions & 11 deletions examples/plotting/orlando_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ def plot_returns(returns_means, returns_stds, file_name: str, fontsize: int = 18
plt.rcParams.update({'font.size': fontsize})
plt.rcParams['font.family'] = ['serif']
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(11, 4))
# ax.plot(np.array(list(range(len(returns_means))))*10, returns_means, label=r"$\pi^{(i)}_D$ simulation",
# marker="o", ls='-', color="black", markevery=1, markersize=3, lw=0.75)
# ax.fill_between(np.array(list(range(len(returns_means))))*10, returns_means - returns_stds,
# returns_means + returns_stds, alpha=0.35, color="black", lw=0.75)

ax.plot(np.array(list(range(len(returns_means)))) * 200, returns_means, label=r"$\pi^{(i)}_D$ simulation",
marker="o", ls='-', color="black", markevery=1, markersize=3, lw=0.75)
Expand All @@ -32,10 +28,6 @@ def plot_returns(returns_means, returns_stds, file_name: str, fontsize: int = 18
ax.spines['right'].set_visible(False)
ax.set_xlabel(r"iteration $k$")
ax.set_ylabel(r"Avg cumulative reward")
# ax.legend(loc='upper center', bbox_to_anchor=(0.51, -0.15),
# ncol=4, fancybox=True, shadow=False, handletextpad=0.4, labelspacing=0.5, columnspacing=0.65,
# fontsize=fontsize)
# ax.set_title(r"\textsc{ppo-base}-1")
ax.set_title(r"\textsc{dqn-base}-1")
fig.tight_layout()
fig.subplots_adjust(wspace=0.0, hspace=0.75)
Expand All @@ -58,11 +50,8 @@ def plot_returns(returns_means, returns_stds, file_name: str, fontsize: int = 18
metrics = experiment_result.all_metrics[seed]
returns = metrics["average_return"]
folder = "/home/kim/orlando_results/12_jan/"
# plot_file_name = "ppo_base_1"
result_file_name = "ppo_base_1.json"
plot_file_name = "dqn_base_1"
experiment_result.to_json_file(f"{folder}/{result_file_name}")
# plot_returns(returns_means=np.array(returns), returns_stds=np.array([0]*len(returns)),
# file_name=f"{folder}{plot_file_name}")
plot_returns(returns_means=np.array(dqn_results), returns_stds=np.array([0] * len(dqn_results)),
file_name=f"{folder}{plot_file_name}")
30 changes: 0 additions & 30 deletions examples/plotting/plot_learning_curves_tolerance.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,6 @@
from csle_common.metastore.metastore_facade import MetastoreFacade

if __name__ == '__main__':
# cross_entropy_ids = MetastoreFacade.list_experiment_executions_ids()
# for id in cross_entropy_ids:
# ex = MetastoreFacade.get_experiment_execution(id=id[0])
# print(f"id: {ex.id}, agent: {ex.config.agent_type}, BTR: {ex.config.hparams['L'].value}")
# import sys
# sys.exit(0)
fontsize: int = 14
lw: float = 0.75
alpha: float = 0.35
Expand Down Expand Up @@ -54,32 +48,8 @@
col += 1
if col >= 8:
col = 0
# for i in range(len(cross_entropy_ids)):
# BTR = 11 + i
# ax[1][i].spines['top'].set_visible(False)
# ax[1][i].spines['right'].set_visible(False)
# ax[1][i].set_xlabel(r"Time (min)", fontsize=fontsize)
# if i == 0:
# ax[1][i].set_ylabel(r"Average cost $J_i$", fontsize=fontsize)
# ax[1][i].set_title(r"$\Delta_{\mathrm{R}}=" + str(BTR) + "$", fontsize=fontsize)
# ax[1][i].tick_params(axis='both', which='major', labelsize=fontsize)
# ax[1][i].tick_params(axis='both', which='minor', labelsize=fontsize)
# ax[1][i].set_xlabel(r"Time (min)", fontsize=fontsize)
# for i in range(len(cross_entropy_ids)):
# BTR = 11 + i
# ax[2][i].spines['top'].set_visible(False)
# ax[2][i].spines['right'].set_visible(False)
# ax[2][i].set_xlabel(r"Time (min)", fontsize=fontsize)
# if i == 0:
# ax[2][i].set_ylabel(r"Average cost $J_i$", fontsize=fontsize)
# ax[2][i].set_title(r"$\Delta_{\mathrm{R}}=" + str(BTR) + "$", fontsize=fontsize)
# ax[2][i].tick_params(axis='both', which='major', labelsize=fontsize)
# ax[2][i].tick_params(axis='both', which='minor', labelsize=fontsize)
# ax[2][i].set_xlabel(r"Time (min)", fontsize=fontsize)
fig.tight_layout()
fig.subplots_adjust(wspace=0.55, hspace=0.4, bottom=0.14)
# fig.savefig(file_name + ".png", format="png", dpi=600)
# fig.savefig(file_name + ".pdf", format='pdf', dpi=600, bbox_inches='tight', transparent=True)
handles, labels = ax[0][0].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.52, -0.02),
ncol=8, fancybox=False, shadow=False, handletextpad=0.4, labelspacing=0.5, columnspacing=0.65,
Expand Down
25 changes: 25 additions & 0 deletions examples/training/pomdp_solve/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# pomdp-solve

This directory contains example scripts for solving partially observable Markov decision processes (POMDPs) using [pomdp-solve](https://www.pomdp.org/code/index.html).

Command for running pomdp-solve with an infinite time horizon and a discount factor:
```bash
nohup pomdp-solve -pomdp intrusion_recovery.pomdp -discount 0.995 -method incprune > inf_gamma_099.log &
```

Command for running pomdp-solve with a fixed time horizon and no discount:
```bash
nohup pomdp-solve -pomdp intrusion_recovery.pomdp -horizon 100 -method incprune > 100_solve.log &
```

## Author & Maintainer

Kim Hammar <kimham@kth.se>

## Copyright and license

[LICENSE](../../../LICENSE.md)

Creative Commons

(C) 2020-2024, Kim Hammar
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import numpy as np
from csle_tolerance.dao.intrusion_recovery_pomdp_config import IntrusionRecoveryPomdpConfig
from csle_tolerance.util.intrusion_recovery_pomdp_util import IntrusionRecoveryPomdpUtil
from csle_tolerance.util.pomdp_solve_parser import PomdpSolveParser


def lower_envelope(alpha_vectors, belief_space):
    """
    Evaluates the pointwise minimum of a set of negated alpha vectors over a grid of two-state beliefs.

    Each entry of alpha_vectors is indexed as entry[0] (the action) and entry[1] (the vector components).
    Only the first two components of each vector are used, and they are negated before evaluation.
    For every belief point b, the belief vector [1 - b, b] is dotted with each negated vector and the
    smallest result is kept.

    :param alpha_vectors: sequence of entries where entry[1] holds the components of an alpha vector
    :param belief_space: iterable of scalar belief points b
    :return: a list with the minimal dot product for each belief point, in the order of belief_space
    :raises ValueError: if alpha_vectors is empty
    """
    if len(alpha_vectors) == 0:
        raise ValueError("alpha_vectors must contain at least one vector")
    # Loop-invariant work: negate and truncate each vector once instead of once per belief point
    negated_vectors = [-np.array(entry[1][0:2]) for entry in alpha_vectors]
    values = []
    for b in belief_space:
        b_vec = [1 - b, b]
        dot_vals = [np.dot(b_vec, vector) for vector in negated_vectors]
        values.append(dot_vals[np.argmin(dot_vals)])
    return values


if __name__ == '__main__':
    # Parameters of the intrusion recovery POMDP
    eta = 2
    p_a = 0.05
    p_c_1 = 0.01
    p_c_2 = 0.01
    p_u = 0.0
    BTR = np.inf
    negate_costs = False
    discount_factor = 1 - p_c_1
    num_observations = 100
    simulation_name = "csle-tolerance-intrusion-recovery-pomdp-defender-001"

    # Build the cost, observation and transition tensors of the model
    cost_tensor = IntrusionRecoveryPomdpUtil.cost_tensor(
        eta=eta, states=IntrusionRecoveryPomdpUtil.state_space(),
        actions=IntrusionRecoveryPomdpUtil.action_space(), negate=negate_costs)
    observation_tensor = IntrusionRecoveryPomdpUtil.observation_tensor(
        states=IntrusionRecoveryPomdpUtil.state_space(),
        observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations))
    transition_tensor = IntrusionRecoveryPomdpUtil.transition_tensor(
        states=IntrusionRecoveryPomdpUtil.state_space(), actions=IntrusionRecoveryPomdpUtil.action_space(),
        p_a=p_a, p_c_1=p_c_1, p_c_2=p_c_2, p_u=p_u)
    config = IntrusionRecoveryPomdpConfig(
        eta=eta, p_a=p_a, p_c_1=p_c_1, p_c_2=p_c_2, p_u=p_u, BTR=BTR, negate_costs=negate_costs, seed=999,
        discount_factor=discount_factor, states=IntrusionRecoveryPomdpUtil.state_space(),
        actions=IntrusionRecoveryPomdpUtil.action_space(),
        observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
        cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
        b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
        simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
    )

    # Write the model to disk as a pomdp-solve input file
    pomdp_solve_file_str = IntrusionRecoveryPomdpUtil.pomdp_solver_file(config=config)
    with open("/home/kim/gamesec24/intrusion_recovery.pomdp", 'w') as f:
        f.write(pomdp_solve_file_str)

    # NOTE(review): this .alpha file must already exist when the script runs; presumably it is the output of
    # a separate pomdp-solve run on the .pomdp file written above -- confirm the file name before running.
    alpha_vectors = PomdpSolveParser.parse_alpha_vectors(
        file_path="/home/kim/gamesec24/intrusion_recovery-3361312.alpha")

    # 100 evenly spaced belief points on [0, 1], both endpoints included (so the spacing is 1/99, not 0.01)
    belief_space = np.linspace(0.0, 1, int(1.0 / 0.01))
    print(belief_space)

    # Print the action and the negated first two components of every alpha vector
    for entry in alpha_vectors:
        print(f"a*:{entry[0]}, vector: {list(-np.array(entry[1][0:2]))}")

    # Print one "belief value" row per belief point
    values_01 = lower_envelope(alpha_vectors=alpha_vectors, belief_space=belief_space)
    for b, value in zip(belief_space, values_01):
        print(f"{b} {value}")

0 comments on commit c50aefd

Please sign in to comment.