random_exploration.py
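"""Random-exploration baseline.

Runs a RandomAgent (uniform sampling over the valid actions) through Ray
rollout workers and logs the resulting speedup-based rewards and episode
lengths to MLflow.

Example:
    python random_exploration.py --num-nodes 4 --name experiment_101
"""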
import argparse as arg
import math

import mlflow
import ray
import torch
import torch.nn as nn
from numpy import mean
from torch.distributions import Categorical

from agent.rollout_worker import RolloutWorker, Transition
from config.config import Config
from utils.dataset_actor.dataset_actor import DatasetActor


class RandomAgent(nn.Module):
    """Samples uniformly over the valid actions, ignoring the observation."""

    def __init__(self) -> None:
        super(RandomAgent, self).__init__()

    def forward(self, data=None, actions_mask=None, action=None):
        # Masked actions get a large negative logit, so sampling is
        # effectively uniform over the remaining valid actions.
        logits = -actions_mask * 1e8
        probs = Categorical(logits=logits)
        return probs.sample(), torch.tensor([0]), torch.tensor([0]), torch.tensor([0])


total_steps = 10_000_000  # total environment steps to collect
batch_size = 1000  # minimum number of steps gathered per logging batch
if "__main__" == __name__ :
parser = arg.ArgumentParser()
parser.add_argument("--num-nodes", default=1, type=int)
parser.add_argument("--name", type=str, default="experiment_101")
args = parser.parse_args()
run_name = args.name
NUM_ROLLOUT_WORKERS = args.num_nodes
if NUM_ROLLOUT_WORKERS > 1 :
ray.init("auto")
else :
ray.init()
Config.init()
dataset_worker = DatasetActor.remote(Config.config.dataset)
device = "cpu"
random_agent = RandomAgent().to(device)
rollout_workers = [
RolloutWorker.options(
num_cpus=24, num_gpus=0, scheduling_strategy="SPREAD"
).remote(dataset_worker, Config.config, worker_id=200+i)
for i in range(NUM_ROLLOUT_WORKERS)
]

    with mlflow.start_run(
        run_name=run_name,
    ) as run:
        mlflow.log_params(
            {
                "Total Steps": total_steps,
                "NUM_ROLLOUT_WORKERS": NUM_ROLLOUT_WORKERS,
            }
        )

        global_steps = 0
        while global_steps < total_steps:
            num_steps = 0
            b_speedups = []
            avg_episode_length = 0
            m = 0  # number of episodes averaged so far in this batch

            # Collect rollouts until the batch holds at least `batch_size` steps.
            while num_steps < batch_size:
                results = ray.get(
                    [
                        rollout_workers[i].rollout.remote(random_agent.to("cpu"), "cpu")
                        for i in range(NUM_ROLLOUT_WORKERS)
                    ]
                )
                for result in results:
                    # The reward is the log (base 4) of the achieved speedup.
                    b_speedups.append(math.log(result["speedup"], 4))
                    trajectory_len = len(result["trajectory"])
                    # Incrementally update the mean episode length.
                    avg_episode_length = (m * avg_episode_length + trajectory_len) / (m + 1)
                    m += 1
                    num_steps += trajectory_len
                # Reset the workers so the next rollouts start fresh episodes.
                ray.get(
                    [
                        rollout_workers[i].reset.remote()
                        for i in range(NUM_ROLLOUT_WORKERS)
                    ]
                )

            global_steps += num_steps
            speedups_mean = mean(b_speedups)
            infos = {
                "Reward average": speedups_mean,
                "Reward min": min(b_speedups),
                "Reward max": max(b_speedups),
                "Episode length mean": avg_episode_length,
            }
            print(infos)
            mlflow.log_metrics(
                infos,
                step=global_steps,
            )

    mlflow.end_run()
    ray.shutdown()