-
Notifications
You must be signed in to change notification settings - Fork 2
/
shortest.py
76 lines (62 loc) · 2.07 KB
/
shortest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import numpy as np
import itertools
"""
Use Floyd-Warshall to compute shortest distances between all states to compute optimal reward in expectation
"""
def shortest_distances(board):
    """Compute all-pairs shortest path distances between free cells of a grid.

    Runs Floyd-Warshall over the free (non-'#') cells of *board*, where two
    cells are connected with edge weight 1 iff they are orthogonally adjacent
    (Manhattan distance 1 — both endpoints are free by construction).

    Args:
        board: list of equal-length strings; '#' marks a wall, any other
            character is a free cell.

    Returns:
        (free_spaces, dist): ``free_spaces`` is a list of (row, col) tuples
        for every free cell; ``dist`` maps each ordered pair ``(u, v)`` of
        free cells to the shortest path length between them (``np.inf`` if
        unreachable).
    """
    arr = np.array([list(row) for row in board])
    free_spaces = list(map(tuple, np.argwhere(arr != '#')))
    # Initialize: self-distance 0, orthogonal neighbours 1, everything else inf.
    dist = {(u, v): np.inf for u in free_spaces for v in free_spaces}
    for u, v in dist:
        manhattan = abs(u[0] - v[0]) + abs(u[1] - v[1])
        if manhattan == 0:
            dist[(u, v)] = 0
        elif manhattan == 1:
            dist[(u, v)] = 1
    # Floyd-Warshall relaxation: allow each cell k as an intermediate vertex.
    for k in free_spaces:
        for i in free_spaces:
            ik = dist[(i, k)]  # invariant w.r.t. j — hoisted out of inner loop
            for j in free_spaces:
                via_k = ik + dist[(k, j)]
                if via_k < dist[(i, j)]:
                    dist[(i, j)] = via_k
    return free_spaces, dist


def main():
    """Print the per-pair rewards and their mean/std over all cell pairs."""
    board = ['##########',
             '#        #',
             '#        #',
             '#  #     #',
             '#  ##    #',
             '#   ##   #',
             '#    #   #',
             '#        #',
             '#        #',
             '##########']
    free_spaces, dist = shortest_distances(board)

    reward = []
    N = 2  # size of each combination: one start point plus N-1 goal points
    for points in itertools.combinations(free_spaces, N):
        # Distance from the first point to the nearest of the remaining ones.
        distances = [dist[(points[0], points[i])] for i in range(1, N)]
        d = np.min(distances)
        if d > 0:
            # Reward 1 for reaching the goal, minus 0.1 per extra step taken.
            reward.append(1 + -0.1 * (d - 1))
    print(reward)
    print(np.mean(reward))
    print(np.std(reward))


if __name__ == '__main__':
    main()
# for i in range(1):
#
# env = CollectEnv(goal_condition=lambda x: x.colour == 'purple' and x.shape == 'square')
# env.reset()
# env.render()
# for _ in range(10000):
# obs, reward, done, _ = env.step(env.action_space.sample())
# env.render()
# if done:
# env.reset()
# 1 let dist be a |V| × |V| array of minimum distances initialized to ∞ (infinity)
# 2 for each edge (u,v)
# 3 dist[u][v] ← w(u,v) // the weight of the edge (u,v)
# 4 for each vertex v
# 5 dist[v][v] ← 0
# 6 for k from 1 to |V|
# 7 for i from 1 to |V|
# 8 for j from 1 to |V|
# 9 if dist[i][j] > dist[i][k] + dist[k][j]
# 10 dist[i][j] ← dist[i][k] + dist[k][j]
# 11 end if