-
Notifications
You must be signed in to change notification settings - Fork 0
/
game.py
77 lines (59 loc) · 1.75 KB
/
game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import numpy as np
class Labyrinth:
    """Small grid-world maze in which a player cell is moved towards a goal.

    The board is a 2-D ``float32`` array stored in ``self.state``; each cell
    holds one of the codes ``wall``, ``empty``, ``player`` or ``goal``.
    An episode ends when the player walks into a wall, leaves the grid or
    reaches the goal.
    """

    # Action id -> (row delta, column delta) applied to the player's index.
    direction = {
        0: (-1, 0),  # one row up (towards index 0)
        1: (0, 1),   # one column to the right
        2: (1, 0),   # one row down
        3: (0, -1),  # one column to the left
    }

    # Cell codes used inside ``self.state``.
    wall = 0
    empty = 1
    player = 2
    goal = 3

    def __init__(self):
        """Create the environment with the board in its initial layout."""
        self.reset()
        self.border = self.state.shape

    def reset(self):
        """Restore the initial board and return a copy of it."""
        self.state = np.array([
            [1, 1, 1, 1],
            [1, 0, 3, 1],
            [1, 0, 0, 0],
            [1, 1, 2, 1],
        ], dtype=np.float32)
        return self.state.copy()

    def step(self, action):
        """Move the player one cell and report the outcome.

        Args:
            action: key of ``direction`` (0, 1, 2 or 3).

        Returns:
            A tuple ``(observation, reward, game_over)``. ``observation`` is
            always a copy of the board, ``reward`` is ``np.float32(1.0)`` only
            when the goal cell is reached, and ``game_over`` is
            ``np.float32(1.0)`` when the move hit a wall, left the grid or
            reached the goal.

        Raises:
            KeyError: if ``action`` is not a key of ``direction``. The board
                is left untouched in that case.
        """
        # Resolve the move first so an unknown action cannot leave the board
        # half-updated (player erased but never placed again).
        d_row, d_col = self.direction[action]

        rows, cols = np.where(self.state == self.player)
        if rows.size == 0:
            # No player on the board: the previous episode already ended and
            # reset() has not been called. Report it as finished.
            return self.state.copy(), np.float32(0.0), np.float32(1.0)

        self.state[rows, cols] = self.empty
        rows = rows + d_row
        cols = cols + d_col

        # Check each axis against its own extent so non-square boards work.
        n_rows, n_cols = self.state.shape
        outside_bounds = (
            (rows < 0) | (rows >= n_rows) | (cols < 0) | (cols >= n_cols)
        ).any()
        if outside_bounds:
            return self.state.copy(), np.float32(0.0), np.float32(1.0)

        target = self.state[rows, cols]
        win = (target == self.goal).any()
        game_over = win or (target == self.wall).any()

        if not game_over:
            self.state[rows, cols] = self.player

        return self.state.copy(), np.float32(win), np.float32(game_over)
class HumanGame:
    """Console front-end that lets a person play a game one action at a time.

    The wrapped ``game`` must provide ``reset()`` returning an observation and
    ``step(action)`` returning ``(observation, reward, game_over)``.
    """

    def __init__(self, game):
        """Store the game object that ``play`` will drive."""
        self.game = game

    def play(self):
        """Run an interactive session of at most 100 prompts.

        Each turn prints the observation and asks for an action. Entering
        ``q``, an empty line, or closing stdin ends the session. Input that is
        not a whole number, or is not a key of the game's ``direction``
        mapping (when the game has one), is rejected with a message and the
        player is asked again instead of the session crashing.
        """
        # Only validate against the action table when the game exposes one.
        valid_actions = getattr(self.game, 'direction', None)

        ob = self.game.reset()
        for _ in range(100):
            print(ob)
            try:
                raw = input('action: ').strip()
            except EOFError:
                # stdin was closed; treat it like an explicit quit.
                break
            if raw == 'q' or raw == '':
                break

            try:
                value = float(raw)
            except ValueError:
                print('invalid action: {!r}'.format(raw))
                continue
            if not value.is_integer():
                print('invalid action: {!r}'.format(raw))
                continue
            # Hand the game an integer action id rather than a float.
            action = int(value)
            if valid_actions is not None and action not in valid_actions:
                print('invalid action: {!r}'.format(raw))
                continue

            ob, reward, game_over = self.game.step(action)
            print('reward: {}'.format(reward))
            if game_over:
                print('game_over')
                break