-
Notifications
You must be signed in to change notification settings - Fork 1
/
env.py
71 lines (53 loc) · 1.74 KB
/
env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from random import randint, random
# Encode the hit and stick actions as numbers so the if statements that
# dispatch on them stay clean.
HIT = 1    # player draws another card
STICK = 0  # player stops drawing; the dealer then plays out its turn
def draw_card():
    """Draw one card and return its signed value.

    Two candidate magnitudes in 1..10 are sampled first, then a uniform
    draw picks the colour: the black candidate is returned as a positive
    value when that draw is at most 2/3, otherwise the red candidate is
    returned as a negative value.
    """
    black_value = randint(1, 10)
    red_value = randint(1, 10)
    # Black is drawn with probability 2/3, red with the remaining 1/3.
    if random() > 2 / 3.0:
        return -red_value
    return black_value
def is_burst(score):
    """Return True when ``score`` is above 21 or below 1, else False."""
    if score > 21:
        return True
    return score < 1
def step(state, action):
    """Apply one player action to ``state`` and return the resulting reward.

    ``state`` is mutated in place. It must expose ``player`` and ``dealer``
    score attributes and a ``terminal`` flag.

    * ``HIT``: the player draws one card; if the new score is burst the
      state becomes terminal.
    * ``STICK``: the dealer draws cards until its score leaves the range
      1..16 (it sticks on 17 or more, or goes burst); the state then
      becomes terminal.
    * Any other action leaves ``state`` untouched.

    Returns:
        0 while the state is not terminal. In a terminal state: -1 if the
        player went burst, 1 if the dealer went burst, otherwise 1, -1 or 0
        when the player's score is respectively higher than, lower than or
        equal to the dealer's.
    """
    player_is_burst = False
    dealer_is_burst = False

    # Compare actions by value rather than identity: ``is`` only works for
    # ints thanks to CPython's small-integer cache and fails for equal
    # values of other types (e.g. a numpy integer chosen by an agent).
    if action == HIT:
        state.player += draw_card()
        player_is_burst = is_burst(state.player)
        if player_is_burst:
            state.terminal = True
    elif action == STICK:
        # Dealer's turn to play since the player has stopped drawing.
        dealer_action = HIT
        while dealer_action == HIT and not dealer_is_burst:
            state.dealer += draw_card()
            dealer_is_burst = is_burst(state.dealer)
            # Keep hitting on 1..16; stick on 17 or more (or when burst).
            dealer_action = HIT if 1 <= state.dealer <= 16 else STICK
        state.terminal = True

    # A reward is only handed out once the state is terminal.
    if not state.terminal:
        return 0
    if player_is_burst:
        return -1
    if dealer_is_burst:
        return 1

    # Nobody went burst: the higher score wins, equal scores are a draw.
    if state.player > state.dealer:
        return 1
    if state.player < state.dealer:
        return -1
    return 0