-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEnv.py
238 lines (206 loc) · 7.98 KB
/
Env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import numpy as np
from datetime import datetime, date
from datetime import timedelta
import copy
from OrderGenerate import OrderGenerator, readRoute
import matplotlib.pyplot as plt
class Env():
    """Day-by-day ticket ordering environment over one price series.

    An episode replays ``data[routeId]`` from day 0 up to day
    ``HORIZON - 1``. On each day the agent may accept that day's order
    (when one exists and the accept quota is not used up) and may buy
    tickets for the orders it holds; buying an order is rewarded with
    ``order - today_price``.

    NOTE(review): the source this class was recovered from had lost its
    indentation, so the nesting of statements is reconstructed — confirm
    against the original file.
    """

    # Number of days in an episode; day indices run from 0 to HORIZON - 1.
    HORIZON = 87

    def __init__(self, data, mode=1, history_take_off=2, order_num=10, route_id=21):
        """
        :param data: indexable collection of price series; ``data[r][d]`` is
            read as the price of series ``r`` on day ``d``
        :param mode: passed through unchanged to ``OrderGenerator``
        :param history_take_off: number of rows kept in ``his_price``; row
            ``i`` is filled from ``data[route_id - i]``
        :param order_num: maximum number of orders that may be accepted in
            one episode
        :param route_id: index into ``data`` of the series to replay
            (defaults to 21, the value that used to be hard-coded)
        :raises ValueError: if ``route_id`` is too small for
            ``history_take_off`` rows of earlier series to exist
        """
        # Row i of his_price reads data[route_id - i]; a negative index would
        # silently wrap around to the end of ``data`` instead of failing.
        if route_id < max(history_take_off - 1, 0):
            raise ValueError(
                "route_id %d leaves no room for %d history rows"
                % (route_id, history_take_off))
        self.data = data
        self.his_t = history_take_off
        self.mode = mode
        self.order_num = order_num
        self.fixed_route_id = route_id
        self.reset()

    def reset(self):
        """Start a new episode.

        :return: ``(state, done)`` with ``done`` always ``False`` here
        """
        self.today = 0
        self.done = False
        self.profit = 0
        self.orders = []
        self.order_left = self.order_num
        self.his_order = []
        self.his_accept = []
        self.buy_ticket_value = 0
        self.his_price = np.zeros((self.his_t, self.HORIZON))
        # The series is fixed rather than sampled at random. If it is ever
        # sampled, the lower bound must be restricted, otherwise there are
        # no earlier-departing flights to fill the history rows from.
        self.routeId = self.fixed_route_id
        # Indexed by day below; a value of -1 is treated as "no order today".
        self.order_distribution = OrderGenerator(self.data[self.routeId], self.mode)
        self.totalReward = 0
        # Row i starts with days 0..i already filled in.
        for i in range(self.his_t):
            for j in range(i + 1):
                if self.today + j < self.HORIZON:
                    self.his_price[i, self.today + j] = self.data[self.routeId - i][self.today + j]
        self.his_order.append(self.order_distribution[self.today])
        return self._get_state(), self.done

    @staticmethod
    def _is_sequence(value):
        """Return True when ``value`` is a per-order action container."""
        return isinstance(value, (list, tuple, np.ndarray))

    def _get_state(self):
        """Build the observation dict from the current attributes.

        The dict holds the live ``his_price`` array and history lists, not
        copies, so in-place updates made by later steps are visible through
        states returned earlier.
        """
        return {
            'buy_ticket_value': self.buy_ticket_value,
            'his_price': self.his_price,
            'his_order': self.his_order,
            'his_accept': self.his_accept,
            'orders': self.orders,
        }

    def _try_accept(self):
        """Accept today's order if one exists and quota remains.

        :return: True when an order was added to ``self.orders``
        """
        todays_order = self.order_distribution[self.today]
        if todays_order != -1 and self.order_left > 0:
            self.orders.append(todays_order)
            self.order_left -= 1
            return True
        return False

    def _advance_day(self):
        """Move to the next day and refresh the day-dependent observations."""
        self.today += 1
        self.his_order.append(self.order_distribution[self.today])
        today_price = self.data[self.routeId][self.today]
        # Value of buying every held order at the new day's price.
        self.buy_ticket_value = 0
        for order in self.orders:
            self.buy_ticket_value += order - today_price
        # Row i reveals one more entry, i days ahead of the current day.
        for i in range(self.his_t):
            if self.today + i < self.HORIZON:
                self.his_price[i, self.today + i] = self.data[self.routeId - i][self.today + i]

    def _finish_step(self, order_accept):
        """Record the accept flag, update ``done`` or advance, return the state."""
        self.his_accept.append(order_accept)
        last_day = self.today >= self.HORIZON - 1
        nothing_left = len(self.orders) == 0 and self.order_left == 0
        if last_day or nothing_left:
            # When done, the returned state is not used by the caller, so it
            # is fine to leave it un-updated.
            self.done = True
        else:
            self._advance_day()
        return self._get_state()

    def step(self, act):
        """
        :param act: 0 hold, 1 accept order, 2 buy ticket for all orders
        :return: obs, reward , done , info
        """
        reward = 0
        info = {}
        order_accept = 0
        if act == 1:
            if self._try_accept():
                order_accept = 1
            reward = self.getAcceptReward(act)
        elif act == 2:
            reward = self.getBuyReward(act, self.orders)
            self.profit += reward
            self.orders = []
        state = self._finish_step(order_accept)
        return state, reward, self.done, info

    def separateStep(self, accpet_act, buy_act):
        '''
        :param accpet_act: 0 hold, 1 accept order
        :param buy_act: a list for each orders action, 0 hold, 1 buy;
            a scalar 0/1 is also accepted and applies to all held orders
        :return: obs, reward , done , info; reward is a dict with keys
            'reward_accept' and 'reward_buy'
        :raises ValueError: if a per-order ``buy_act`` does not have one
            entry per held order, or a scalar ``buy_act`` is not 0 or 1
        '''
        reward_accept = 0
        info = {}
        order_accept = 0

        # Validate before touching any state so a rejected call has no
        # side effects on rewards or orders.
        per_order = self._is_sequence(buy_act)
        if per_order:
            if len(buy_act) != len(self.orders):
                raise ValueError("len buy_act %d , len orders %d, not match" % (len(buy_act), len(self.orders)))
        elif buy_act not in (0, 1):
            raise ValueError("buy_act must be 0, 1 or a per-order sequence, got %r" % (buy_act,))

        if self.today >= self.HORIZON - 1:
            # Last day: every held order is bought whatever was requested.
            if per_order:
                buy_act = np.ones_like(buy_act)
            else:
                # With nothing held there is nothing to force, which matches
                # the per-order case where an empty action earns 0.
                buy_act = 1 if len(self.orders) > 0 else 0

        reward_buy = self.getBuyReward(buy_act, self.orders)
        if per_order:
            # Keep only the orders whose action was "hold".
            self.orders = [order for order, choice in zip(self.orders, buy_act) if choice == 0]
            self.profit += reward_buy
        elif buy_act == 1:
            self.orders = []
            self.profit += reward_buy

        # Accepting happens after buying, so an order accepted today is
        # never bought in the same step.
        if accpet_act == 1:
            # NOTE(review): the -1 penalty is attached to a failed accept
            # (no order today or quota exhausted); the original nesting of
            # this else was lost — confirm it was not meant for
            # accpet_act != 1.
            if self._try_accept():
                order_accept = 1
                reward_accept = self.getAcceptReward(accpet_act)
            else:
                reward_accept = -1

        state = self._finish_step(order_accept)
        reward = {}
        reward['reward_accept'] = reward_accept
        reward['reward_buy'] = reward_buy
        return state, reward, self.done, info

    def render(self):
        """Print the current day, held orders, remaining quota and profit."""
        print(f'Day: {self.today}')
        print(f'Orders: {self.orders}')
        print(f'Order left: {self.order_left}')
        print(f'Profit: {self.profit}')

    def getAcceptReward(self, act):
        '''
        :param act: 0 reject, 1 accept
        :return: reward (currently always 0)
        '''
        return 0

    def getBuyReward(self, act, orders):
        '''
        Compute the reward of a buy action at today's price and add it to
        ``totalReward``.

        :param act: 0 hold, 1 buy; either one scalar applied to all orders
            or a list/tuple/ndarray with one entry per order
        :param orders: the held orders the action refers to
        :return: reward; sum of ``order - today_price`` over bought orders,
            or -1 per requested buy when no order is held
        '''
        reward = 0
        today_price = self.data[self.routeId][self.today]
        if self._is_sequence(act):
            if len(orders) > 0:
                for i, choice in enumerate(act):
                    if choice != 0:
                        reward += orders[i] - today_price
            else:
                # Asking to buy while holding nothing is penalised.
                reward -= sum(1 for choice in act if choice != 0)
        elif act != 0:
            if len(orders) >= 1:
                for order in orders:
                    reward += order - today_price
            else:
                reward -= 1
        self.totalReward += reward
        return reward

    def getTotalReward(self):
        """Return the buy reward accumulated since the last reset."""
        return self.totalReward

    def getTodayIndex(self):
        """Return the current day, or -1 while no order has been accepted."""
        if self.order_num == self.order_left:
            return -1
        return self.today

    def getNextPrice(self):
        """Return a ``(his_t, HORIZON)`` array holding the newest price entries.

        On the last day the live ``his_price`` array itself is returned.
        Otherwise the result is zero except for entry ``[i, today + i]`` of
        each row, taken from ``data[routeId - i]``.

        NOTE(review): despite the name, this reads column ``today + i``
        rather than ``today + 1 + i`` — confirm that is intended.
        """
        if self.today >= self.HORIZON - 1:
            return self.his_price
        next_price = np.zeros((self.his_t, self.HORIZON))
        for i in range(self.his_t):
            if self.today + i < self.HORIZON:
                next_price[i, self.today + i] = self.data[self.routeId - i][self.today + i]
        return next_price
if __name__ == "__main__":
    # Smoke run: drive a single episode with uniformly random actions and
    # print the environment after every step.
    route_list = readRoute("./wang/data/route")
    env = Env(route_list, history_take_off=2)
    for _ in range(86):
        action = np.random.randint(3)
        obs, reward, done, info = env.step(action)
        env.render()
        if done:
            # Stop at the end of the episode; stepping further would keep
            # charging the empty-buy penalty to the reported profit.
            break
    print(obs["his_price"])