-
Notifications
You must be signed in to change notification settings - Fork 21
/
nyctaxi20150103.py
117 lines (111 loc) · 5.73 KB
/
nyctaxi20150103.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# link: https://github.com/tangxianfeng/STDN
import util
import numpy as np
import pandas as pd
import json
# Alternative output location kept for reference:
# outputdir = 'output/NYCTaxi20150103'
outputdir = '../NYCTaxi20150103'  # directory for the generated atomic files
util.ensure_dir(outputdir)  # create the output directory if it does not exist
# Alternative input location kept for reference:
# dataurl = 'input/NYCTaxi20150103/'
dataurl = '../NYCTaxi20150103/'  # directory holding the raw STDN .npz files
dataname = outputdir + '/NYCTaxi20150103'  # common path prefix for .geo/.grid/.gridod
# Build the .geo table: one record per grid cell, geo_id = row * num_cols + col.
# np.load(path) manages the file handle itself; the original np.load(open(...))
# leaked an open file object.
dataset = np.load(dataurl + "volume_train.npz")["volume"]
idset = set()  # NOTE(review): never read afterwards — kept only to preserve module state
geo = []
x = dataset.shape[1]  # number of grid rows
y = dataset.shape[2]  # number of grid columns
for i in range(x):
    for j in range(y):
        cell_id = i * y + j  # renamed from `id`, which shadowed the builtin
        idset.add(cell_id)
        geo.append([str(cell_id), 'Polygon', '[]', str(i), str(j)])
geo = pd.DataFrame(geo, columns=['geo_id', 'type', 'coordinates', 'row_id', 'column_id'])
geo.to_csv(dataname + '.geo', index=False)
# Write the .grid file: one 'state' record per (cell, time slot) holding the
# departing and arriving taxi volumes. Time slots are 30 minutes (1800 s).
grid_id = 0
# np.load(path) closes the file itself; np.load(open(...)) leaked the handle.
volume_train_dataset = np.load(dataurl + "volume_train.npz")["volume"]
volume_test_dataset = np.load(dataurl + "volume_test.npz")["volume"]
train_time = volume_train_dataset.shape[0]  # number of 30-min slots in the train span
test_time = volume_test_dataset.shape[0]    # number of 30-min slots in the test span
train_start_timestamp = util.datetime_timestamp("2015-01-01T00:00:00Z")
test_start_timestamp = util.datetime_timestamp("2015-02-10T00:00:00Z")
x = volume_train_dataset.shape[1]  # grid rows
y = volume_train_dataset.shape[2]  # grid columns
# `with` guarantees the file is closed even if a write raises.
with open(dataname + '.grid', 'w') as grid_file:
    grid_file.write('dyna_id,type,time,row_id,column_id,departing_volume,arriving_volume\n')
    for i in range(x):
        for j in range(y):
            # Train records first, then test records, for each cell.
            for t in range(train_time):
                time_str = util.timestamp_datetime(train_start_timestamp + t * 1800)
                cell = volume_train_dataset[t][i][j]  # [departing, arriving]
                grid_file.write(f'{grid_id},state,{time_str},{i},{j},{cell[0]},{cell[1]}\n')
                grid_id += 1
            for t in range(test_time):
                time_str = util.timestamp_datetime(test_start_timestamp + t * 1800)
                cell = volume_test_dataset[t][i][j]
                grid_file.write(f'{grid_id},state,{time_str},{i},{j},{cell[0]},{cell[1]}\n')
                grid_id += 1
# Write the .gridod file: one 'state' record per (origin cell, destination
# cell, time slot). flow axis 0 of the raw arrays holds [flow_from_cur,
# flow_from_last] as shown by the indices [0]/[1] used below.
gridod_id = 0
# np.load(path) closes the file itself; np.load(open(...)) leaked the handle.
flow_train_dataset = np.load(dataurl + "flow_train.npz")["flow"]
flow_test_dataset = np.load(dataurl + "flow_test.npz")["flow"]
train_time = flow_train_dataset.shape[1]  # number of 30-min slots in the train span
test_time = flow_test_dataset.shape[1]    # number of 30-min slots in the test span
train_start_timestamp = util.datetime_timestamp("2015-01-01T00:00:00Z")
test_start_timestamp = util.datetime_timestamp("2015-02-10T00:00:00Z")
x = flow_train_dataset.shape[2]  # grid rows
y = flow_train_dataset.shape[3]  # grid columns
# `with` guarantees the file is closed even if a write raises.
with open(dataname + '.gridod', 'w') as gridod_file:
    gridod_file.write('dyna_id,type,time,origin_row_id,origin_column_id,'
                      'destination_row_id,destination_column_id,'
                      'flow_from_cur,flow_from_last\n')
    for o_row in range(x):
        for o_col in range(y):
            for d_row in range(x):
                for d_col in range(y):
                    # Train records first, then test records, per OD pair.
                    for t in range(train_time):
                        time_str = util.timestamp_datetime(train_start_timestamp + t * 1800)
                        cur = flow_train_dataset[0][t][o_row][o_col][d_row][d_col]
                        last = flow_train_dataset[1][t][o_row][o_col][d_row][d_col]
                        gridod_file.write(
                            f'{gridod_id},state,{time_str},{o_row},{o_col},'
                            f'{d_row},{d_col},{cur},{last}\n')
                        gridod_id += 1
                    for t in range(test_time):
                        time_str = util.timestamp_datetime(test_start_timestamp + t * 1800)
                        cur = flow_test_dataset[0][t][o_row][o_col][d_row][d_col]
                        last = flow_test_dataset[1][t][o_row][o_col][d_row][d_col]
                        gridod_file.write(
                            f'{gridod_id},state,{time_str},{o_row},{o_col},'
                            f'{d_row},{d_col},{cur},{last}\n')
                        gridod_id += 1
# Emit config.json describing the generated atomic files (schema per table,
# plus dataset-level metadata such as the 1800 s time interval).
config = dict()
config['geo'] = dict()
config['geo']['including_types'] = ['Polygon']
config['geo']['Polygon'] = {'row_id': 'num', 'column_id': 'num'}
config['grid'] = dict()
config['grid']['including_types'] = ['state']
# NOTE(review): x and y here are the flow-array grid dimensions set above;
# they are written as the row/column counts of the grid schema.
config['grid']['state'] = {'row_id': x, 'column_id': y, 'departing_volume': 'num', 'arriving_volume': 'num'}
config['gridod'] = dict()
config['gridod']['including_types'] = ['state']
config['gridod']['state'] = {'origin_row_id': x, 'origin_column_id': y, 'destination_row_id': x,
                             'destination_column_id': y, 'flow_from_cur': 'num', 'flow_from_last': 'num'}
config['info'] = dict()
config['info']['data_col'] = ['departing_volume', 'arriving_volume']
config['info']['data_files'] = ['NYCTaxi20150103']
config['info']['geo_file'] = 'NYCTaxi20150103'
config['info']['output_dim'] = 2
config['info']['time_intervals'] = 1800
config['info']['init_weight_inf_or_zero'] = 'inf'
config['info']['set_weight_link_or_dist'] = 'dist'
config['info']['calculate_weight_adj'] = False
config['info']['weight_adj_epsilon'] = 0.1
# `with` ensures the handle is flushed and closed; the original
# json.dump(config, open(...)) relied on GC to close the file, which can
# truncate output on exceptions or non-CPython interpreters.
with open(outputdir + '/config.json', 'w', encoding='utf-8') as config_file:
    json.dump(config, config_file, ensure_ascii=False)