deepgravity/main.py: 20 additions & 4 deletions
@@ -2,8 +2,6 @@

import argparse

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data.distributed

@@ -152,7 +150,7 @@ def evaluate():
for data_temp in test_loader:
b_data = data_temp[0]
b_target = data_temp[1]
ids[0] = ids[0] + 1
ids[0] = data_temp[2][0][0]
for id, data, target in zip(ids, b_data, b_target):
if args.cuda:
data, target = data.cuda(), target.cuda()
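
Note on the evaluate() change above: the loop no longer increments a running counter but reads the origin id out of the batch itself, which suggests the test dataset's __getitem__ now returns the location ids as a third element. A minimal sketch of that assumption (the toy dataset and names are hypothetical, not from this PR):

import torch
from torch.utils.data import Dataset, DataLoader

class ToyFlowDataset(Dataset):
    """Hypothetical stand-in for dgd.FlowDataset returning (features, target, ids)."""
    def __len__(self):
        return 8
    def __getitem__(self, i):
        features = torch.rand(4)
        target = torch.rand(1)
        return features, target, [f'loc_{i}']  # ids travel with the sample

loader = DataLoader(ToyFlowDataset(), batch_size=4)
for data_temp in loader:
    b_data, b_target = data_temp[0], data_temp[1]
    # default_collate turns the per-sample id lists into [tuple_of_batch_ids],
    # so data_temp[2][0][0] is the first id of the batch, as in the diff.
    print(data_temp[2][0][0])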
@@ -164,7 +162,7 @@ def evaluate():

def cpc_from_num(edf, oa2tile, o2d2flow):
edf['tile'] = edf['locID'].apply(lambda x: oa2tile[x])
edf['tot_flow'] = edf['locID'].apply(lambda x: sum(o2d2flow[x].values()) if x in o2d2flow else 0)
edf['tot_flow'] = edf['locID'].apply(lambda x: sum(o2d2flow[x].values()) if x in o2d2flow else 1e-6)
cpc_df = pd.DataFrame(edf.groupby('tile').apply(\
lambda x: x['cpc_num'].sum() / 2 / x['tot_flow'].sum()), \
columns=['cpc']).reset_index()
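
For context, cpc_from_num aggregates the Common Part of Commuters (CPC) per tile; replacing the missing-origin total of 0 with 1e-6 keeps the per-tile division finite when an origin has no recorded flows. A standalone sketch of the standard CPC definition with the same epsilon guard (names assumed, not this repo's code):

import numpy as np

def cpc(pred, true, eps=1e-6):
    """CPC = 2 * sum(min(pred, true)) / (sum(pred) + sum(true))."""
    pred, true = np.asarray(pred, dtype=float), np.asarray(true, dtype=float)
    denom = pred.sum() + true.sum()
    return 2.0 * np.minimum(pred, true).sum() / max(denom, eps)

print(cpc([1, 2, 3], [2, 2, 2]))  # 0.833...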
@@ -177,6 +175,21 @@ def cpc_from_num(edf, oa2tile, o2d2flow):

cpc_df.to_csv(fname, index=False)

# flow_list = []

# for loc1 in tqdm(oa2features.keys()):
# features = [all_dataset.get_features(loc1, loc2) for loc2 in oa2features.keys()]
# true_flows = [all_dataset.get_flow(loc1, loc2) for loc2 in oa2features.keys()]
# features = torch.from_numpy(np.array(features)).float().reshape(1, len(features), -1)
# true_flows = torch.from_numpy(np.array(true_flows)).float().reshape(1, -1)
# output = model.forward(features)
# flows = torch.nn.LogSoftmax()(output.squeeze()) * -1 * true_flows.squeeze()
# flows = flows.detach().numpy()
# for idx, loc2 in enumerate(oa2features.keys()):
# flow_list.append([loc1, loc2, flows[idx]])

# pd.DataFrame(flow_list, columns=["o", "d", "flow"]).to_csv('./results/pred_flows_{}_{}.csv'.format(model_type, args.dataset))
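
A caution on the commented-out export above, should it be revived: torch.nn.LogSoftmax() without an explicit dim triggers a deprecation warning in PyTorch, and -log_softmax(output) * true_flows gives what appear to be per-pair cross-entropy terms rather than flows, despite the "flow" column name. If predicted flows are the goal, a common deep-gravity-style formulation (an assumption about intent, not code from this PR) scales the destination softmax by the origin's observed outflow:

import torch

def predicted_flows(scores: torch.Tensor, total_outflow: float) -> torch.Tensor:
    probs = torch.softmax(scores, dim=-1)  # probability of each destination
    return probs * total_outflow           # expected flow per destination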




@@ -223,6 +236,9 @@ def cpc_from_num(edf, oa2tile, o2d2flow):
test_dataset = dgd.FlowDataset(test_data, **test_dataset_args)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.test_batch_size)

# all_data = test_data + train_data
# all_dataset = dgd.FlowDataset(all_data, **test_dataset_args)

dim_input = len(train_dataset.get_features(train_data[0], train_data[0]))


deepgravity/utils.py: 42 additions & 14 deletions
@@ -1,12 +1,9 @@
import random
import numpy as np
import pandas as pd
import json
import zipfile
import gzip
import ast
import pickle
import torch
import string
import os

import geopandas
@@ -20,9 +17,15 @@
ffnn = SourceFileLoader('ffnn', path).load_module()

def _is_support_files_computed(db):
if os.path.isdir(db+'/processed'):
if os.path.isdir(db + '/processed'):
base = db + '/processed/'
return os.path.isfile(base+'tileid2oa2handmade_features.json') and os.path.isfile(base+'oa_gdf.csv.gz') and os.path.isfile(base+'flows_oa.csv.zip') and os.path.isfile(base+'msoa_df_all.csv.zip') and os.path.isfile(base+'oa2features.pkl') and os.path.isfile(base+'oa2centroid.pkl')
print(base)
return os.path.isfile(base+'tileid2oa2handmade_features.json') and \
os.path.isfile(base+'oa_gdf.csv.gz') and \
os.path.isfile(base+'flows_oa.csv.zip') and \
os.path.isfile(base+'oa2features.pkl') and \
os.path.isfile(base+'oa2centroid.pkl') and \
os.path.isfile(base+'test_tiles.csv')
else:
return False
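
The file check above could be expressed more compactly; a behaviour-preserving sketch of the new version (same required files, including the newly added test_tiles.csv, with the print(base) debug line dropped):

import os

REQUIRED = ['tileid2oa2handmade_features.json', 'oa_gdf.csv.gz',
            'flows_oa.csv.zip', 'oa2features.pkl', 'oa2centroid.pkl',
            'test_tiles.csv']

def _is_support_files_computed(db):
    base = os.path.join(db, 'processed')
    return os.path.isdir(base) and all(
        os.path.isfile(os.path.join(base, f)) for f in REQUIRED)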

@@ -41,6 +44,13 @@ def _check_base_files(db_dir):
def _compute_support_files(db_dir, tile_id_column, tile_geometry, oa_id_column, oa_geometry, flow_origin_column, flow_destination_column, flow_flows_column):
# first, check that at least the required files are present in the base directory.
_check_base_files(db_dir)

if not os.path.isdir(db_dir + '/processed/'):
os.mkdir(db_dir + '/processed')

if not os.path.isdir('./results/'):
os.mkdir('./results')
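
An equivalent, slightly more robust variant of the directory setup above (a sketch, not the PR's code): os.makedirs with exist_ok=True avoids the isdir check and also creates any missing parent directories:

import os

os.makedirs(os.path.join(db_dir, 'processed'), exist_ok=True)  # db_dir from the enclosing function
os.makedirs('./results', exist_ok=True)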

print('Generating the processed files - it may take a while....')
print('Reading tessellation....')
try:
@@ -78,7 +88,7 @@ def _compute_support_files(db_dir, tile_id_column, tile_geometry, oa_id_column,

oa2centroid = {}
for i,row in temp_out.iterrows():
oa2centroid[row['geo_code']] = row['centroid']
oa2centroid[row['geo_code']] = ast.literal_eval(row['centroid'])

with open(db_dir+'/processed/oa2centroid.pkl', 'wb') as handle:
pickle.dump(oa2centroid, handle)
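
Why ast.literal_eval here: the centroids round-trip through a CSV, so they come back as strings such as "(51.5074, -0.1278)" (format assumed from context); literal_eval safely parses that back into a tuple without eval's security risks:

import ast

centroid = ast.literal_eval('(51.5074, -0.1278)')
print(type(centroid), centroid[0])  # <class 'tuple'> 51.5074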
@@ -103,27 +113,44 @@ def _compute_support_files(db_dir, tile_id_column, tile_geometry, oa_id_column,
od2flow[(row['residence'],row['workplace'])] = row['commuters']

with open(db_dir+'/processed/od2flow.pkl', 'wb') as handle:
pickle.dump(oa2centroid, handle)
pickle.dump(od2flow, handle)

features = pd.read_csv(db_dir+'features.csv', dtype={oa_id_column:str})
features = pd.read_csv(db_dir+'/features.csv', dtype={oa_id_column:str})

oa2features = {}
for i,row in features.iterrows():
oa2features[row[0]]=row[1:].values
oa2features[row[1]]=row[2:].values.tolist()

with open(db_dir+'/processed/oa2features.pkl', 'wb') as handle:
pickle.dump(oa2features, handle)

tileid2oa2handmade_features = dict()
mapping_dict = {oa_id: [] for oa_id in mapping[oa_id_column].unique()}

for i,row in mapping.iterrows():
if row[tile_id_column] not in tileid2oa2handmade_features:
tileid2oa2handmade_features[row[tile_id_column]] = dict()
tileid2oa2handmade_features[row[tile_id_column]][row[oa_id_column]]=dict()
else:
tileid2oa2handmade_features[row[tile_id_column]][row[oa_id_column]]=dict()
mapping_dict[row[oa_id_column]].append(row[tile_id_column])
for i,row in features.iterrows():
for item in zip(list(row.keys()),row.values):
tileid2oa2handmade_features[row[tile_id_column]][item[0]]=[item[1]]
if "named" in item[0]:
continue
for tile_id in mapping_dict[row[oa_id_column]]:
tileid2oa2handmade_features[tile_id][row[oa_id_column]][item[0]]=[item[1]]

with open('tileid2oa2handmade_features.json', 'w') as f:
with open(db_dir + '/processed/tileid2oa2handmade_features.json', 'w') as f:
json.dump(tileid2oa2handmade_features, f)
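
For reference, the structure the rewritten loop produces (values illustrative, derived from the assignments above): mapping_dict collects every tile an output area belongs to, and the JSON nests tile id -> OA id -> feature name -> [value], skipping feature columns whose name contains "named":

tileid2oa2handmade_features = {
    'tile_1': {'oa_A': {'population': [1200], 'area_km2': [3.4]}},
    'tile_2': {'oa_A': {'population': [1200], 'area_km2': [3.4]},
               'oa_B': {'population': [800], 'area_km2': [1.1]}},
}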

all_tiles = mapping[tile_id_column].unique()

train = np.random.choice(all_tiles, int(len(all_tiles)*0.7), False)
test = [x for x in all_tiles if x not in train]

pd.DataFrame(train).to_csv(db_dir + '/processed/train_tiles.csv', index=False, header=False)
pd.DataFrame(test).to_csv(db_dir + '/processed/test_tiles.csv', index=False, header=False)
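
The 70/30 tile split above is re-randomised on every run; if a reproducible split is wanted (an assumption about desired behaviour, not in the PR), seeding and a set-based membership test keep it deterministic and fast:

import numpy as np

rng = np.random.default_rng(42)  # fixed seed, hypothetical choice
train = rng.choice(all_tiles, int(len(all_tiles) * 0.7), replace=False)
train_set = set(train)
test = [t for t in all_tiles if t not in train_set]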


def tessellation_definition(db_dir,name,size):
@@ -133,8 +160,9 @@ def tessellation_definition(db_dir,name,size):

def load_data(db_dir, tile_id_column, tile_geometry, oa_id_column, oa_geometry, flow_origin_column, flow_destination_column, flow_flows_column):
# check if there are the computed information
#if not _is_support_files_computed(db_dir):
# _compute_support_files(db_dir, tile_id_column, tile_geometry, oa_id_column, oa_geometry, flow_origin_column, flow_destination_column, flow_flows_column)
if not _is_support_files_computed(db_dir):
print("Computing support files! ")
_compute_support_files(db_dir, tile_id_column, tile_geometry, oa_id_column, oa_geometry, flow_origin_column, flow_destination_column, flow_flows_column)

# tileid2oa2features2vals
with open(db_dir + '/processed/tileid2oa2handmade_features.json') as f: