-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalytics.py
194 lines (171 loc) · 7.61 KB
/
analytics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
"""
analytics.py
Produce analytic data for debugging and visualisation
"""
import pandas as pd
import os
from geojson import dump, FeatureCollection, Feature, GeometryCollection, LineString, MultiLineString
from globals import *
from utils import loadMatrix
from math import sqrt
"""
runAnalytics
Function to run the full analytics package
"""
#def runAnalytics():
#produce a flow geojson for the top flows from a probability matrix e.g. job flows
#runAnalyticsRetail(0.004) #was 0.002 then 0.008
#runAnalyticsSchools(0.008)
#runAnalyticsHospitals(0.008)
#runAnalyticsJobs(0.008)
################################################################################
def graphProbabilities_GB(threshold,dfPointsPopulation,dfPointsZones,pointsProbSij,pointsZonesIDField):
    """
    graphProbabilities - GB
    Produce graph data for the retail, schools and hospitals model. One
    straight line feature is made for every (zone i, point j) cell of the
    probability matrix whose value is at or above the threshold.
    @param threshold The threshold below which to ignore low probability trips
    @param dfPointsPopulation MSOA list, read through its 'zonei', 'msoaiz',
    'osgb36_east' and 'osgb36_north' columns
    @param dfPointsZones Point list, read through its 'zonei', 'east' and
    'north' columns plus the column named by pointsZonesIDField
    @param pointsProbSij matrix of probabilities, indexed [zone i, point j]
    @param pointsZonesIDField field name of the unique identifier field in the points files e.g. school id, retail id etc
    @returns a feature collection as a geojson object to be written to file (probably).
    Each feature is a LineString from the zone to the point with properties
    "o" (the msoaiz), "d" (the point id as a string) and "prob"
    @raises IndexError if a matrix row has no matching zonei in
    dfPointsPopulation, or an at-or-above-threshold column has no matching
    zonei in dfPointsZones
    """
    #east,north in retail points zones file (look in zonecodes for the lat lon)
    #east, north and lat,lon in retail points population file

    #Build the zonei lookups once up front: filtering the dataframes for every
    #matrix cell is much too slow. setdefault keeps the FIRST row of a repeated
    #zonei, which is the row a "zonei == k" filter followed by values[0] picks.
    origins = {}
    for zonei, msoaiz, east, north in zip(
            dfPointsPopulation['zonei'].values,
            dfPointsPopulation['msoaiz'].values,
            dfPointsPopulation['osgb36_east'].values,
            dfPointsPopulation['osgb36_north'].values):
        origins.setdefault(zonei, (msoaiz, east, north))
    destinations = {}
    for zonei, point_id, east, north in zip(
            dfPointsZones['zonei'].values,
            dfPointsZones[pointsZonesIDField].values,
            dfPointsZones['east'].values,
            dfPointsZones['north'].values):
        destinations.setdefault(zonei, (point_id, east, north))

    features = []
    m,n = pointsProbSij.shape
    for i in range(m): #this is the zonei
        try:
            i_msoaiz, i_east, i_north = origins[i]
        except KeyError:
            #same exception type as indexing an empty filter result
            raise IndexError("no row with zonei == {0} in dfPointsPopulation".format(i)) from None
        i_east = float(i_east)
        i_north = float(i_north)
        print("graphProbabilities ", i_msoaiz ,"iteration ", i, "of ", m)
        for j in range(n):
            p = pointsProbSij[i,j]
            if p>=threshold:
                #yes, looking up j against 'zonei' is correct, they're always called 'zonei'
                try:
                    j_id, j_east, j_north = destinations[j]
                except KeyError:
                    raise IndexError("no row with zonei == {0} in dfPointsZones".format(j)) from None
                the_geom = LineString([(i_east,i_north),(float(j_east),float(j_north))])
                #str() and float() turn the array scalars into plain Python
                #values so that the properties can be serialised
                f = Feature(geometry=the_geom, properties={"o": i_msoaiz, "d": str(j_id), "prob": float(p)})
                features.append(f)
            #end if
        #end for
    #end for
    return FeatureCollection(features)
# def graphProbabilities(threshold,dfPointsPopulation,dfPointsZones,pointsProbSij,pointsZonesIDField): # original code
def graphProbabilities(threshold, dfOriginsPopulation, ProbSij):
    """
    graphProbabilities - Athens
    Produce graph data for the journey to work model. This is zone to zone,
    not zone to point: both ends of every line come from the same zones
    dataframe. One straight line feature is made for every (i, j) cell of the
    probability matrix whose value is at or above the threshold.
    @param threshold The threshold below which to ignore low probability trips
    @param dfOriginsPopulation equivalent Attica zones, read through its
    'zonei', 'zone', 'Greek_Grid_east' and 'Greek_Grid_north' columns
    @param ProbSij matrix of probabilities, indexed [zone i, zone j]
    @returns a feature collection as a geojson object to be written to file.
    Each feature is a LineString from zone i to zone j with properties
    "o" and "d" (the 'zone' values as strings) and "prob"
    @raises IndexError if a matrix row, or an at-or-above-threshold column,
    has no matching zonei in dfOriginsPopulation
    """
    #Build the zonei lookup once up front: filtering the dataframe for every
    #matrix cell is much too slow. setdefault keeps the FIRST row of a repeated
    #zonei, which is the row a "zonei == k" filter followed by values[0] picks.
    zones = {}
    for zonei, zone, east, north in zip(
            dfOriginsPopulation['zonei'].values,
            dfOriginsPopulation['zone'].values,
            dfOriginsPopulation['Greek_Grid_east'].values,
            dfOriginsPopulation['Greek_Grid_north'].values):
        zones.setdefault(zonei, (zone, east, north))

    features = []
    m,n = ProbSij.shape
    for i in range(m): #this is the zonei
        try:
            i_zone, i_east, i_north = zones[i]
        except KeyError:
            #same exception type as indexing an empty filter result
            raise IndexError("no row with zonei == {0} in dfOriginsPopulation".format(i)) from None
        i_zone = str(i_zone)
        i_east = float(i_east)
        i_north = float(i_north)
        for j in range(n):
            p = ProbSij[i,j]
            if p>=threshold:
                #yes, looking up j against 'zonei' is correct, they're always called 'zonei'
                try:
                    j_zone, j_east, j_north = zones[j]
                except KeyError:
                    raise IndexError("no row with zonei == {0} in dfOriginsPopulation".format(j)) from None
                the_geom = LineString([(i_east,i_north),(float(j_east),float(j_north))])
                #str() and float() turn the array scalars into plain Python
                #values so that the properties can be serialised
                f = Feature(geometry=the_geom, properties={"o": i_zone, "d": str(j_zone), "prob": float(p)})
                features.append(f)
            #end if
        #end for
    #end for
    return FeatureCollection(features)
################################################################################
def flowArrowsGeoJSON(Tij,dfZoneCodes,arrowScale=1):
    """
    flowArrowsGeoJSON
    Take each Aj residential zone and add up the vectors of all the
    flows leaving that zone for work in an i zone. This gives you
    a residential zone to work zone vector field.
    Each arrow starts at the residential zone centroid and points along the
    flow-weighted sum of the unit vectors towards every work zone. Its length
    is the magnitude of that sum (times arrowScale).
    @param Tij the Tij trips matrix to make the flows from, indexed
    [i, j] with i the work zone and j the residential zone
    @param dfZoneCodes The zone codes file as a dataframe, read through its
    'zonei', 'Greek_Grid_east' and 'Greek_Grid_north' columns. Zone codes from
    zonesdatacoordinates.csv as zonecodes_ATH in main program doesn't have
    the necessary centroid coordinates
    @param arrowScale optional multiplier applied to the length of every
    arrow, default 1 (no extra scaling)
    @returns a feature collection that you can make a geojson from. This is
    in the Greek grid system. There is one LineString feature per residential
    zone with the property "originzonei"
    @raises KeyError if a row or column index of Tij has no zonei in dfZoneCodes
    """
    #go through all origin zones and find average flow direction
    #make a faster zone lookup as pandas is much too slow
    zonelookup = {}
    for _, row in dfZoneCodes.iterrows():
        zonelookup[row['zonei']] = (row['Greek_Grid_east'], row['Greek_Grid_north'])
    #end for

    #arrow outline in unit space, y is along the arrow and x is across it:
    #shaft from the origin up to 0.9, then around the head and back to the shaft
    arrowpts = [ [0,0], [0,0.9], [-0.1,0.9], [0,1.0], [0.1,0.9], [0,0.9] ]

    features = []
    m, n = Tij.shape
    for j in range(n): #for all residential zones
        xcj, ycj = zonelookup[j]
        dxji=0
        dyji=0
        for i in range(m): #sum all work zone flows to get average flow
            if j==i:
                continue #don't do the self flow
            value=Tij[i,j] #this is flow from originj (residence) to desti (work)
            xci, yci = zonelookup[i]
            dx = xci-xcj # j->i vector between centroids - need to normalise this
            dy = yci-ycj
            mag = sqrt(dx*dx+dy*dy)
            if mag==0:
                #two zones sharing one centroid have no direction between them;
                #dividing by the zero length would fail or turn the sum into NaN
                continue
            #sum normalised direction times value of number of people travelling on link
            dxji+= value * dx/mag
            dyji+= value * dy/mag
        #end for i

        r = sqrt(dxji*dxji+dyji*dyji) #need magnitude of vector as we have to rotate and scale it
        if (r<1): #guard for zero flow, we want it to come out as a dot
            r=1
        #and normalise
        dxji/=r
        dyji/=r
        #now normal to vector j->i
        nxji = dyji
        nyji = -dxji

        s = r*arrowScale #scale factor on arrows
        ls_pts = [] #to make a linestring
        for p in arrowpts:
            #rotated axes are: y along j->i and x along normal(j->i)
            #V = S*AY*(j->i) + S*AX*(Normal(j->i)) where S=scaling, AX,AY=arrow point
            ax = s*p[1]*dxji + s*p[0]*nxji #along ji plus normal
            ay = s*p[1]*dyji + s*p[0]*nyji
            ls_pts.append((xcj+ax,ycj+ay)) #NOTE: east, north fits geojson x,y
        #end for p
        the_geom = LineString(ls_pts)
        f = Feature(geometry=the_geom, properties={"originzonei": j})
        features.append(f)
    #end for j
    return FeatureCollection(features)