-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
klaricch
committed
Nov 11, 2015
1 parent
7c9370c
commit aa8d9c2
Showing
2 changed files
with
197 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
library(rgdal) | ||
library(rgeos) | ||
library(leaflet) | ||
library(dplyr) | ||
library(htmlwidgets) | ||
setwd("~/Desktop/Data_Incubator") | ||
#download grocery stores file | ||
#url<-"http://data.cityofchicago.org/api/views/53t8-wyrc/rows.csv?accessType=DOWNLOAD" | ||
#download_dir<-getwd() | ||
#dest_name<-"grocery_stores.csv" | ||
#download.file(url, dest_name) | ||
|
||
gf<-read.csv("chicago_green_roof.txt") | ||
gs<-read.csv("grocery_stores.csv") | ||
|
||
#filter for grocery stores > 10,000 sq feet (used in definition of food desert) | ||
large_store<-filter(gs,SQUARE.FEET>10000) | ||
|
||
#download zipcodes: | ||
url <- "http://www2.census.gov/geo/tiger/GENZ2014/shp/cb_2014_us_zcta510_500k.zip" | ||
download_dir<-getwd() | ||
dest_name<-"all_zip.zip" | ||
download.file(url, dest_name) | ||
unzip(dest_name, exdir=download_dir, junkpaths=TRUE) | ||
filename<-list.files(download_dir, pattern=".shp", full.names=FALSE) | ||
filename<-gsub(".shp", "", filename) | ||
zip_codes<-readOGR(download_dir, "cb_2014_us_zcta510_500k") | ||
|
||
chi_zips<-c(60647,60639,60707,60622,60651,60611,60638,60652,60626,60621,60645,60631,60646,60628,60660,60640,60625,60641,60657,60615,60636,60649,60617,60643,60633,60643,60612,60604,60656,60624,60655,60644,60603,60605,60653,60609,60666,60618,60616,60602,60601,60608,60607,60661,60606,60614,60827,60630,60642,60659,60707,60634,60613,60610,60654,60632,60623,60629,60620,60637,60619) | ||
chicago <- zip_codes[zip_codes$ZCTA5CE10 %in% chi_zips,] | ||
interest<-subset(chicago, grepl('^(60636)|(60628)|(60644).*', chicago$ZCTA5CE10,perl=T)) | ||
|
||
#epsg:4326 transform | ||
chicago<-spTransform(chicago, CRS("+init=epsg:4326")) | ||
interest<-spTransform(interest, CRS("+init=epsg:4326")) | ||
chicago_data<-chicago@data[,c("GEOID10", "ALAND10")] | ||
interest_data<-interest@data[,c("GEOID10", "ALAND10")] | ||
#gSimplify | ||
chicago<-gSimplify(chicago,tol=0.01, topologyPreserve=TRUE) | ||
interest<-gSimplify(interest,tol=0.01, topologyPreserve=TRUE) | ||
#create SpatialPolygonsDataFrame | ||
chicago<-SpatialPolygonsDataFrame(chicago, data=chicago_data) | ||
interest<-SpatialPolygonsDataFrame(interest, data=interest_data) | ||
|
||
#leaflet map | ||
m <- leaflet(gf) %>% | ||
addTiles() %>% | ||
setView(lng =-87.7738, lat =41.78798, zoom = 10) %>% | ||
|
||
addPolygons(data=chicago,weight=2,fillOpacity = 0.15,color="blue")%>% | ||
addPolygons(data=interest,weight=2,fillOpacity = 0.6,color="turquoise")%>% | ||
addCircles(~LONGITUDE, ~LATITUDE, weight =4, radius=20, | ||
color="darkgreen", stroke = TRUE, fillOpacity = 0.8) %>% | ||
addCircles(large_store$LONGITUDE, large_store$LATITUDE, weight = 4, radius=20, | ||
color="purple", stroke = TRUE, fillOpacity = 0.8) | ||
m | ||
|
||
#save map | ||
saveWidget(m, file="chicago_map.html") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
#!/usr/bin/env python | ||
# this script: | ||
# 1) downloads the green roof and parks database from the Chicago data portal | ||
# 2) generates a boxplot comparing the percent vegetation on academic buildings to that of non-academic buildings | ||
# 3) performs a rank correlation test to determine if the number of community gardens within a zipcode is correlated with the number of green rooftops | ||
# USAGE: python green_roof.py | ||
# NOTE: requires installation of rpy2, geopy, andd ggplot2(R) | ||
|
||
import urllib | ||
import re | ||
import csv | ||
from collections import namedtuple, defaultdict | ||
from rpy2 import robjects | ||
from geopy.geocoders import Nominatim | ||
geolocator = Nominatim() | ||
|
||
#retrieve data files on Chicago green rooftops and parks | ||
file_url = 'https://data.cityofchicago.org/api/views/q3z3-udcz/rows.csv?accessType=DOWNLOAD' | ||
urllib.urlretrieve(file_url,"chicago_green_roof.txt") | ||
file_url='https://data.cityofchicago.org/api/views/wwy2-k7b3/rows.csv?accessType=DOWNLOAD' | ||
urllib.urlretrieve(file_url,"chicago_parks.txt") | ||
|
||
|
||
#FOR PLOT 1, boxplot: | ||
academic_buildings={} #key:building ID, value:percent of rooftop that is vegetated | ||
non_academic_buildings={} #key:building ID, value:percent of rooftop that is vegetated | ||
academic_patterns=('school|university|academy|college|u of|univ\.') # patterns to search for in building names | ||
|
||
with open("chicago_green_roof.txt", 'r') as IN: | ||
GREEN=csv.reader(IN) | ||
headers=next(IN) | ||
Row=namedtuple('Row',headers) | ||
for r in GREEN: | ||
row=Row(*r) | ||
#calculate percent of the roof that is vegetated | ||
per_veg=round((float(row.VEGETATED_SQFT)/float(row.TOTAL_ROOF_SQFT))*100,2) | ||
#check the two building name columns to determine if the buidlding can be classfied as "academic" | ||
if re.search(academic_patterns,row.BUILDING_NAME1,flags=re.IGNORECASE) or re.search(academic_patterns,row.BUILDING_NAME2,flags=re.IGNORECASE): | ||
academic_buildings[row.ID]=per_veg | ||
else: | ||
non_academic_buildings[row.ID]=per_veg | ||
|
||
#write data to output file: | ||
OUT=open("percent_vegetated.txt", 'w') | ||
for key,value in academic_buildings.items(): | ||
OUT.write("{key}\t{value}\tacademic_building\n".format(**locals())) | ||
for key,value in non_academic_buildings.items(): | ||
OUT.write("{key}\t{value}\tnon_academic_building\n".format(**locals())) | ||
OUT.close() | ||
|
||
#generate box plot with R: | ||
robjects.r(""" | ||
library(ggplot2) | ||
d1<-read.table("percent_vegetated.txt") | ||
colnames(d1)<-c("ID","percent_vegetated", "building_type") #add column names | ||
m <- ggplot(d1, aes(x=building_type, y=percent_vegetated)) + | ||
geom_boxplot()+ | ||
theme_bw()+ | ||
scale_x_discrete(breaks=c("academic_building","non_academic_building"), #rename tick mark labels | ||
labels=c("Academic Buildings", "Non-Academic Buildings"))+ | ||
labs(x = "Building Type", y = "Percent of Rooftop Vegetated") | ||
ggsave(filename="Percent_Vegetated_by_Building_Type.png", | ||
dpi=300, | ||
width=4, | ||
height=4, | ||
units="in") | ||
""" | ||
) | ||
|
||
#FOR PLOT 2,rank correlation test: | ||
gardens=defaultdict(int) #key:zip code, value:number of community gardens | ||
with open("chicago_parks.txt", 'r') as IN: | ||
PARKS=csv.reader(IN) | ||
#scrub headers,some contain nonvalid identifying characters | ||
headers = [ re.sub('[^a-zA-Z_]','_',h) for h in next(PARKS)] | ||
Row=namedtuple('Row',headers) | ||
for r in PARKS: | ||
row=Row(*r) | ||
if int(row.COMMUNITY_GARDEN) !=0: # only count parks with community gardens | ||
gardens[row.ZIP] += int(row.COMMUNITY_GARDEN) | ||
|
||
green_roofs=defaultdict(int) #key:zip code, value:number of buildings with green rooftops | ||
with open("chicago_green_roof.txt", 'r') as IN: | ||
GREEN=csv.reader(IN) | ||
headers=next(IN) | ||
Row=namedtuple('Row',headers) | ||
for r in GREEN: | ||
row=Row(*r) | ||
coordinate=str(row.LATITUDE+","+row.LONGITUDE) | ||
location = geolocator.reverse(coordinate,timeout=5) #reverse geocode | ||
location_info=location.raw | ||
address=location_info[u'address'] | ||
zip_codes= address[u'postcode'] | ||
match=re.search('(\d+)',zip_codes) | ||
zip_code=match.group(1) #take first zip code | ||
green_roofs[zip_code]+=1 | ||
|
||
#write data to output file: | ||
OUT=open("vegetation_by_building_type.txt","w") | ||
all_zip_codes=set(gardens.keys()+green_roofs.keys()) | ||
for zip_code in list(all_zip_codes): | ||
if zip_code in green_roofs.keys(): | ||
green_roof_no=green_roofs[zip_code] | ||
else: | ||
green_roof_no=0 | ||
if zip_code in gardens.keys(): | ||
garden_no=gardens[zip_code] | ||
else: | ||
garden_no=0 | ||
OUT.write("{zip_code}\t{garden_no}\t{green_roof_no}\n".format(**locals())) | ||
OUT.close() | ||
|
||
#generate plot2 scatterplot with R: | ||
robjects.r(""" | ||
library(ggplot2) | ||
d2<-read.table("vegetation_by_building_type.txt") | ||
colnames(d2)<-c("zip_code","gardens", "green_roofs") #add column names | ||
#spearman correlation | ||
correlation<-cor.test(d2$gardens, d2$green_roofs,method="spearman",exact=FALSE) | ||
rho=round(correlation$estimate,3) | ||
r_label <- paste("italic(r)[s] == ", rho) | ||
m <- ggplot(d2, aes(x=gardens, y=green_roofs))+ | ||
geom_point(size=1.25)+ | ||
geom_smooth(method="lm",se=FALSE,col="red")+ | ||
annotate("text", x=.5, y=28,label=r_label,parse=TRUE,size=2.5)+ | ||
theme_bw()+ | ||
labs(x = "Number of Community Gardens Per Zip Code", y = "Number of Green Roofs Per Zip Code") | ||
ggsave(filename="Green_Rooftop_vs_Gardens.png", | ||
dpi=300, | ||
width=4, | ||
height=4, | ||
units="in") | ||
""" | ||
) | ||
|
||
|