From aa8d9c2ea3fe85e019fbf6b31ebbf80dde00a4a3 Mon Sep 17 00:00:00 2001 From: klaricch Date: Wed, 11 Nov 2015 13:46:25 -0600 Subject: [PATCH] add project scripts --- generate_map.R | 59 +++++++++++++++++++++ green_roof.py | 138 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 197 insertions(+) create mode 100644 generate_map.R create mode 100644 green_roof.py diff --git a/generate_map.R b/generate_map.R new file mode 100644 index 0000000..1fba53b --- /dev/null +++ b/generate_map.R @@ -0,0 +1,59 @@ +library(rgdal) +library(rgeos) +library(leaflet) +library(dplyr) +library(htmlwidgets) +setwd("~/Desktop/Data_Incubator") +#download grocery stores file +#url<-"http://data.cityofchicago.org/api/views/53t8-wyrc/rows.csv?accessType=DOWNLOAD" +#download_dir<-getwd() +#dest_name<-"grocery_stores.csv" +#download.file(url, dest_name) + +gf<-read.csv("chicago_green_roof.txt") +gs<-read.csv("grocery_stores.csv") + +#filter for grocery stores > 10,000 sq feet (used in definition of food desert) +large_store<-filter(gs,SQUARE.FEET>10000) + +#download zipcodes: +url <- "http://www2.census.gov/geo/tiger/GENZ2014/shp/cb_2014_us_zcta510_500k.zip" +download_dir<-getwd() +dest_name<-"all_zip.zip" +download.file(url, dest_name) +unzip(dest_name, exdir=download_dir, junkpaths=TRUE) +filename<-list.files(download_dir, pattern=".shp", full.names=FALSE) +filename<-gsub(".shp", "", filename) +zip_codes<-readOGR(download_dir, "cb_2014_us_zcta510_500k") + +chi_zips<-c(60647,60639,60707,60622,60651,60611,60638,60652,60626,60621,60645,60631,60646,60628,60660,60640,60625,60641,60657,60615,60636,60649,60617,60643,60633,60643,60612,60604,60656,60624,60655,60644,60603,60605,60653,60609,60666,60618,60616,60602,60601,60608,60607,60661,60606,60614,60827,60630,60642,60659,60707,60634,60613,60610,60654,60632,60623,60629,60620,60637,60619) +chicago <- zip_codes[zip_codes$ZCTA5CE10 %in% chi_zips,] +interest<-subset(chicago, grepl('^(60636)|(60628)|(60644).*', chicago$ZCTA5CE10,perl=T)) + +#epsg:4326 transform +chicago<-spTransform(chicago, CRS("+init=epsg:4326")) +interest<-spTransform(interest, CRS("+init=epsg:4326")) +chicago_data<-chicago@data[,c("GEOID10", "ALAND10")] +interest_data<-interest@data[,c("GEOID10", "ALAND10")] +#gSimplify +chicago<-gSimplify(chicago,tol=0.01, topologyPreserve=TRUE) +interest<-gSimplify(interest,tol=0.01, topologyPreserve=TRUE) +#create SpatialPolygonsDataFrame +chicago<-SpatialPolygonsDataFrame(chicago, data=chicago_data) +interest<-SpatialPolygonsDataFrame(interest, data=interest_data) + +#leaflet map +m <- leaflet(gf) %>% + addTiles() %>% + setView(lng =-87.7738, lat =41.78798, zoom = 10) %>% + + addPolygons(data=chicago,weight=2,fillOpacity = 0.15,color="blue")%>% + addPolygons(data=interest,weight=2,fillOpacity = 0.6,color="turquoise")%>% +addCircles(~LONGITUDE, ~LATITUDE, weight =4, radius=20, + color="darkgreen", stroke = TRUE, fillOpacity = 0.8) %>% + addCircles(large_store$LONGITUDE, large_store$LATITUDE, weight = 4, radius=20, + color="purple", stroke = TRUE, fillOpacity = 0.8) +m + +#save map +saveWidget(m, file="chicago_map.html") diff --git a/green_roof.py b/green_roof.py new file mode 100644 index 0000000..27046a4 --- /dev/null +++ b/green_roof.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python +# this script: +# 1) downloads the green roof and parks database from the Chicago data portal +# 2) generates a boxplot comparing the percent vegetation on academic buildings to that of non-academic buildings +# 3) performs a rank correlation test to determine if the number of community gardens within a zipcode is correlated with the number of green rooftops +# USAGE: python green_roof.py +# NOTE: requires installation of rpy2, geopy, andd ggplot2(R) + +import urllib +import re +import csv +from collections import namedtuple, defaultdict +from rpy2 import robjects +from geopy.geocoders import Nominatim +geolocator = Nominatim() + +#retrieve data files on Chicago green rooftops and parks +file_url = 'https://data.cityofchicago.org/api/views/q3z3-udcz/rows.csv?accessType=DOWNLOAD' +urllib.urlretrieve(file_url,"chicago_green_roof.txt") +file_url='https://data.cityofchicago.org/api/views/wwy2-k7b3/rows.csv?accessType=DOWNLOAD' +urllib.urlretrieve(file_url,"chicago_parks.txt") + + +#FOR PLOT 1, boxplot: +academic_buildings={} #key:building ID, value:percent of rooftop that is vegetated +non_academic_buildings={} #key:building ID, value:percent of rooftop that is vegetated +academic_patterns=('school|university|academy|college|u of|univ\.') # patterns to search for in building names + +with open("chicago_green_roof.txt", 'r') as IN: + GREEN=csv.reader(IN) + headers=next(IN) + Row=namedtuple('Row',headers) + for r in GREEN: + row=Row(*r) + #calculate percent of the roof that is vegetated + per_veg=round((float(row.VEGETATED_SQFT)/float(row.TOTAL_ROOF_SQFT))*100,2) + #check the two building name columns to determine if the buidlding can be classfied as "academic" + if re.search(academic_patterns,row.BUILDING_NAME1,flags=re.IGNORECASE) or re.search(academic_patterns,row.BUILDING_NAME2,flags=re.IGNORECASE): + academic_buildings[row.ID]=per_veg + else: + non_academic_buildings[row.ID]=per_veg + +#write data to output file: +OUT=open("percent_vegetated.txt", 'w') +for key,value in academic_buildings.items(): + OUT.write("{key}\t{value}\tacademic_building\n".format(**locals())) +for key,value in non_academic_buildings.items(): + OUT.write("{key}\t{value}\tnon_academic_building\n".format(**locals())) +OUT.close() + +#generate box plot with R: +robjects.r(""" + library(ggplot2) + d1<-read.table("percent_vegetated.txt") + colnames(d1)<-c("ID","percent_vegetated", "building_type") #add column names + m <- ggplot(d1, aes(x=building_type, y=percent_vegetated)) + + geom_boxplot()+ + theme_bw()+ + scale_x_discrete(breaks=c("academic_building","non_academic_building"), #rename tick mark labels + labels=c("Academic Buildings", "Non-Academic Buildings"))+ + labs(x = "Building Type", y = "Percent of Rooftop Vegetated") + ggsave(filename="Percent_Vegetated_by_Building_Type.png", + dpi=300, + width=4, + height=4, + units="in") +""" + ) + +#FOR PLOT 2,rank correlation test: +gardens=defaultdict(int) #key:zip code, value:number of community gardens +with open("chicago_parks.txt", 'r') as IN: + PARKS=csv.reader(IN) + #scrub headers,some contain nonvalid identifying characters + headers = [ re.sub('[^a-zA-Z_]','_',h) for h in next(PARKS)] + Row=namedtuple('Row',headers) + for r in PARKS: + row=Row(*r) + if int(row.COMMUNITY_GARDEN) !=0: # only count parks with community gardens + gardens[row.ZIP] += int(row.COMMUNITY_GARDEN) + +green_roofs=defaultdict(int) #key:zip code, value:number of buildings with green rooftops +with open("chicago_green_roof.txt", 'r') as IN: + GREEN=csv.reader(IN) + headers=next(IN) + Row=namedtuple('Row',headers) + for r in GREEN: + row=Row(*r) + coordinate=str(row.LATITUDE+","+row.LONGITUDE) + location = geolocator.reverse(coordinate,timeout=5) #reverse geocode + location_info=location.raw + address=location_info[u'address'] + zip_codes= address[u'postcode'] + match=re.search('(\d+)',zip_codes) + zip_code=match.group(1) #take first zip code + green_roofs[zip_code]+=1 + +#write data to output file: +OUT=open("vegetation_by_building_type.txt","w") +all_zip_codes=set(gardens.keys()+green_roofs.keys()) +for zip_code in list(all_zip_codes): + if zip_code in green_roofs.keys(): + green_roof_no=green_roofs[zip_code] + else: + green_roof_no=0 + if zip_code in gardens.keys(): + garden_no=gardens[zip_code] + else: + garden_no=0 + OUT.write("{zip_code}\t{garden_no}\t{green_roof_no}\n".format(**locals())) +OUT.close() + +#generate plot2 scatterplot with R: +robjects.r(""" + library(ggplot2) + d2<-read.table("vegetation_by_building_type.txt") + colnames(d2)<-c("zip_code","gardens", "green_roofs") #add column names + + #spearman correlation + correlation<-cor.test(d2$gardens, d2$green_roofs,method="spearman",exact=FALSE) + rho=round(correlation$estimate,3) + + r_label <- paste("italic(r)[s] == ", rho) + m <- ggplot(d2, aes(x=gardens, y=green_roofs))+ + geom_point(size=1.25)+ + geom_smooth(method="lm",se=FALSE,col="red")+ + annotate("text", x=.5, y=28,label=r_label,parse=TRUE,size=2.5)+ + theme_bw()+ + labs(x = "Number of Community Gardens Per Zip Code", y = "Number of Green Roofs Per Zip Code") + ggsave(filename="Green_Rooftop_vs_Gardens.png", + dpi=300, + width=4, + height=4, + units="in") +""" +) + +