add project scripts

klaricch · Nov 11, 2015 · aa8d9c2 · aa8d9c2
1 parent 7c9370c
commit aa8d9c2
Show file tree

Hide file tree

Showing 2 changed files with 197 additions and 0 deletions.
diff --git a/generate_map.R b/generate_map.R
@@ -0,0 +1,59 @@
+library(rgdal)
+library(rgeos)
+library(leaflet)
+library(dplyr)
+library(htmlwidgets)
+setwd("~/Desktop/Data_Incubator")
+#download grocery stores file
+#url<-"http://data.cityofchicago.org/api/views/53t8-wyrc/rows.csv?accessType=DOWNLOAD"
+#download_dir<-getwd()
+#dest_name<-"grocery_stores.csv"
+#download.file(url, dest_name)
+
+gf<-read.csv("chicago_green_roof.txt")
+gs<-read.csv("grocery_stores.csv")
+
+#filter for grocery stores > 10,000 sq feet (used in definition of food desert)
+large_store<-filter(gs,SQUARE.FEET>10000)
+
+#download zipcodes:
+url <- "http://www2.census.gov/geo/tiger/GENZ2014/shp/cb_2014_us_zcta510_500k.zip"
+download_dir<-getwd()
+dest_name<-"all_zip.zip"
+download.file(url, dest_name)
+unzip(dest_name, exdir=download_dir, junkpaths=TRUE)
+filename<-list.files(download_dir, pattern=".shp", full.names=FALSE)
+filename<-gsub(".shp", "", filename)
+zip_codes<-readOGR(download_dir, "cb_2014_us_zcta510_500k")
+
+chi_zips<-c(60647,60639,60707,60622,60651,60611,60638,60652,60626,60621,60645,60631,60646,60628,60660,60640,60625,60641,60657,60615,60636,60649,60617,60643,60633,60643,60612,60604,60656,60624,60655,60644,60603,60605,60653,60609,60666,60618,60616,60602,60601,60608,60607,60661,60606,60614,60827,60630,60642,60659,60707,60634,60613,60610,60654,60632,60623,60629,60620,60637,60619)
+chicago <- zip_codes[zip_codes$ZCTA5CE10  %in% chi_zips,]
+interest<-subset(chicago, grepl('^(60636)|(60628)|(60644).*', chicago$ZCTA5CE10,perl=T)) 
+
+#epsg:4326 transform
+chicago<-spTransform(chicago, CRS("+init=epsg:4326"))
+interest<-spTransform(interest, CRS("+init=epsg:4326"))
+chicago_data<-chicago@data[,c("GEOID10", "ALAND10")]
+interest_data<-interest@data[,c("GEOID10", "ALAND10")]
+#gSimplify
+chicago<-gSimplify(chicago,tol=0.01, topologyPreserve=TRUE)
+interest<-gSimplify(interest,tol=0.01, topologyPreserve=TRUE)
+#create SpatialPolygonsDataFrame
+chicago<-SpatialPolygonsDataFrame(chicago, data=chicago_data)
+interest<-SpatialPolygonsDataFrame(interest, data=interest_data)
+
+#leaflet map
+m <- leaflet(gf) %>%
+  addTiles() %>% 
+  setView(lng =-87.7738, lat =41.78798, zoom = 10) %>% 
+
+  addPolygons(data=chicago,weight=2,fillOpacity = 0.15,color="blue")%>%
+  addPolygons(data=interest,weight=2,fillOpacity = 0.6,color="turquoise")%>%
+addCircles(~LONGITUDE, ~LATITUDE, weight =4, radius=20, 
+           color="darkgreen", stroke = TRUE, fillOpacity = 0.8) %>% 
+  addCircles(large_store$LONGITUDE, large_store$LATITUDE, weight = 4, radius=20, 
+             color="purple", stroke = TRUE, fillOpacity = 0.8)
+m
+
+#save map
+saveWidget(m, file="chicago_map.html")
diff --git a/green_roof.py b/green_roof.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+# this script:
+# 1) downloads the green roof and parks database from the Chicago data portal
+# 2) generates a boxplot comparing the percent vegetation on academic buildings to that of non-academic buildings
+# 3) performs a rank correlation test to determine if the number of community gardens within a zipcode is correlated with the number of green rooftops
+# USAGE: python green_roof.py
+# NOTE: requires installation of rpy2, geopy, andd ggplot2(R)
+
+import urllib
+import re
+import csv
+from collections import namedtuple, defaultdict
+from rpy2 import robjects
+from geopy.geocoders import Nominatim
+geolocator = Nominatim()
+
+#retrieve data files on Chicago green rooftops and parks
+file_url = 'https://data.cityofchicago.org/api/views/q3z3-udcz/rows.csv?accessType=DOWNLOAD'
+urllib.urlretrieve(file_url,"chicago_green_roof.txt")
+file_url='https://data.cityofchicago.org/api/views/wwy2-k7b3/rows.csv?accessType=DOWNLOAD'
+urllib.urlretrieve(file_url,"chicago_parks.txt")
+
+
+#FOR PLOT 1, boxplot:
+academic_buildings={} #key:building ID, value:percent of rooftop that is vegetated
+non_academic_buildings={} #key:building ID, value:percent of rooftop that is vegetated
+academic_patterns=('school|university|academy|college|u of|univ\.') # patterns to search for in building names
+
+with open("chicago_green_roof.txt", 'r') as IN:
+	GREEN=csv.reader(IN)
+	headers=next(IN)
+	Row=namedtuple('Row',headers)
+	for r in GREEN:
+		row=Row(*r)
+		#calculate percent of the roof that is vegetated
+		per_veg=round((float(row.VEGETATED_SQFT)/float(row.TOTAL_ROOF_SQFT))*100,2)
+		#check the two building name columns to determine if the buidlding can be classfied as "academic"
+		if re.search(academic_patterns,row.BUILDING_NAME1,flags=re.IGNORECASE) or re.search(academic_patterns,row.BUILDING_NAME2,flags=re.IGNORECASE):
+			academic_buildings[row.ID]=per_veg
+		else:
+			non_academic_buildings[row.ID]=per_veg
+
+#write data to output file:
+OUT=open("percent_vegetated.txt", 'w')
+for key,value in academic_buildings.items():
+	OUT.write("{key}\t{value}\tacademic_building\n".format(**locals()))
+for key,value in non_academic_buildings.items():
+	OUT.write("{key}\t{value}\tnon_academic_building\n".format(**locals()))		
+OUT.close()
+
+#generate box plot with R:
+robjects.r("""
+	library(ggplot2)
+	d1<-read.table("percent_vegetated.txt")
+	colnames(d1)<-c("ID","percent_vegetated", "building_type") #add column names
+	m <- ggplot(d1, aes(x=building_type, y=percent_vegetated)) + 
+		geom_boxplot()+
+		theme_bw()+
+		scale_x_discrete(breaks=c("academic_building","non_academic_building"), #rename tick mark labels
+                   labels=c("Academic Buildings", "Non-Academic Buildings"))+
+		labs(x = "Building Type", y = "Percent of Rooftop Vegetated")
+	ggsave(filename="Percent_Vegetated_by_Building_Type.png",
+       dpi=300,
+       width=4,
+       height=4,
+       units="in")
+"""
+	)
+
+#FOR PLOT 2,rank correlation test:
+gardens=defaultdict(int) #key:zip code, value:number of community gardens
+with open("chicago_parks.txt", 'r') as IN:
+	PARKS=csv.reader(IN)
+	#scrub headers,some contain nonvalid identifying characters
+	headers = [ re.sub('[^a-zA-Z_]','_',h) for h in next(PARKS)]
+	Row=namedtuple('Row',headers)
+	for r in PARKS:
+		row=Row(*r)
+		if int(row.COMMUNITY_GARDEN) !=0: # only count parks with community gardens
+			gardens[row.ZIP] += int(row.COMMUNITY_GARDEN)
+
+green_roofs=defaultdict(int) #key:zip code, value:number of buildings with green rooftops
+with open("chicago_green_roof.txt", 'r') as IN:
+	GREEN=csv.reader(IN)
+	headers=next(IN)
+	Row=namedtuple('Row',headers)
+	for r in GREEN:
+		row=Row(*r)
+		coordinate=str(row.LATITUDE+","+row.LONGITUDE)
+		location = geolocator.reverse(coordinate,timeout=5) #reverse geocode
+		location_info=location.raw
+		address=location_info[u'address']
+		zip_codes= address[u'postcode']
+		match=re.search('(\d+)',zip_codes)
+		zip_code=match.group(1) #take first zip code
+		green_roofs[zip_code]+=1
+
+#write data to output file:
+OUT=open("vegetation_by_building_type.txt","w")
+all_zip_codes=set(gardens.keys()+green_roofs.keys())
+for zip_code in list(all_zip_codes):
+	if zip_code in green_roofs.keys():
+		green_roof_no=green_roofs[zip_code]
+	else:
+		green_roof_no=0
+	if zip_code in gardens.keys():
+		garden_no=gardens[zip_code]
+	else:
+		garden_no=0
+	OUT.write("{zip_code}\t{garden_no}\t{green_roof_no}\n".format(**locals()))
+OUT.close()
+
+#generate plot2 scatterplot with R:
+robjects.r("""
+	library(ggplot2)
+	d2<-read.table("vegetation_by_building_type.txt")
+	colnames(d2)<-c("zip_code","gardens", "green_roofs") #add column names
+
+	#spearman correlation
+	correlation<-cor.test(d2$gardens, d2$green_roofs,method="spearman",exact=FALSE)
+	rho=round(correlation$estimate,3)
+
+	r_label <- paste("italic(r)[s] == ", rho)
+	m <- ggplot(d2, aes(x=gardens, y=green_roofs))+
+  		geom_point(size=1.25)+
+  		geom_smooth(method="lm",se=FALSE,col="red")+ 
+  		annotate("text", x=.5, y=28,label=r_label,parse=TRUE,size=2.5)+
+  		theme_bw()+
+  		labs(x = "Number of Community Gardens Per Zip Code", y = "Number of Green Roofs Per Zip Code")
+	ggsave(filename="Green_Rooftop_vs_Gardens.png",
+       dpi=300,
+       width=4,
+       height=4,
+       units="in")
+"""
+)
+
+