From 770682f6f165bc5d9eadaf34d6c6884c7ec7f341 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Al=C3=AA=20Haak?= <alehaak@gmail.com>
Date: Fri, 10 Jun 2016 15:56:30 -0700
Subject: [PATCH] Fixed answer 1

---
 .../Week 4/Quiz week 4.R                      | 82 +++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 Getting and Cleaning Data/Week 4/Quiz week 4.R

diff --git a/Getting and Cleaning Data/Week 4/Quiz week 4.R b/Getting and Cleaning Data/Week 4/Quiz week 4.R
new file mode 100644
index 0000000..154e370
--- /dev/null
+++ b/Getting and Cleaning Data/Week 4/Quiz week 4.R	
@@ -0,0 +1,82 @@
+# Question 1
+    # Apply strsplit() to split all the names of the data frame on the characters "wgtp".
+    # What is the value of the 123rd element of the resulting list?
+    # The code book describing the variable names is here:
+        # https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FPUMSDataDict06.pdf
+    # Download and read the data (creating ./data first so download.file() has a target)
+        dir.create("./data", showWarnings = FALSE)
+        fileUrl <- "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Fss06hid.csv"
+        download.file(fileUrl, "./data/W4Q1.csv")
+        data1 <- read.csv("./data/W4Q1.csv")
+    # Split every column name on the literal "wgtp" and print element 123 of the result
+        wtgp <- names(data1)
+        strsplit(wtgp, "wgtp")[[123]]
+    # Answer: "" "15" (expected if column 123 is "wgtp15" -- TODO confirm by running)
+
+# Question 2 & 3
+    # Remove the commas from the GDP numbers in millions of dollars and average them.
+    # Original data sources: http://data.worldbank.org/data-catalog/GDP-ranking-table
+    # In the data set from Question 2, what is a regular expression that would allow
+    # you to count the number of countries whose name begins with "United"? Assume
+    # that the variable with the country names in it is named countryNames. How many
+    # countries begin with United?
+    # Download the data (creating ./data first if needed) and read it
+        dir.create("./data", showWarnings = FALSE)
+        fileUrl2 <- "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FGDP.csv"
+        download.file(fileUrl2, "./data/W4Q2.csv")
+        GDP <- read.csv("./data/W4Q2.csv", skip = 4, nrows = 190)
+    # Strip the thousands separators from the GDP figures (column X.4)
+        GDPdol <- gsub(",", "", GDP$X.4)
+    # Convert to integer and calculate the mean, skipping entries that become NA
+        GDPdol <- as.integer(GDPdol)
+        mean(GDPdol, na.rm = TRUE)
+
+    # Count the country names (column X.3) that start with "United"; no attach() required
+        unitedRows <- grep("^United", GDP$X.3)
+        length(unitedRows)
+
+        # Answer Question 2 = 377652.4
+        # Answer Question 3 = grep("^United", GDP$X.3), 3
+
+# Question 4
+    # Match the data based on the country shortcode. Of the countries for which the end of the
+    # fiscal year is available, how many end in June?
+    # Original data sources:
+        # http://data.worldbank.org/data-catalog/GDP-ranking-table
+        # http://data.worldbank.org/data-catalog/ed-stats
+
+    # Fetch the GDP ranking table into ./data and read it
+        gdpUrl <- "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FGDP.csv"
+        gdpPath <- "./data/W4Q4.csv"
+        download.file(gdpUrl, gdpPath)
+        GDP <- read.csv(gdpPath, skip = 4, nrows = 190)
+    # Fetch the education statistics table into the working directory and read it
+        eduPath <- "edu.csv"
+        download.file("https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FEDSTATS_Country.csv", eduPath)
+        edu <- read.csv(eduPath)
+    # Join the two tables on the country code (column X of GDP, CountryCode of edu)
+        merged <- merge(GDP, edu, by.x = "X", by.y = "CountryCode")
+    # Count the merged rows whose Special.Notes mention a fiscal year ending in June
+        sum(grepl("Fiscal year end: June", merged$Special.Notes))
+
+# Question 5
+    # You can use the quantmod (http://www.quantmod.com/) package to get historical stock prices
+    # for publicly traded companies on the NASDAQ and NYSE. Use the following code to download
+    # data on Amazon's stock price and get the times the data was sampled.
+    # Install quantmod only when it is missing, then load it
+        if (!requireNamespace("quantmod", quietly = TRUE)) install.packages("quantmod")
+        library(quantmod)
+    # Load Amazon stock data as a return value instead of assigning it into the workspace
+        amzn <- getSymbols("AMZN", auto.assign = FALSE)
+    # Extract the index (the times the data was sampled)
+        sampleTimes <- index(amzn)
+    # Flag 2012 observations by comparing the year component rather than pattern-matching text
+        year2012 <- format(sampleTimes, "%Y") == "2012"
+    # Count 2012 observations (TRUE) against all others (FALSE)
+        table(year2012)
+    # Keep only the 2012 sampling times
+        sampleTimes2012 <- sampleTimes[year2012]
+    # Convert to weekday names (rendered in the current locale's language)
+        day <- format(sampleTimes2012, "%A")
+    # Count observations per weekday
+        table(day)
\ No newline at end of file