diff --git a/.DS_Store b/.DS_Store index aa5554ce..4625e334 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/5_Reproducible_Research/.DS_Store b/5_Reproducible_Research/.DS_Store index 324272e7..b6860cde 100644 Binary files a/5_Reproducible_Research/.DS_Store and b/5_Reproducible_Research/.DS_Store differ diff --git a/5_Reproducible_Research/project2/ReproducibleResearchProject2.html b/5_Reproducible_Research/project2/ReproducibleResearchProject2.html index e9505d08..73e81459 100644 --- a/5_Reproducible_Research/project2/ReproducibleResearchProject2.html +++ b/5_Reproducible_Research/project2/ReproducibleResearchProject2.html @@ -1,303 +1,363 @@ - - - - - - - - - - - - - - -Reproducible Research Project 2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - -

github repo for rest of specialization: Data Science Coursera

-
-

US Storm Event Human Health and Economic Impact Analysis

-
-

1: Synopsis

-

The goal of the assignment is to explore the NOAA Storm Database and explore the effects of severe weather events on both population and economy.The database covers the time period between 1950 and November 2011.

-

The analysis aims to investigate which different types of severe weather events are most harmful on the populations health in respect of general injuries and fatalities. Further the economic consequences will be analyzed by exploring the financial damage done to both general property and agriculture (i.e. crops)

-

information on the data: Documentation

-
-
-

2: Data Processing

-
-

2.1: Data Loading

-

Download the raw data file and extract the data into a dataframe.

-
library("data.table")
-library(ggplot2)
-
-fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
-download.file(fileUrl, destfile = paste0("C:/Users/mgalarnyk/Desktop", '/repdata%2Fdata%2FStormData.csv.bz2'))
-stormDF <- read.csv("C:/Users/mgalarnyk/Desktop/repdata%2Fdata%2FStormData.csv.bz2")
-
-stormDT <- as.data.table(stormDF)
-
-
-

2.2: Examining Column Names

-
colnames(stormDT)
-
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
-##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
-## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
-## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
-## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
-## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
-## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
-## [36] "REMARKS"    "REFNUM"
-
-
-

2.3: Data Subsetting

-

Subset the dataset on the parameters of interest. Basically, we remove the columns we don’t need

-
# Finding columns to remove
-cols2Remove <- colnames(stormDT[, !c("EVTYPE"
-  , "FATALITIES"
-  , "INJURIES"
-  , "PROPDMG"
-  , "PROPDMGEXP"
-  , "CROPDMG"
-  , "CROPDMGEXP")])
-
-# Removing columns
-stormDT[, c(cols2Remove) := NULL]
-
-# Only get data where fatalities or injuries occurred.  
-stormDT <- stormDT[(EVTYPE != "?" & 
-             (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0)), c("EVTYPE"
-                                                                            , "FATALITIES"
-                                                                            , "INJURIES"
-                                                                            , "PROPDMG"
-                                                                            , "PROPDMGEXP"
-                                                                            , "CROPDMG"
-                                                                            , "CROPDMGEXP") ]
-
-
-

2.4: Converting Exponent Columns into Actual Exponents instead of (-,+, H, K, etc)

-
# Change all damage exponents to uppercase.
-cols <- c("PROPDMGEXP", "CROPDMGEXP")
-stormDT[,  (cols) := c(lapply(.SD, toupper)), .SDcols = cols]
-
-# Map property damage alphanumeric exponents to numeric values.
-propDmgKey <-  c("\"\"" = 10^0,
-                 "-" = 10^0, 
-                 "+" = 10^0,
-                 "0" = 10^0,
-                 "1" = 10^1,
-                 "2" = 10^2,
-                 "3" = 10^3,
-                 "4" = 10^4,
-                 "5" = 10^5,
-                 "6" = 10^6,
-                 "7" = 10^7,
-                 "8" = 10^8,
-                 "9" = 10^9,
-                 "H" = 10^2,
-                 "K" = 10^3,
-                 "M" = 10^6,
-                 "B" = 10^9)
-
-# Map crop damage alphanumeric exponents to numeric values
-cropDmgKey <-  c("\"\"" = 10^0,
-                "?" = 10^0, 
-                "0" = 10^0,
-                "K" = 10^3,
-                "M" = 10^6,
-                "B" = 10^9)
-
-stormDT[,PROPDMGEXP := propDmgKey[as.character(stormDT[,PROPDMGEXP])]]
-stormDT[is.na(PROPDMGEXP), PROPDMGEXP := 10^0 ]
-
-stormDT[, CROPDMGEXP := cropDmgKey[as.character(stormDT[,CROPDMGEXP])] ]
-stormDT[is.na(CROPDMGEXP), CROPDMGEXP := 10^0 ]
-
-
-

2.5: Making Economic Cost Columns

-
stormDT <- stormDT[, .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, propCost = PROPDMG * PROPDMGEXP, CROPDMG, CROPDMGEXP, cropCost = CROPDMG * CROPDMGEXP)]
-
-
-

2.6: Events that are Most Harmful Health Processing

-
# Total injuries
-totalInjuriesDT <- stormDT[, .(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES), totals = sum(FATALITIES) + sum(INJURIES)), by = .(EVTYPE)]
-
-totalInjuriesDT <- totalInjuriesDT[order(-FATALITIES), ]
-
-totalInjuriesDT <- totalInjuriesDT[1:10, ]
-
-head(totalInjuriesDT, 5)
-
##            EVTYPE FATALITIES INJURIES totals
-## 1:        TORNADO       5633    91346  96979
-## 2: EXCESSIVE HEAT       1903     6525   8428
-## 3:    FLASH FLOOD        978     1777   2755
-## 4:           HEAT        937     2100   3037
-## 5:      LIGHTNING        816     5230   6046
-
-
-
-

3: Results

-

melting data.table so that each earlier column name (fatalities, totals) and their associated value goes into the

-
bad_stuff <- melt(totalInjuriesDT, id.vars="EVTYPE", variable.name = "bad_thing")
-
# Create chart
-healthChart <- ggplot(bad_stuff, aes(x=reorder(EVTYPE, -value), y=value))
-
-# Plot data as bar chart
-healthChart = healthChart + geom_bar(stat="identity", aes(fill=bad_thing), position="dodge")
-
-# Format y-axis scale and set y-axis label
-healthChart = healthChart + scale_y_sqrt("Frequency Count") 
-
-# Set x-axis label
-healthChart = healthChart + xlab("Event Type") 
-
-# Rotate x-axis tick labels 
-healthChart = healthChart + theme(axis.text.x = element_text(angle=45, hjust=1))
-
-# Set chart title
-healthChart = healthChart + ggtitle("Top 10 US Killers")
-
-healthChart
-

-
-
- - - - -
- - - - - - - - + + + + + + + + + + + + + + +Analysis of the Adverse Health and Economic Impacts of US Storms + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + +

Github repo for the Course: Reproducible Research
Github repo for Rest of Specialization: Data Science Coursera

+
+

1: Synopsis

+

The goal of the assignment is to explore the NOAA Storm Database and explore the effects of severe weather events on both population and economy.The database covers the time period between 1950 and November 2011.

+

The following analysis investigates which types of severe weather events are most harmful on:

+
    +
  1. Health (injuries and fatalities)
  2. +
  3. Property and crops (economic consequences)
  4. +
+

Information on the Data: Documentation

+
+
+

2: Data Processing

+
+

2.1: Data Loading

+

Download the raw data file and extract the data into a dataframe.Then convert to a data.table

+
library("data.table")
+library("ggplot2")
+
+fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
+download.file(fileUrl, destfile = paste0("/Users/mgalarny/Desktop", '/repdata%2Fdata%2FStormData.csv.bz2'))
+stormDF <- read.csv("/Users/mgalarny/Desktop/repdata%2Fdata%2FStormData.csv.bz2")
+
+# Converting data.frame to data.table
+stormDT <- as.data.table(stormDF)
+
+
+

2.2: Examining Column Names

+
colnames(stormDT)
+
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
+##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
+## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
+## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
+## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
+## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
+## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
+## [36] "REMARKS"    "REFNUM"
+
+
+

2.3: Data Subsetting

+

Subset the dataset on the parameters of interest. Basically, we remove the columns we don’t need for clarity.

+
# Finding columns to remove
+cols2Remove <- colnames(stormDT[, !c("EVTYPE"
+  , "FATALITIES"
+  , "INJURIES"
+  , "PROPDMG"
+  , "PROPDMGEXP"
+  , "CROPDMG"
+  , "CROPDMGEXP")])
+
+# Removing columns
+stormDT[, c(cols2Remove) := NULL]
+
+# Only use data where fatalities or injuries occurred.  
+stormDT <- stormDT[(EVTYPE != "?" & 
+             (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0)), c("EVTYPE"
+                                                                            , "FATALITIES"
+                                                                            , "INJURIES"
+                                                                            , "PROPDMG"
+                                                                            , "PROPDMGEXP"
+                                                                            , "CROPDMG"
+                                                                            , "CROPDMGEXP") ]
+
+
+

2.4: Converting Exponent Columns into Actual Exponents instead of (-,+, H, K, etc)

+

Making the PROPDMGEXP and CROPDMGEXP columns cleaner so they can be used to calculate property and crop cost.

+
# Change all damage exponents to uppercase.
+cols <- c("PROPDMGEXP", "CROPDMGEXP")
+stormDT[,  (cols) := c(lapply(.SD, toupper)), .SDcols = cols]
+
+# Map property damage alphanumeric exponents to numeric values.
+propDmgKey <-  c("\"\"" = 10^0,
+                 "-" = 10^0, 
+                 "+" = 10^0,
+                 "0" = 10^0,
+                 "1" = 10^1,
+                 "2" = 10^2,
+                 "3" = 10^3,
+                 "4" = 10^4,
+                 "5" = 10^5,
+                 "6" = 10^6,
+                 "7" = 10^7,
+                 "8" = 10^8,
+                 "9" = 10^9,
+                 "H" = 10^2,
+                 "K" = 10^3,
+                 "M" = 10^6,
+                 "B" = 10^9)
+
+# Map crop damage alphanumeric exponents to numeric values
+cropDmgKey <-  c("\"\"" = 10^0,
+                "?" = 10^0, 
+                "0" = 10^0,
+                "K" = 10^3,
+                "M" = 10^6,
+                "B" = 10^9)
+
+stormDT[, PROPDMGEXP := propDmgKey[as.character(stormDT[,PROPDMGEXP])]]
+stormDT[is.na(PROPDMGEXP), PROPDMGEXP := 10^0 ]
+
+stormDT[, CROPDMGEXP := cropDmgKey[as.character(stormDT[,CROPDMGEXP])] ]
+stormDT[is.na(CROPDMGEXP), CROPDMGEXP := 10^0 ]
+
+
+

2.5: Making Economic Cost Columns

+
stormDT <- stormDT[, .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, propCost = PROPDMG * PROPDMGEXP, CROPDMG, CROPDMGEXP, cropCost = CROPDMG * CROPDMGEXP)]
+
+
+

2.6: Calcuating Total Property and Crop Cost

+
totalCostDT <- stormDT[, .(propCost = sum(propCost), cropCost = sum(cropCost), Total_Cost = sum(propCost) + sum(cropCost)), by = .(EVTYPE)]
+
+totalCostDT <- totalCostDT[order(-Total_Cost), ]
+
+totalCostDT <- totalCostDT[1:10, ]
+
+head(totalCostDT, 5)
+
##               EVTYPE     propCost   cropCost   Total_Cost
+## 1:             FLOOD 144657709807 5661968450 150319678257
+## 2: HURRICANE/TYPHOON  69305840000 2607872800  71913712800
+## 3:           TORNADO  56947380676  414953270  57362333946
+## 4:       STORM SURGE  43323536000       5000  43323541000
+## 5:              HAIL  15735267513 3025954473  18761221986
+
+
+

2.7: Calcuating Total Fatalities and Injuries

+
totalInjuriesDT <- stormDT[, .(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES), totals = sum(FATALITIES) + sum(INJURIES)), by = .(EVTYPE)]
+
+totalInjuriesDT <- totalInjuriesDT[order(-FATALITIES), ]
+
+totalInjuriesDT <- totalInjuriesDT[1:10, ]
+
+head(totalInjuriesDT, 5)
+
##            EVTYPE FATALITIES INJURIES totals
+## 1:        TORNADO       5633    91346  96979
+## 2: EXCESSIVE HEAT       1903     6525   8428
+## 3:    FLASH FLOOD        978     1777   2755
+## 4:           HEAT        937     2100   3037
+## 5:      LIGHTNING        816     5230   6046
+
+
+
+

3: Results

+
+

3.1: Events that are Most Harmful to Population Health

+

Melting data.table so that it is easier to put in bar graph format

+
bad_stuff <- melt(totalInjuriesDT, id.vars="EVTYPE", variable.name = "bad_thing")
+head(bad_stuff, 5)
+
##            EVTYPE  bad_thing value
+## 1:        TORNADO FATALITIES  5633
+## 2: EXCESSIVE HEAT FATALITIES  1903
+## 3:    FLASH FLOOD FATALITIES   978
+## 4:           HEAT FATALITIES   937
+## 5:      LIGHTNING FATALITIES   816
+
# Create chart
+healthChart <- ggplot(bad_stuff, aes(x=reorder(EVTYPE, -value), y=value))
+
+# Plot data as bar chart
+healthChart = healthChart + geom_bar(stat="identity", aes(fill=bad_thing), position="dodge")
+
+# Format y-axis scale and set y-axis label
+healthChart = healthChart + ylab("Frequency Count") 
+
+# Set x-axis label
+healthChart = healthChart + xlab("Event Type") 
+
+# Rotate x-axis tick labels 
+healthChart = healthChart + theme(axis.text.x = element_text(angle=45, hjust=1))
+
+# Set chart title and center it
+healthChart = healthChart + ggtitle("Top 10 US Killers") + theme(plot.title = element_text(hjust = 0.5))
+
+healthChart
+

+
+
+

3.2: Events that have the Greatest Economic Consequences

+

Melting data.table so that it is easier to put in bar graph format

+
econ_consequences <- melt(totalCostDT, id.vars="EVTYPE", variable.name = "Damage_Type")
+head(econ_consequences, 5)
+
##               EVTYPE Damage_Type        value
+## 1:             FLOOD    propCost 144657709807
+## 2: HURRICANE/TYPHOON    propCost  69305840000
+## 3:           TORNADO    propCost  56947380676
+## 4:       STORM SURGE    propCost  43323536000
+## 5:              HAIL    propCost  15735267513
+
# Create chart
+econChart <- ggplot(econ_consequences, aes(x=reorder(EVTYPE, -value), y=value))
+
+# Plot data as bar chart
+econChart = econChart + geom_bar(stat="identity", aes(fill=Damage_Type), position="dodge")
+
+# Format y-axis scale and set y-axis label
+econChart = econChart + ylab("Cost (dollars)") 
+
+# Set x-axis label
+econChart = econChart + xlab("Event Type") 
+
+# Rotate x-axis tick labels 
+econChart = econChart + theme(axis.text.x = element_text(angle=45, hjust=1))
+
+# Set chart title and center it
+econChart = econChart + ggtitle("Top 10 US Storm Events causing Economic Consequences") + theme(plot.title = element_text(hjust = 0.5))
+
+econChart
+

+
+
+ + + + +
+ + + + + + + + diff --git a/5_Reproducible_Research/project2/ReproducibleResearchProject2.rmd b/5_Reproducible_Research/project2/ReproducibleResearchProject2.rmd index b98d4382..18352c91 100644 --- a/5_Reproducible_Research/project2/ReproducibleResearchProject2.rmd +++ b/5_Reproducible_Research/project2/ReproducibleResearchProject2.rmd @@ -1,206 +1,206 @@ ---- -title: "Analysis of the Adverse Health and Economic Impacts of US Storms" -author: "Michael Galarnyk" -date: "5/10/2017" -output: html_document ---- - -Github repo for the Course: [Reproducible Research](https://github.com/mGalarnyk/datasciencecoursera/tree/master/5_Reproducible_Research) -
-Github repo for Rest of Specialization: [Data Science Coursera](https://github.com/mGalarnyk/datasciencecoursera) - -## 1: Synopsis -The goal of the assignment is to explore the NOAA Storm Database and explore the effects of severe weather events on both population and economy.The database covers the time period between 1950 and November 2011. - -The following analysis investigates which types of severe weather events are most harmful on: - -1. Health (injuries and fatalities) -2. Property and crops (economic consequences) - -Information on the Data: [Documentation](https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf) - -## 2: Data Processing - -### 2.1: Data Loading - -Download the raw data file and extract the data into a dataframe.Then convert to a data.table -```{r DataLoading} -library("data.table") -library("ggplot2") - -fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2" -download.file(fileUrl, destfile = paste0("C:/Users/mgalarnyk/Desktop", '/repdata%2Fdata%2FStormData.csv.bz2')) -stormDF <- read.csv("C:/Users/mgalarnyk/Desktop/repdata%2Fdata%2FStormData.csv.bz2") - -# Converting data.frame to data.table -stormDT <- as.data.table(stormDF) -``` - -### 2.2: Examining Column Names - -```{r ColumnNames} -colnames(stormDT) -``` - -### 2.3: Data Subsetting - -Subset the dataset on the parameters of interest. Basically, we remove the columns we don't need for clarity. -```{r DataSubsetting, results="hide"} - -# Finding columns to remove -cols2Remove <- colnames(stormDT[, !c("EVTYPE" - , "FATALITIES" - , "INJURIES" - , "PROPDMG" - , "PROPDMGEXP" - , "CROPDMG" - , "CROPDMGEXP")]) - -# Removing columns -stormDT[, c(cols2Remove) := NULL] - -# Only use data where fatalities or injuries occurred. -stormDT <- stormDT[(EVTYPE != "?" & - (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0)), c("EVTYPE" - , "FATALITIES" - , "INJURIES" - , "PROPDMG" - , "PROPDMGEXP" - , "CROPDMG" - , "CROPDMGEXP") ] -``` - -### 2.4: Converting Exponent Columns into Actual Exponents instead of (-,+, H, K, etc) - -Making the PROPDMGEXP and CROPDMGEXP columns cleaner so they can be used to calculate property and crop cost. -```{r CorrectingExponents, results="hide"} - -# Change all damage exponents to uppercase. -cols <- c("PROPDMGEXP", "CROPDMGEXP") -stormDT[, (cols) := c(lapply(.SD, toupper)), .SDcols = cols] - -# Map property damage alphanumeric exponents to numeric values. -propDmgKey <- c("\"\"" = 10^0, - "-" = 10^0, - "+" = 10^0, - "0" = 10^0, - "1" = 10^1, - "2" = 10^2, - "3" = 10^3, - "4" = 10^4, - "5" = 10^5, - "6" = 10^6, - "7" = 10^7, - "8" = 10^8, - "9" = 10^9, - "H" = 10^2, - "K" = 10^3, - "M" = 10^6, - "B" = 10^9) - -# Map crop damage alphanumeric exponents to numeric values -cropDmgKey <- c("\"\"" = 10^0, - "?" = 10^0, - "0" = 10^0, - "K" = 10^3, - "M" = 10^6, - "B" = 10^9) - -stormDT[, PROPDMGEXP := propDmgKey[as.character(stormDT[,PROPDMGEXP])]] -stormDT[is.na(PROPDMGEXP), PROPDMGEXP := 10^0 ] - -stormDT[, CROPDMGEXP := cropDmgKey[as.character(stormDT[,CROPDMGEXP])] ] -stormDT[is.na(CROPDMGEXP), CROPDMGEXP := 10^0 ] -``` - -### 2.5: Making Economic Cost Columns - -```{r EconomicCostColumns} -stormDT <- stormDT[, .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, propCost = PROPDMG * PROPDMGEXP, CROPDMG, CROPDMGEXP, cropCost = CROPDMG * CROPDMGEXP)] -``` - -### 2.6: Calcuating Total Property and Crop Cost - -```{r TotalPropertyCropCost} -totalCostDT <- stormDT[, .(propCost = sum(propCost), cropCost = sum(cropCost), Total_Cost = sum(propCost) + sum(cropCost)), by = .(EVTYPE)] - -totalCostDT <- totalCostDT[order(-Total_Cost), ] - -totalCostDT <- totalCostDT[1:10, ] - -head(totalCostDT, 5) -``` - -### 2.7: Calcuating Total Fatalities and Injuries - -```{r TotalFatalitiesInjuriesCalc} -totalInjuriesDT <- stormDT[, .(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES), totals = sum(FATALITIES) + sum(INJURIES)), by = .(EVTYPE)] - -totalInjuriesDT <- totalInjuriesDT[order(-FATALITIES), ] - -totalInjuriesDT <- totalInjuriesDT[1:10, ] - -head(totalInjuriesDT, 5) -``` - -## 3: Results - -### 3.1: Events that are Most Harmful to Population Health - -Melting data.table so that it is easier to put in bar graph format -```{r HealthResults} -bad_stuff <- melt(totalInjuriesDT, id.vars="EVTYPE", variable.name = "bad_thing") -head(bad_stuff, 5) -``` - -```{r healthChart} -# Create chart -healthChart <- ggplot(bad_stuff, aes(x=reorder(EVTYPE, -value), y=value)) - -# Plot data as bar chart -healthChart = healthChart + geom_bar(stat="identity", aes(fill=bad_thing), position="dodge") - -# Format y-axis scale and set y-axis label -healthChart = healthChart + ylab("Frequency Count") - -# Set x-axis label -healthChart = healthChart + xlab("Event Type") - -# Rotate x-axis tick labels -healthChart = healthChart + theme(axis.text.x = element_text(angle=45, hjust=1)) - -# Set chart title and center it -healthChart = healthChart + ggtitle("Top 10 US Killers") + theme(plot.title = element_text(hjust = 0.5)) - -healthChart -``` - -### 3.2: Events that have the Greatest Economic Consequences - -Melting data.table so that it is easier to put in bar graph format -```{r EconConsequences} -econ_consequences <- melt(totalCostDT, id.vars="EVTYPE", variable.name = "Damage_Type") -head(econ_consequences, 5) -``` - -```{r econChart} -# Create chart -econChart <- ggplot(econ_consequences, aes(x=reorder(EVTYPE, -value), y=value)) - -# Plot data as bar chart -econChart = econChart + geom_bar(stat="identity", aes(fill=Damage_Type), position="dodge") - -# Format y-axis scale and set y-axis label -econChart = econChart + ylab("Cost (dollars)") - -# Set x-axis label -econChart = econChart + xlab("Event Type") - -# Rotate x-axis tick labels -econChart = econChart + theme(axis.text.x = element_text(angle=45, hjust=1)) - -# Set chart title and center it -econChart = econChart + ggtitle("Top 10 US Storm Events causing Economic Consequences") + theme(plot.title = element_text(hjust = 0.5)) - -econChart +--- +title: "Analysis of the Adverse Health and Economic Impacts of US Storms" +author: "Michael Galarnyk" +date: "5/10/2017" +output: html_document +--- + +Github repo for the Course: [Reproducible Research](https://github.com/mGalarnyk/datasciencecoursera/tree/master/5_Reproducible_Research) +
+Github repo for Rest of Specialization: [Data Science Coursera](https://github.com/mGalarnyk/datasciencecoursera) + +## 1: Synopsis +The goal of the assignment is to explore the NOAA Storm Database and explore the effects of severe weather events on both population and economy.The database covers the time period between 1950 and November 2011. + +The following analysis investigates which types of severe weather events are most harmful on: + +1. Health (injuries and fatalities) +2. Property and crops (economic consequences) + +Information on the Data: [Documentation](https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf) + +## 2: Data Processing + +### 2.1: Data Loading + +Download the raw data file and extract the data into a dataframe.Then convert to a data.table +```{r DataLoading} +library("data.table") +library("ggplot2") + +fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2" +download.file(fileUrl, destfile = paste0("/Users/mgalarny/Desktop", '/repdata%2Fdata%2FStormData.csv.bz2')) +stormDF <- read.csv("/Users/mgalarny/Desktop/repdata%2Fdata%2FStormData.csv.bz2") + +# Converting data.frame to data.table +stormDT <- as.data.table(stormDF) +``` + +### 2.2: Examining Column Names + +```{r ColumnNames} +colnames(stormDT) +``` + +### 2.3: Data Subsetting + +Subset the dataset on the parameters of interest. Basically, we remove the columns we don't need for clarity. +```{r DataSubsetting, results="hide"} + +# Finding columns to remove +cols2Remove <- colnames(stormDT[, !c("EVTYPE" + , "FATALITIES" + , "INJURIES" + , "PROPDMG" + , "PROPDMGEXP" + , "CROPDMG" + , "CROPDMGEXP")]) + +# Removing columns +stormDT[, c(cols2Remove) := NULL] + +# Only use data where fatalities or injuries occurred. +stormDT <- stormDT[(EVTYPE != "?" & + (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0)), c("EVTYPE" + , "FATALITIES" + , "INJURIES" + , "PROPDMG" + , "PROPDMGEXP" + , "CROPDMG" + , "CROPDMGEXP") ] +``` + +### 2.4: Converting Exponent Columns into Actual Exponents instead of (-,+, H, K, etc) + +Making the PROPDMGEXP and CROPDMGEXP columns cleaner so they can be used to calculate property and crop cost. +```{r CorrectingExponents, results="hide"} + +# Change all damage exponents to uppercase. +cols <- c("PROPDMGEXP", "CROPDMGEXP") +stormDT[, (cols) := c(lapply(.SD, toupper)), .SDcols = cols] + +# Map property damage alphanumeric exponents to numeric values. +propDmgKey <- c("\"\"" = 10^0, + "-" = 10^0, + "+" = 10^0, + "0" = 10^0, + "1" = 10^1, + "2" = 10^2, + "3" = 10^3, + "4" = 10^4, + "5" = 10^5, + "6" = 10^6, + "7" = 10^7, + "8" = 10^8, + "9" = 10^9, + "H" = 10^2, + "K" = 10^3, + "M" = 10^6, + "B" = 10^9) + +# Map crop damage alphanumeric exponents to numeric values +cropDmgKey <- c("\"\"" = 10^0, + "?" = 10^0, + "0" = 10^0, + "K" = 10^3, + "M" = 10^6, + "B" = 10^9) + +stormDT[, PROPDMGEXP := propDmgKey[as.character(stormDT[,PROPDMGEXP])]] +stormDT[is.na(PROPDMGEXP), PROPDMGEXP := 10^0 ] + +stormDT[, CROPDMGEXP := cropDmgKey[as.character(stormDT[,CROPDMGEXP])] ] +stormDT[is.na(CROPDMGEXP), CROPDMGEXP := 10^0 ] +``` + +### 2.5: Making Economic Cost Columns + +```{r EconomicCostColumns} +stormDT <- stormDT[, .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, propCost = PROPDMG * PROPDMGEXP, CROPDMG, CROPDMGEXP, cropCost = CROPDMG * CROPDMGEXP)] +``` + +### 2.6: Calcuating Total Property and Crop Cost + +```{r TotalPropertyCropCost} +totalCostDT <- stormDT[, .(propCost = sum(propCost), cropCost = sum(cropCost), Total_Cost = sum(propCost) + sum(cropCost)), by = .(EVTYPE)] + +totalCostDT <- totalCostDT[order(-Total_Cost), ] + +totalCostDT <- totalCostDT[1:10, ] + +head(totalCostDT, 5) +``` + +### 2.7: Calcuating Total Fatalities and Injuries + +```{r TotalFatalitiesInjuriesCalc} +totalInjuriesDT <- stormDT[, .(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES), totals = sum(FATALITIES) + sum(INJURIES)), by = .(EVTYPE)] + +totalInjuriesDT <- totalInjuriesDT[order(-FATALITIES), ] + +totalInjuriesDT <- totalInjuriesDT[1:10, ] + +head(totalInjuriesDT, 5) +``` + +## 3: Results + +### 3.1: Events that are Most Harmful to Population Health + +Melting data.table so that it is easier to put in bar graph format +```{r HealthResults} +bad_stuff <- melt(totalInjuriesDT, id.vars="EVTYPE", variable.name = "bad_thing") +head(bad_stuff, 5) +``` + +```{r healthChart} +# Create chart +healthChart <- ggplot(bad_stuff, aes(x=reorder(EVTYPE, -value), y=value)) + +# Plot data as bar chart +healthChart = healthChart + geom_bar(stat="identity", aes(fill=bad_thing), position="dodge") + +# Format y-axis scale and set y-axis label +healthChart = healthChart + ylab("Frequency Count") + +# Set x-axis label +healthChart = healthChart + xlab("Event Type") + +# Rotate x-axis tick labels +healthChart = healthChart + theme(axis.text.x = element_text(angle=45, hjust=1)) + +# Set chart title and center it +healthChart = healthChart + ggtitle("Top 10 US Killers") + theme(plot.title = element_text(hjust = 0.5)) + +healthChart +``` + +### 3.2: Events that have the Greatest Economic Consequences + +Melting data.table so that it is easier to put in bar graph format +```{r EconConsequences} +econ_consequences <- melt(totalCostDT, id.vars="EVTYPE", variable.name = "Damage_Type") +head(econ_consequences, 5) +``` + +```{r econChart} +# Create chart +econChart <- ggplot(econ_consequences, aes(x=reorder(EVTYPE, -value), y=value)) + +# Plot data as bar chart +econChart = econChart + geom_bar(stat="identity", aes(fill=Damage_Type), position="dodge") + +# Format y-axis scale and set y-axis label +econChart = econChart + ylab("Cost (dollars)") + +# Set x-axis label +econChart = econChart + xlab("Event Type") + +# Rotate x-axis tick labels +econChart = econChart + theme(axis.text.x = element_text(angle=45, hjust=1)) + +# Set chart title and center it +econChart = econChart + ggtitle("Top 10 US Storm Events causing Economic Consequences") + theme(plot.title = element_text(hjust = 0.5)) + +econChart ``` \ No newline at end of file diff --git a/5_Reproducible_Research/project2/rsconnect/documents/ReproducibleResearchProject2.rmd/rpubs.com/rpubs/Document.dcf b/5_Reproducible_Research/project2/rsconnect/documents/ReproducibleResearchProject2.rmd/rpubs.com/rpubs/Document.dcf new file mode 100644 index 00000000..5ffa7613 --- /dev/null +++ b/5_Reproducible_Research/project2/rsconnect/documents/ReproducibleResearchProject2.rmd/rpubs.com/rpubs/Document.dcf @@ -0,0 +1,8 @@ +name: Document +title: +account: rpubs +server: rpubs.com +appId: https://api.rpubs.com/api/v1/document/275387/c97019d186124a31938439877a867e7e +bundleId: https://api.rpubs.com/api/v1/document/275387/c97019d186124a31938439877a867e7e +url: http://rpubs.com/publish/claim/275387/f2063cae4a604990b7d6e67f6eba2c4c +when: 1494525654.05411