-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
code and data for college & county EDA and cond. prob
This contains the code and CSV files used to create the EDA and conditional probabilities for both COLLEGES and COUNTIES. The code is set up to be run from the same folder/directory that the code files are in.
- Loading branch information
1 parent
b906bc4
commit 80fd6af
Showing
4 changed files
with
3,265 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
--- | ||
title: "Visuals and Conditional Probabilities at the College Level" | ||
author: "Austin Eaton" | ||
date: "12/7/2021" | ||
output: html_document | ||
--- | ||
|
||
```{r setup, include=FALSE} | ||
knitr::opts_chunk$set(echo = TRUE) | ||
``` | ||
|
||
```{r message=FALSE} | ||
library(ggplot2) | ||
library(dplyr) | ||
``` | ||
|
||
|
||
Read in file: | ||
```{r} | ||
## Load the college-level data set. The path is relative, so the document must
## be knitted from the directory that holds the CSV.
colleges <- utils::read.csv(file = "colleges_edited.csv")
``` | ||
|
||
### Conditional Probabilities for Colleges | ||
|
||
```{r, results = 'hold'} | ||
## Estimate P(event | given) as the share of rows satisfying `given` for which
## `event` also holds. Both arguments are logical vectors over the same rows.
## Comparisons that evaluate to NA are dropped instead of being counted as rows.
cond_prob <- function(event, given) {
  sum(event & given, na.rm = TRUE) / sum(given, na.rm = TRUE)
}

## Cut-offs are computed from the data rather than copied from the printed
## summaries, so each label below always matches the threshold it describes.
case_median <- median(colleges$case_rate, na.rm = TRUE)
case_q1 <- quantile(colleges$case_rate, probs = 0.25, na.rm = TRUE, names = FALSE)
mask_median <- median(colleges$ALWAYS, na.rm = TRUE)
mask_q3 <- quantile(colleges$ALWAYS, probs = 0.75, na.rm = TRUE, names = FALSE)

print("Distribution of Covid case rates on college campuses: ")
summary(colleges$case_rate)
print(
  "Distribution of the percent of county population that ALWAYS wears a mask, for counties with college campuses: "
)
summary(colleges$ALWAYS)

## P(less than half of county with a college wears a mask | county voted Republican)
print(
  paste0(
    "P(less than half of county with a college wears a mask | county voted Republican) = ",
    cond_prob(colleges$ALWAYS < 0.5, colleges$winner == "Republican")
  )
)

## P(low covid rate on campus (bottom 50 percentile) | county has high percent of mask wearers (top 50 percentile))
print(
  paste0(
    "P(low covid rate on campus (bottom 50 percentile) | county has high percent of mask wearers (top 50 percentile)) = ",
    cond_prob(colleges$case_rate < case_median, colleges$ALWAYS > mask_median)
  )
)

## P(low covid rate on campus (lower quantile) | county has high percent of mask wearers (upper quantile))
## "Lower quantile" is the first quartile of case_rate; "upper quantile" is the
## third quartile of ALWAYS.
print(
  paste0(
    "P(low covid rate on campus (lower quantile) | county has high percent of mask wearers (upper quantile)) = ",
    cond_prob(colleges$case_rate < case_q1, colleges$ALWAYS > mask_q3)
  )
)
``` | ||
|
||
|
||
## Visuals (EDA) | ||
```{r, results = 'hold'} | ||
## Box plot: campus case rate grouped by how the surrounding county voted.
## Fill is mapped to `winner` and coloured through a manual scale instead of a
## fixed two-element `fill` vector, which fails unless exactly two boxes are
## drawn (for example when `winner` contains NA). The legend is hidden because
## the x axis already names each group; the unused `color` label is dropped.
plot <- ggplot(colleges, aes(x = winner, y = case_rate)) +
  geom_boxplot(aes(fill = winner), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  labs(
    title = "Covid Infection Rate on College Campuses",
    subtitle = "Comparing covid infection rate on college campuses to how county voted in the 2020 election",
    x = "How County Voted in 2020",
    y = "Covid Infection Rate"
  )
plot
``` | ||
```{r, results = 'hold'} | ||
## Box plot: share of the county that ALWAYS wears a mask, grouped by how the
## county voted. Fill is mapped to `winner` through a manual scale rather than a
## fixed two-element `fill` vector, which fails unless exactly two boxes are
## drawn. The legend is hidden because the x axis already names each group; the
## unused `color` label is dropped.
plot <- ggplot(colleges, aes(x = winner, y = ALWAYS)) +
  geom_boxplot(aes(fill = winner), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  labs(
    title = "Mask Usage in Counties with College Campuses",
    subtitle = "Comparing mask usage in counties with college campuses to how county voted in the 2020 election",
    x = "How County Voted in 2020",
    y = "Percent that ALWAYS wears masks"
  )
plot
``` | ||
|
||
```{r, results = 'hold'} | ||
## Box plot: campus case rate grouped by `political_party`, which the axis
## labels present as how the state voted. The x tick labels are assigned
## positionally, in the order the `political_party` values are drawn.
## Fill is mapped to the grouping column through a manual scale rather than a
## fixed two-element `fill` vector, which fails unless exactly two boxes are
## drawn. The legend is hidden because the x axis already names each group; the
## unused `color` label is dropped. The subtitle typo ("thier") is corrected.
plot <- ggplot(colleges, aes(x = political_party, y = case_rate)) +
  geom_boxplot(aes(fill = political_party), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  scale_x_discrete(labels = c("Democrat", "Republican")) +
  labs(
    title = "Covid Infection Rate on College Campuses",
    subtitle = "Comparing covid infection rate on college campuses to how their state voted in the 2020 election",
    x = "How State Voted in 2020",
    y = "Covid Infection Rate"
  )
plot
``` | ||
|
||
|
||
```{r, results = 'hold'} | ||
## Box plot: share of the county that ALWAYS wears a mask, grouped by
## `political_party`, which the axis labels present as how the state voted.
## The x tick labels are assigned positionally, in the order the values are drawn.
## Fill is mapped to the grouping column through a manual scale rather than a
## fixed two-element `fill` vector, which fails unless exactly two boxes are
## drawn. The legend is hidden because the x axis already names each group; the
## unused `color` label is dropped.
plot <- ggplot(colleges, aes(x = political_party, y = ALWAYS)) +
  geom_boxplot(aes(fill = political_party), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  scale_x_discrete(labels = c("Democrat", "Republican")) +
  labs(
    title = "Mask Usage in Counties with College Campuses",
    subtitle = "Comparing mask usage in counties with college campuses to how their state voted in the 2020 election",
    x = "How State Voted in 2020",
    y = "Percent that ALWAYS wears masks"
  )
plot
``` | ||
|
||
|
||
```{r} | ||
## Scatter plot with point size scaled by case rate: campus case rate (x)
## against the share of the county that ALWAYS wears a mask (y), coloured by how
## the county voted.
## The y label and subtitle say "County" because ALWAYS is the county-level
## mask share used throughout this document.
colleges %>%
  ## Sorting by descending case rate puts the largest points first in the data.
  arrange(desc(case_rate)) %>%
  mutate(winner = factor(winner)) %>%
  ggplot(aes(x = case_rate, y = ALWAYS, size = case_rate, color = winner)) +
  geom_point(alpha = 0.35) +
  scale_size(range = c(0.1, 20), name = "Case Rate") +
  ## Colours are matched to the factor levels of `winner` in order.
  scale_color_manual(values = c("blue", "red")) +
  labs(
    title = "Covid Cases on College Campuses",
    subtitle = "Comparing College Case Rate to Percent of County that Wears Masks",
    x = "COVID-19 Case Rate at the University",
    y = "Percent of County that Always wears Masks",
    color = "Political Lean"
  )
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
--- | ||
title: "Visuals and Conditional Probabilities at the County Level" | ||
author: "Austin Eaton" | ||
date: "12/7/2021" | ||
output: html_document | ||
--- | ||
|
||
|
||
```{r setup, include=FALSE} | ||
knitr::opts_chunk$set(echo = TRUE) | ||
``` | ||
|
||
```{r message=FALSE} | ||
library(dplyr) | ||
library(ggplot2) | ||
``` | ||
|
||
Read in file: | ||
```{r} | ||
## Load the county-level data set. The path is relative, so the document must
## be knitted from the directory that holds the CSV.
county_data <- utils::read.csv(file = "counties_edited.csv")
``` | ||
|
||
|
||
```{r, results = 'hold'} | ||
## Estimate P(event | given) as the share of rows satisfying `given` for which
## `event` also holds. Both arguments are logical vectors over the same rows.
## Comparisons that evaluate to NA are dropped instead of being counted as rows.
cond_prob <- function(event, given) {
  sum(event & given, na.rm = TRUE) / sum(given, na.rm = TRUE)
}

## Conditioning events: how each county voted.
voted_dem <- county_data$winner == "Democrat"
voted_rep <- county_data$winner == "Republican"

## Cut-offs are computed from the data rather than copied from the printed
## summaries, so "upper HALF" is always the median and "upper quartile" is
## always the third quartile of the current CSV.
always_median <- median(county_data$ALWAYS, na.rm = TRUE)
always_q3 <- quantile(county_data$ALWAYS, probs = 0.75, na.rm = TRUE, names = FALSE)
never_median <- median(county_data$NEVER, na.rm = TRUE)
never_q3 <- quantile(county_data$NEVER, probs = 0.75, na.rm = TRUE, names = FALSE)

print("Distribution of county populations that ALWAYS wear masks: ")
summary(county_data$ALWAYS)

## P(upper quartile of mask use | voted democrat)
print(paste0(
  "P(upper quartile of mask use | voted democrat) = ",
  cond_prob(county_data$ALWAYS > always_q3, voted_dem)
))

## P(upper HALF of mask use | voted Democrat)
print(paste0(
  "P(upper HALF of mask use | voted Democrat) = ",
  cond_prob(county_data$ALWAYS > always_median, voted_dem)
))

## P(lower HALF of mask use | voted Republican)
print(paste0(
  "P(lower HALF of mask use | voted Republican) = ",
  cond_prob(county_data$ALWAYS < always_median, voted_rep)
))

print("Distribution of county populations that NEVER wear masks: ")
summary(county_data$NEVER)

## P(upper quartile of NEVER mask use | voted democrat)
print(paste0(
  "P(upper quartile of NEVER mask use | voted democrat) = ",
  cond_prob(county_data$NEVER > never_q3, voted_dem)
))

## P(upper HALF of NEVER mask use | voted Democrat)
print(paste0(
  "P(upper HALF of NEVER mask use | voted Democrat) = ",
  cond_prob(county_data$NEVER > never_median, voted_dem)
))

## P(lower HALF of NEVER mask use | voted Republican)
print(paste0(
  "P(lower HALF of NEVER mask use | voted Republican) = ",
  cond_prob(county_data$NEVER < never_median, voted_rep)
))

## P(upper quartile of NEVER mask use | voted republican)
print(paste0(
  "P(upper quartile of NEVER mask use | voted republican) = ",
  cond_prob(county_data$NEVER > never_q3, voted_rep)
))

## P(upper HALF of NEVER mask use | voted republican)
print(paste0(
  "P(upper HALF of NEVER mask use | county voted republican) = ",
  cond_prob(county_data$NEVER > never_median, voted_rep)
))

## P(lower HALF of NEVER mask use | voted Democrat)
print(paste0(
  "P(lower HALF of NEVER mask use | county voted Democrat) = ",
  cond_prob(county_data$NEVER < never_median, voted_dem)
))
``` | ||
```{r} | ||
## Scatter plot: county case rate (x) against the share of the county that
## ALWAYS wears a mask (y), one small point per county, coloured by how the
## county voted. The subtitle says "County" to agree with the y axis, and the
## commented-out size mapping has been removed.
county_data %>%
  ## Sorting by descending case rate fixes the row order the points are taken in.
  arrange(desc(case_rate)) %>%
  mutate(winner = factor(winner)) %>%
  ggplot(aes(x = case_rate, y = ALWAYS)) +
  geom_point(aes(color = winner), size = 0.5) +
  ## Colours are matched to the factor levels of `winner` in order.
  scale_color_manual(values = c("blue", "red")) +
  labs(
    title = "Covid Cases by County",
    subtitle = "Comparing Covid Case Rate to Percent of County that Wears Masks",
    x = "COVID-19 Infection Rate",
    y = "Percent of County that ALWAYS wears Masks",
    color = "Political Lean"
  )
``` | ||
|
||
|
||
|
||
```{r, results = 'hold'} | ||
## Numeric summaries of county case rate for each county-level winner.
summary(county_data[county_data$winner == "Republican", "case_rate"])
summary(county_data[county_data$winner == "Democrat", "case_rate"])

## ggplot2 is already attached by the library chunk near the top of the
## document, so it is not loaded again here.
## In both plots fill is mapped to the grouping column through a manual scale
## instead of a fixed two-element `fill` vector, which fails unless exactly two
## boxes are drawn. Legends are hidden because the x axis already names each
## group; the unused `color` label is dropped.

## Case rate grouped by how the county itself voted.
plot <- ggplot(county_data, aes(x = winner, y = case_rate)) +
  geom_boxplot(aes(fill = winner), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  labs(
    title = "County Cases by Political Stance",
    subtitle = "Comparing covid infection rate to how county voted in the 2020 election",
    x = "How County Voted in 2020",
    y = "Covid Infection Rate"
  )
plot

## Case rate grouped by `political_party`, which the axis labels present as how
## the state voted; x tick labels are assigned positionally.
plot2 <- ggplot(county_data, aes(x = political_party, y = case_rate)) +
  geom_boxplot(aes(fill = political_party), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  scale_x_discrete(labels = c("Democrat", "Republican")) +
  labs(
    title = "County Cases by Political Stance",
    subtitle = "Comparing covid infection rate to how county's state voted in the 2020 election",
    x = "How State Voted in 2020",
    y = "Covid Infection Rate"
  )
plot2
``` | ||
|
||
|
||
```{r, results = 'hold'} | ||
## Box plots of `percent_dead_from_covid` (labelled "Mortality Rate").
## In both plots fill is mapped to the grouping column through a manual scale
## instead of a fixed two-element `fill` vector, which fails unless exactly two
## boxes are drawn. Legends are hidden because the x axis already names each
## group; the unused `color` label is dropped.

## Grouped by how the county itself voted.
plot <- ggplot(county_data, aes(x = winner, y = percent_dead_from_covid)) +
  geom_boxplot(aes(fill = winner), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  labs(
    title = "Covid Mortality Rate per County",
    subtitle = "Comparing covid mortality rate to how county voted in the 2020 election",
    x = "How County Voted in 2020",
    y = "Mortality Rate"
  )
plot

## Grouped by `political_party`, which the axis labels present as how the state
## voted; x tick labels are assigned positionally.
plot2 <- ggplot(county_data, aes(x = political_party, y = percent_dead_from_covid)) +
  geom_boxplot(aes(fill = political_party), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  scale_x_discrete(labels = c("Democrat", "Republican")) +
  labs(
    title = "Covid Mortality Rate per County",
    subtitle = "Comparing covid mortality rate to how state voted in the 2020 election",
    x = "How State Voted in 2020",
    y = "Mortality Rate"
  )
plot2
``` | ||
|
||
|
||
|
||
```{r, results = 'hold'} | ||
## Box plots of the share of each county that ALWAYS wears a mask.
## In both plots fill is mapped to the grouping column through a manual scale
## instead of a fixed two-element `fill` vector, which fails unless exactly two
## boxes are drawn. Legends are hidden because the x axis already names each
## group; the unused `color` label is dropped.

## Grouped by how the county itself voted.
plot <- ggplot(county_data, aes(x = winner, y = ALWAYS)) +
  geom_boxplot(aes(fill = winner), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  labs(
    title = "County Mask Users by Political Stance",
    subtitle = "Comparing mask usage to how county voted in the 2020 election",
    x = "How County Voted in 2020",
    y = "Percent that ALWAYS wears masks"
  )
plot

## Grouped by `political_party`, which the axis labels present as how the state
## voted; x tick labels are assigned positionally.
plot2 <- ggplot(county_data, aes(x = political_party, y = ALWAYS)) +
  geom_boxplot(aes(fill = political_party), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  scale_x_discrete(labels = c("Democrat", "Republican")) +
  labs(
    title = "County Mask Users by Political Stance",
    subtitle = "Comparing mask usage to how state voted in the 2020 election",
    x = "How State Voted in 2020",
    y = "Percent that ALWAYS wears masks"
  )
plot2
``` | ||
```{r, results = 'hold'} | ||
## Box plots of the share of each county that NEVER wears a mask.
## In both plots fill is mapped to the grouping column through a manual scale
## instead of a fixed two-element `fill` vector, which fails unless exactly two
## boxes are drawn. Legends are hidden because the x axis already names each
## group; the unused `color` label is dropped.

## Grouped by how the county itself voted.
plot <- ggplot(county_data, aes(x = winner, y = NEVER)) +
  geom_boxplot(aes(fill = winner), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  labs(
    title = "County Mask Users by Political Stance",
    subtitle = "Comparing mask usage to how county voted in the 2020 election",
    x = "How County Voted in 2020",
    y = "Percent that NEVER wears masks"
  )
plot

## Grouped by `political_party`, which the axis labels present as how the state
## voted; x tick labels are assigned positionally.
plot2 <- ggplot(county_data, aes(x = political_party, y = NEVER)) +
  geom_boxplot(aes(fill = political_party), show.legend = FALSE) +
  scale_fill_manual(values = c("#0096fa", "#fa1100")) +
  scale_x_discrete(labels = c("Democrat", "Republican")) +
  labs(
    title = "County Mask Users by Political Stance",
    subtitle = "Comparing mask usage to how state voted in the 2020 election",
    x = "How State Voted in 2020",
    y = "Percent that NEVER wears masks"
  )
plot2
``` | ||
|
Oops, something went wrong.