-
Notifications
You must be signed in to change notification settings - Fork 13
/
pres
35 lines (35 loc) · 8.16 KB
/
pres
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta charset="utf-8">
<meta name="author" content="Taylor, Molly & Megha">
<link href="libs/remark-css-0.0.1/default.css" rel="stylesheet"><link href="libs/remark-css-0.0.1/fonts.css" rel="stylesheet">
</head>
<body>
<textarea id="source"> class: center, middle, inverse, title-slide # Student Alcohol Consumption in Portugal ### Taylor, Molly & Megha ### 2017/10/8 --- # Packages ```r library(tidyverse) library(forcats) library(broom) library(knitr) library(kableExtra) ```
--- # Student alcohol consumption data - Data from the 2005-2006 school year from two public schools in Portugal - Collected from school reports as well as questionnaires - Contains student grades, demographic information and data on student alcohol consumption --- #
The data ```r sac_mat <- read_csv("student-mat.csv") sac_mat %>% select(sex, Walc, G3) %>% glimpse() ``` ``` ## Observations: 395 ## Variables: 3 ## $ sex <chr> "F", "F", "F", "F", "F", "M", "M", "F", "M", "M", "F", "F... ## $ Walc <int> 1, 1, 3, 1, 2, 2,
1, 1, 1, 1, 2, 1, 3, 2, 1, 2, 2, 1, 4, ... ## $ G3 <int> 6, 6, 10, 15, 10, 15, 11, 6, 19, 15, 9, 12, 14, 11, 16, 1... ``` --- # Check ```r table(sac_mat$G3, useNA = "ifany") ``` ``` ## ## 0 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ## 38 1 7 15 9 32 28
56 47 31 31 27 33 16 6 12 5 1 ``` ```r table(sac_mat$sex, useNA = "ifany") ``` ``` ## ## F M ## 208 187 ``` ```r table(sac_mat$Walc, useNA = "ifany") ``` ``` ## ## 1 2 3 4 5 ## 151 85 80 51 28 ``` --- # Cleaning ```r sac_mat_clean <- sac_mat %>% filter(G3
!= 0) %>% mutate(sex_f = fct_recode(factor(sex), "Female" = "F", "Male" = "M"), Walc_f = fct_recode(factor(Walc), "Very Low" = "1", "Low" = "2", "Medium" = "3", "High" = "4", "Very High" = "5")) ``` --- # Demographics: Sex ```r sac_mat_clean %>% count(sex_f)
%>% mutate(p = prop.table(n)) %>% mutate_if(is.numeric, funs(round(., 3))) %>% select(sex = sex_f, n, proportion = p) %>% kable(format = "html") %>% kable_styling(c("striped", "bordered"), full_width = F) ``` <table class="table table-striped table-bordered"
style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead><tr> <th style="text-align:left;"> sex </th> <th style="text-align:right;"> n </th> <th style="text-align:right;"> proportion </th> </tr></thead> <tbody> <tr> <td style="text-align:left;">
Female </td> <td style="text-align:right;"> 185 </td> <td style="text-align:right;"> 0.518 </td> </tr> <tr> <td style="text-align:left;"> Male </td> <td style="text-align:right;"> 172 </td> <td style="text-align:right;"> 0.482 </td> </tr> </tbody> </table>
--- # Demographics: Age ```r sac_mat_clean %>% summarize(n = n(), mean_age = mean(age), sd_age = sd(age)) %>% mutate_if(is.numeric, funs(round(., 3))) ``` ``` ## # A tibble: 1 x 3 ## n mean_age sd_age ## <dbl> <dbl> <dbl> ## 1 357 16.655 1.268 ``` --- # Distributions
of sex by WALC ```r sac_mat_clean %>% group_by(Walc_f, sex_f) %>% summarize(n = n()) %>% mutate(N = sum(n), p = n/N) %>% mutate_if(is.numeric, funs(round(., 3))) %>% select(-n, N, walc = Walc_f, sex = sex_f, p) %>% spread(sex, p) %>% kable(format = "html")
%>% kable_styling(c("striped", "bordered"), full_width = F) ``` <table class="table table-striped table-bordered" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead><tr> <th style="text-align:left;"> walc </th> <th style="text-align:right;">
N </th> <th style="text-align:right;"> Female </th> <th style="text-align:right;"> Male </th> </tr></thead> <tbody> <tr> <td style="text-align:left;"> Very Low </td> <td style="text-align:right;"> 133 </td> <td style="text-align:right;"> 0.609 </td> <td style="text-align:right;">
0.391 </td> </tr> <tr> <td style="text-align:left;"> Low </td> <td style="text-align:right;"> 73 </td> <td style="text-align:right;"> 0.589 </td> <td style="text-align:right;"> 0.411 </td> </tr> <tr> <td style="text-align:left;"> Medium </td> <td style="text-align:right;">
77 </td> <td style="text-align:right;"> 0.558 </td> <td style="text-align:right;"> 0.442 </td> </tr> <tr> <td style="text-align:left;"> High </td> <td style="text-align:right;"> 48 </td> <td style="text-align:right;"> 0.292 </td> <td style="text-align:right;">
0.708 </td> </tr> <tr> <td style="text-align:left;"> Very High </td> <td style="text-align:right;"> 26 </td> <td style="text-align:right;"> 0.154 </td> <td style="text-align:right;"> 0.846 </td> </tr> </tbody> </table> --- # Distributions of sex by WALC ```r
ggplot(sac_mat_clean, aes(x = Walc_f, fill = sex_f)) + geom_bar(position = "fill") + ggtitle("Distributions of Sex by Weekend Alcohol Consumption") + labs(x = "Weekend Alcohol Consumption", y = "Proportion", fill = "Sex", caption = "Source: Kaggle") + theme_minimal()
+ scale_fill_brewer(type = "qual", palette = 6) + theme(plot.title = element_text(hjust = 0.5, size = 20, face = "bold"), plot.caption = element_text(size = 15, face = "italic"), axis.text = element_text(size = 15), axis.title = element_text(size = 15), legend.title=element_text(size=15),
legend.text=element_text(size=15)) ``` --- ![](new_pres_files/figure-html/unnamed-chunk-11-1.png)<!-- --> --- ## Test of independence ```r tidy(with(sac_mat_clean, chisq.test(Walc_f, sex_f))) %>% select(statistic, p_val = p.value) ``` ``` ## statistic p_val
## 1 30.05166 4.777341e-06 ``` --- # Association between WALC and math grades ```r ggplot(sac_mat_clean, aes(x = Walc_f, y = G3, fill = sex_f)) + geom_boxplot() + facet_wrap(~ sex_f) + ggtitle("Association between Weekend Alcohol Consumption on Math Grades
by Sex") + labs(x = "Weekend Alcohol Consumption", y = "Math Grade", caption = "Source: Kaggle") + theme_minimal() + scale_fill_brewer(type = "qual", palette = 6) + theme(plot.title = element_text(size = 18, face = "bold"), plot.caption = element_text(size
= 15, face = "italic"), axis.text = element_text(size = 15), axis.title = element_text(size = 15), strip.text = element_text(size = 15), panel.spacing = unit(3, "lines"), legend.position = "none") ``` --- ![](new_pres_files/figure-html/unnamed-chunk-14-1.png)<!--
--> --- # Test of interaction - Exploratory Treating Walc as categorical. Cannot infer causality! ```r model_cat <- lm(G3 ~ Walc_f * sex_f, data = sac_mat_clean) anova(model_cat) ``` ``` ## Analysis of Variance Table ## ## Response: G3 ## Df Sum Sq Mean Sq
F value Pr(>F) ## Walc_f 4 155.5 38.875 4.1021 0.002916 ** ## sex_f 1 90.5 90.505 9.5501 0.002161 ** ## Walc_f:sex_f 4 174.6 43.643 4.6052 0.001234 ** ## Residuals 347 3288.5 9.477 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ```
--- class: center, middle # Thanks! </textarea>
</body>
</html>