-
Notifications
You must be signed in to change notification settings - Fork 0
/
uebung_3.R
40 lines (24 loc) · 1.65 KB
/
uebung_3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
library(tidyr)
library(tidyverse)
library(nycflights13)
library(ggcorrplot)
library(dplyr)
# Print the flights tibble shipped with nycflights13 for a first look.
nycflights13::flights

## Departure delays per month ----
# Keep only the two columns needed and drop rows where either is missing,
# so the same cleaned data feeds both plots below.
dep_delay_by_month <- flights %>%
  select(dep_delay, month) %>%
  na.omit()

# One boxplot per month; month is converted to a factor so that ggplot
# treats it as a discrete axis.
monthly_boxplot <- ggplot(
  dep_delay_by_month,
  aes(x = as.factor(month), y = dep_delay)
) +
  geom_boxplot()
monthly_boxplot

## Visually ignore the outliers ----
# Zoom with coord_cartesian() instead of ylim(): ylim() sets scale limits,
# which discards every observation outside 0..100 *before* the boxplot
# statistics are computed and therefore changes the boxes themselves.
# coord_cartesian() only restricts the visible window; medians, quartiles
# and whiskers are still computed from all delays.
monthly_boxplot + coord_cartesian(ylim = c(0, 100))
## Average departure delay per month ----
# Rows with a missing dep_delay are removed before averaging.
flights %>%
  select(dep_delay, month) %>%
  na.omit() %>%
  group_by(month) %>%
  summarize(delay = mean(dep_delay))

## Average delay per month, departure and arrival side by side ----
# A row is dropped here when *either* delay is missing, so depmean is
# computed on a subset of the rows used for the table above and the two
# departure means can differ.
monthly_delay_means <- flights %>%
  select(arr_delay, dep_delay, month) %>%
  na.omit() %>%
  group_by(month) %>%
  summarize(depmean = mean(dep_delay), arrmean = mean(arr_delay))
monthly_delay_means

## Correlation between them ----
# cor() receives all three columns of the summary (month, depmean,
# arrmean), so the resulting matrix also contains the month column.
monthly_delay_cor <- cor(monthly_delay_means)
monthly_delay_cor

## ... and plot it ----
ggcorrplot(monthly_delay_cor)
## Correlation of flight length with (departure delay - arrival delay) ----
# Idea: the longer the flight, the higher the chance of making up a bit of
# the delay on the way. delay_diff is dep_delay minus arr_delay, so a
# positive value means the flight arrived less late than it departed.
# A random 10% of the rows is drawn within each distance group; no seed is
# set, so the sample (and every result below) varies from run to run.
sub.dat <- flights %>%
  select(dep_delay, arr_delay, distance) %>%
  na.omit() %>%
  mutate(delay_diff = dep_delay - arr_delay) %>%
  group_by(distance) %>%
  sample_frac(0.1)

# Correlation matrix of distance and delay_diff on the sampled rows,
# printed first and then drawn as a correlation plot.
distance_delay_cor <- sub.dat %>%
  select(distance, delay_diff) %>%
  cor()
distance_delay_cor
ggcorrplot(distance_delay_cor)

# Scatter plot of the sampled flights with a smoothed trend line.
sub.dat %>%
  ggplot(aes(x = distance, y = delay_diff)) +
  geom_point() +
  geom_smooth()