Skip to content

Commit 08e390f

Browse files
committed
Updated to run predictive analytics
1 parent c4bb577 commit 08e390f

3 files changed

+53
-89
lines changed

compile fangraphs data.R

+14
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,26 @@ p <- f[str_detect(f,"pitchers")]
99

1010
b.df <- lapply(b,function(x) {
1111
fg <- read.csv(x)
12+
return(fg)
1213
})
1314

1415
p.df <- lapply(p,function(x) {
1516
fg <- read.csv(x)
17+
return(fg)
1618
})
1719

1820
library(data.table)
1921
batters <- rbindlist(b.df)
2022
pitchers <- rbindlist(p.df)
23+
24+
pitchers <- pitchers %>%
25+
mutate(Date=as.Date(Date)) %>%
26+
select(-X) %>%
27+
as.data.frame()
28+
29+
batters <- batters %>%
30+
mutate(Date=as.Date(Date)) %>%
31+
select(-X) %>%
32+
as.data.frame()
33+
34+
rm(b.df,p.df,b,p,f)

predictive metrics - pitchers.R

+34-86
Original file line numberDiff line numberDiff line change
@@ -1,95 +1,43 @@
1-
library(rvest)
2-
library(dplyr)
3-
library(stringr)
4-
5-
month <- "5"
6-
day <- str_pad(as.character(1:30),2,pad="0")
7-
8-
for (d in day) {
9-
date <- as.Date(paste(month,d,"2015",sep="-"),"%m-%d-%Y")
10-
date.rotoguru <- paste(month,d,sep="")
11-
url <- paste("http://rotoguru1.com/cgi-bin/byday.pl?date=",date.rotoguru,"&game=dd",sep="")
12-
rotoguru <- html(url)
13-
14-
dk <- rotoguru %>%
15-
html_nodes("table") %>%
16-
.[[5]] %>%
17-
html_table(fill=TRUE) %>%
18-
mutate(Player = str_replace(X2,"\\^\\d?",""),
19-
Salary = as.numeric(str_replace_all(X4,"\\$|,","")),
20-
Points = as.numeric(X3),
21-
Date = date) %>%
22-
select(Player,Date,Salary,Points) %>%
23-
filter(!(is.na(Salary)))
24-
25-
if (d==day[1]) {
26-
dk.pitchers <- dk
27-
} else {
28-
dk.pitchers <- rbind(dk.pitchers,dk)
29-
}
30-
}
1+
setwd("/Users/christopherteixeira/Documents/draftkings optimization/")
2+
source("salary data till now.R")
3+
source("compile fangraphs data.R")
314

32-
rm(dk,rotoguru,url,date,date.rotoguru)
33-
stats <- list(pitching="pit",batting="bat")
34-
35-
day <- day[!day %in% c("06","23")]
36-
37-
for (d in day) {
38-
date <- as.Date(paste(month,d,"2015",sep="-"),"%m-%d-%Y")
39-
40-
url <- paste("http://www.fangraphs.com/leaders.aspx?pos=all&stats=",stats$pitching,"&lg=all&qual=0&type=8&season=2015&month=0&season1=2015&ind=0&team=0&rost=0&age=0&filter=&players=p",format(date,"%Y-%m-%d"),"page=1_50",sep="")
41-
raw <- html(url)
42-
43-
fg.dashboard <- raw %>%
44-
html_nodes("table") %>%
45-
.[[33]] %>%
46-
html_table(fill=TRUE) %>%
47-
select(-1) %>%
48-
mutate(Date = date)
49-
50-
url <- paste("http://www.fangraphs.com/leaders.aspx?pos=all&stats=",stats$pitching,"&lg=all&qual=0&type=3&season=2015&month=0&season1=2015&ind=0&team=0&rost=0&age=0&filter=&players=p",format(date,"%Y-%m-%d"),"page=1_50",sep="")
51-
raw <- html(url)
52-
53-
fg.winprobability <- raw %>%
54-
html_nodes("table") %>%
55-
.[[33]] %>%
56-
html_table(fill=TRUE) %>%
57-
select(-1) %>%
58-
mutate(Date = date)
59-
60-
if (d==day[1]) {
61-
fangraphs.dashboard <- fg.dashboard
62-
fangraphs.winprobability <- fg.winprobability
63-
} else {
64-
fangraphs.dashboard <- rbind(fangraphs.dashboard,fg.dashboard)
65-
fangraphs.winprobability <- rbind(fangraphs.winprobability,fg.winprobability)
66-
}
67-
}
5+
library(dplyr)
686

69-
rm(d,fg.dashboard,fg.winprobability,date,date.rotoguru,raw,url,stats)
7+
df <- inner_join(draftkings,
8+
pitchers,
9+
by=c("Player"="Name","Date"="Date"))
7010

71-
pitchers <- inner_join(dk.pitchers,fangraphs.dashboard,by=c("Player"="Name","Date"="Date")) %>%
72-
inner_join(fangraphs.winprobability,by=c("Player"="Name","Date"="Date")) %>%
73-
rename(Team=Team.x,
74-
K.per.9=`K/9`,
75-
BB.per.9=`BB/9`,
76-
HR.per.9=`HR/9`,
77-
WPA.minus=`-WPA`,
78-
WPA.plus=`+WPA`,
79-
WPA.per.LI=`WPA/LI`) %>%
80-
mutate(LOB.pct=as.numeric(str_replace(`LOB%`,'%','')),
81-
GB.pct=as.numeric(str_replace(`GB%`,'%','')),
82-
HR.per.FB=as.numeric(str_replace(`HR/FB`,'%',''))) %>%
83-
select(-c(Team.y,`LOB%`,`GB%`,`HR/FB`))
11+
numeric.vars <- names(df)[sapply(df,is.numeric)]
12+
character.vars <- names(df)[!sapply(df,is.numeric)]
8413

8514
library(ggplot2)
86-
ggplot(data=pitchers, aes(y=Points)) +
87-
geom_point(aes(x=Game.Score))
15+
g <- ggplot(data=df, aes(y=Points))
8816

89-
ggplot(data=pitchers, aes(y=Points)) +
90-
geom_point(aes(x=WPA))
17+
g + geom_point(aes(x=WAR))
18+
g + geom_point(aes(x=xFIP))
19+
g + geom_point(aes(x=FIP))
9120

92-
correlations <- as.data.frame(t(cor(pitchers$Points,select(pitchers,-Team,-Player,-Points))))
21+
correlations <- as.data.frame(t(cor(df$Points,select(df,-Team,-Player,-Points,-Date,-Position))))
9322
names(correlations) <- c("Correlation")
9423
arrange(correlations,Correlation)
95-
rownames(correlations)
24+
correlations$Statistic <- rownames(correlations)
25+
26+
library(DT)
27+
datatable(correlations %>% select(Correlation))
28+
29+
library(rCharts)
30+
h <- rPlot(Points ~ WAR,
31+
data=df,
32+
color="Position",
33+
type = "point")
34+
h$addControls("x",value="WAR",values=numeric.vars)
35+
h$addControls("color", value = "Position", values = character.vars)
36+
h
37+
38+
h <- dPlot(Points ~ WAR,
39+
groups = 'Position',
40+
data=df,
41+
type = "bubble")
42+
h$legend(x = 200,y = 10,width = 500,height = 20,horizontalAlign = "right")
43+
h

salary data till now.R

+5-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ library(dplyr)
33
library(stringr)
44
library(data.table)
55

6-
start.date <- as.Date("06-01-2015","%m-%d-%Y")
6+
start.date <- as.Date("05-01-2015","%m-%d-%Y")
77
today <- Sys.Date()
88
days.to.retrieve <- seq(start.date,today,"day")
99

@@ -19,10 +19,12 @@ dk.full <- lapply(days.to.retrieve,function(d) {
1919
mutate(Player = str_replace(X2,"\\^\\d",""),
2020
Salary = as.numeric(str_replace_all(X4,"\\$|,","")),
2121
Points = as.numeric(X3),
22-
Date = date) %>%
22+
Date = d) %>%
2323
select(Player,Date,Salary,Points) %>%
2424
filter(!(is.na(Salary)))
2525
return(dk)
2626
})
2727

28-
draftkings <- rbindlist(dk.full)
28+
draftkings <- as.data.frame(rbindlist(dk.full))
29+
30+
rm(dk.full,days.to.retrieve,start.date,today)

0 commit comments

Comments
 (0)