-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from bdilday/initialize
Initialize
- Loading branch information
Showing
30 changed files
with
1,081 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,3 +31,4 @@ vignettes/*.pdf | |
# Temporary files created by R markdown | ||
*.utf8.md | ||
*.knit.md | ||
.Rproj.user |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
Package: marcelR | ||
Type: Package | ||
Title: Compute Marcel Projections | ||
Version: 1.0 | ||
Date: 2017-03-08 | ||
Authors@R: person("Ben", "Dilday", email = "ben.dilday.phd@gmail.com", | ||
role = c("aut", "cre")) | ||
Description: Compute Marcel Projections | ||
License: MIT | ||
Depends: R(>= 3.1.0) | ||
Imports: | ||
dplyr (>= 0.5.0), | ||
Lahman (>= 5.0-0), | ||
tidyr (>= 0.6.1), | ||
lazyeval (>= 0.2.0), | ||
readr (>= 1.0.0) | ||
RoxygenNote: 6.0.1 | ||
Encoding: UTF-8 | ||
LazyData: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(apply_marcel_batting) | ||
export(apply_marcel_pitching) | ||
export(get_batting_stats) | ||
export(get_games_for_year) | ||
export(get_ipouts_for_year) | ||
export(get_pa_for_year) | ||
export(get_pitching_stats) | ||
export(get_primary_pos) | ||
export(get_seasonal_averages_batting) | ||
export(get_seasonal_averages_pitching) | ||
export(get_team_projected_batting) | ||
export(get_team_projected_pitching) | ||
export(get_team_projected_wins) | ||
import(dplyr) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
#' Get seasonal averages for batting stats | ||
#' | ||
#' @param data A data frame containing batting stats. Should be similar to | ||
#' Lahman::Batting | ||
#' @return A data frame containing league averages for batting stats | ||
#' @export | ||
get_seasonal_averages_batting <- function(data) { | ||
|
||
data %>% | ||
dplyr::select(-stint, -lgID) %>% | ||
tidyr::gather(key, value, -playerID, -yearID, -teamID, -PA) %>% | ||
dplyr::mutate(value=as.numeric(value)) %>% group_by(key, yearID) %>% | ||
dplyr::summarise(lgAv=sum(value, na.rm=TRUE)/sum(PA, na.rm=TRUE)) %>% | ||
dplyr::ungroup() %>% | ||
dplyr::select(yearID, key, lgAv) %>% | ||
tidyr::spread(key, lgAv) | ||
} | ||
|
||
#' Get stats for batters | ||
#' | ||
#' Gets stats for batters. Pulls data from Lahman:::battingStats(), | ||
#' aggregates over stints and appends primary position and age. | ||
#' | ||
#' @param PrimaryPosition A data frame containing primary position. If not provided, | ||
#' it will be generated by the function. | ||
#' @return A data frame containing batting stats, primary position, and age. Batting stats are aggregated over | ||
#' all stints. | ||
#' @seealso \code{\link{get_primary_pos}} \code{\link{combine_batter_stints}} | ||
#' @export | ||
get_batting_stats <- function(PrimaryPosition=NULL) { | ||
if (is.null(PrimaryPosition)) { | ||
PrimaryPosition <- get_primary_pos() | ||
} | ||
|
||
BattingStats <- combine_batter_stints(Lahman::battingStats()) %>% | ||
merge(PrimaryPosition %>% | ||
select(playerID, yearID, POS), | ||
by=c("playerID", "yearID")) | ||
|
||
BattingStats$Age <- get_age(BattingStats) | ||
BattingStats | ||
} | ||
|
||
#' Combine batter stints | ||
#' | ||
#' Combine batting stats over all the stints for a given player and season. | ||
#' | ||
#' @param data A data frame containing batting stats. Should be similar to Batting from the Lahman package. | ||
#' @seealso \code{\link{get_batting_stats}} | ||
#' @return A data frame containing batting stats aggregated over all stints for a given player and season. | ||
combine_batter_stints <- function(data) { | ||
|
||
columns_to_sum <- c("G","PA","AB", | ||
"H","X2B","X3B","HR", | ||
"R","RBI" | ||
,"SB","CS","BB","SO","IBB","HBP","SH","SF","GIDP") | ||
grouped_data <- sum_stints(data, columns_to_sum) %>% | ||
dplyr::mutate(OB=OBP*(PA-SH), | ||
BIP=AB-SO-HR+SF, | ||
HOBIP=H-HR, | ||
OBP=sum(OB)/sum(PA-SH), | ||
SLG=sum(TB)/sum(AB), | ||
BABIP=sum(HOBIP)/sum(BIP)) | ||
|
||
grouped_data %>% dplyr::ungroup() %>% dplyr::filter(stint==1) | ||
} | ||
|
||
#' Apply marcels for batters | ||
#' | ||
#' @param data A data frame with batting stats, including seasonal averages. | ||
#' | ||
#' @param metric A string given the name of the metric to compute projections for, e.g. 'HR' | ||
#' | ||
#' @param age_adjustment_fun A callable to make the age adjustment | ||
#' | ||
#' @param metric_weights An array with the weights to give to the projected stats for the previous seasons. | ||
#' The ith elemnt is the weight for the season i years previous. The default is the c(5, 4, 3). | ||
#' | ||
#' @param playing_time_weights An array with the weights to be used for projecting playing time | ||
#' | ||
#' @return A data frame containg Marcel projections for 'metric'. The projection is given the generic | ||
#' name 'proj_value'. | ||
#' @examples | ||
#' a <- get_batting_stats() %>% filter(yearID>=2001, yearID<=2003) %>% filter(POS!="P") | ||
#' b <- tbl_df(append_previous_years(a, | ||
#' get_seasonal_averages_batting, | ||
#' previous_years = 3)) | ||
#' mcl = apply_marcel_batting(b, "HR", age_adjustment) | ||
#' mcl %>% filter(projectedYearID==2004, playerID=='beltrca01') | ||
#' | ||
#' playerID yearID projectedYearID age_adj x_metric x_pa x_av proj_pa | ||
#' 1 beltrca01 2003 2004 1.006 318 7938 0.02867422 573.2 | ||
#' num denom proj_rate_raw proj_rate proj_value | ||
#' 1 352.4091 9138 0.03856523 0.03879662 22.23822 | ||
#' @seealso \code{\link{apply_marcel_pitching}}, | ||
#' \code{\link{get_team_projected_batting}}, | ||
#' \code{\link{export_marcels}} | ||
#' @export | ||
apply_marcel_batting <- function(data, metric, age_adjustment_fun, | ||
metric_weights=c(5,4,3), | ||
playing_time_weights=c(0.5, 0.1, 0) | ||
) { | ||
sw <- sum(metric_weights) | ||
|
||
x_metric <- 0 | ||
x_pa <- 0 | ||
x_av_num <- 0 | ||
x_av_denom <- 0 | ||
proj_pa <- 200 | ||
pebble <- 1e-6 | ||
|
||
for (idx in seq_along(metric_weights)) { | ||
metric_key <- sprintf('%s.%d', metric, idx) | ||
metric_av_key <- paste0(metric_key, ".SA") | ||
pa_key <- sprintf('%s.%d', "PA", idx) | ||
pa <- na.zero(data[[pa_key]]) | ||
sa_value <- na.zero(data[[metric_av_key]]) | ||
|
||
x_metric <- x_metric + na.zero(data[[metric_key]]) * metric_weights[idx] | ||
x_pa <- x_pa + pa * metric_weights[idx] | ||
x_av_num <- x_av_num + (sa_value * metric_weights[idx] * (pa + pebble)) | ||
x_av_denom <- x_av_denom + (metric_weights[idx] * (pa + pebble)) | ||
proj_pa <- proj_pa + playing_time_weights[[idx]] * pa | ||
} | ||
data$age_adj <- sapply(data$Age+1, age_adjustment_fun) | ||
|
||
x_av <- x_av_num / x_av_denom | ||
data.frame(playerID=data$playerID, | ||
yearID=data$yearID, | ||
projectedYearID=data$yearID+1, | ||
age_adj=data$age_adj, | ||
x_metric=x_metric, | ||
x_pa=x_pa, x_av=x_av, proj_pa=proj_pa) %>% | ||
mutate(num=x_av*100*sw+x_metric, | ||
denom=x_pa+100*sw, | ||
proj_rate_raw=num/denom, | ||
proj_rate=age_adj*proj_rate_raw, | ||
proj_value=proj_pa*proj_rate) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
#' Get seasonal averages for pitching stats | ||
#' | ||
#' @param data A data frame containing pitching stats. Should be similar to | ||
#' Lahman::Pitching | ||
#' @return A data frame containing league averages for pitching stats | ||
#' @export | ||
get_seasonal_averages_pitching <- function(data) { | ||
|
||
data %>% | ||
dplyr::select(-stint, -lgID) %>% | ||
tidyr::gather(key, value, -playerID, -yearID, -teamID, -IPouts, -BFP) %>% | ||
dplyr::mutate(value=as.numeric(value)) %>% group_by(key, yearID) %>% | ||
dplyr::summarise(lgAv=sum(value, na.rm=TRUE)/sum(IPouts, na.rm=TRUE)) %>% | ||
dplyr::ungroup() %>% | ||
dplyr::select(yearID, key, lgAv) %>% | ||
tidyr::spread(key, lgAv) | ||
} | ||
|
||
#' Get stats for pitchers | ||
#' | ||
#' Gets stats for pitchers. Pulls data from Lahman::Pitching, | ||
#' aggregates over stints and appends primary position and age. | ||
#' | ||
#' @param PrimaryPosition A data frame containing primary position. If not provided, | ||
#' it will be generated by the function. | ||
#' @return A data frame containing pitching stats, primary position, and age. pitching stats are aggregated over | ||
#' all stints. | ||
#' @seealso \code{\link{get_primary_pos}} \code{\link{combine_pitcher_stints}} | ||
#' @export | ||
get_pitching_stats <- function(PrimaryPosition=NULL) { | ||
if (is.null(PrimaryPosition)) { | ||
PrimaryPosition <- get_primary_pos() | ||
} | ||
|
||
PitchingStats <- combine_pitcher_stints(Lahman::Pitching) %>% | ||
merge(PrimaryPosition %>% | ||
select(playerID, yearID, POS), | ||
by=c("playerID", "yearID")) | ||
|
||
PitchingStats$Age <- get_age(PitchingStats) | ||
PitchingStats %>% mutate(PA=BFP) | ||
} | ||
|
||
#' Combine pitcher stints | ||
#' | ||
#' Combine pitching stats over all the stints for a given player and season. | ||
#' | ||
#' @param data A data frame containing pitching stats. Should be similar to Pitching from the Lahman package. | ||
#' @seealso \code{\link{get_pitching_stats}} | ||
#' @return A data frame conating pitching stats aggregated over all stints for a given player and season. | ||
combine_pitcher_stints <- function(data) { | ||
|
||
columns_to_sum <- c("W", "L" ,"G", "GS","CG", | ||
"SHO","SV","IPouts","H", | ||
"ER","HR", "BB", | ||
"SO","IBB","WP","HBP", | ||
"BK","BFP","GF","R","SH", "SF", "GIDP") | ||
|
||
grouped_data <- sum_stints(data, columns_to_sum) %>% | ||
dplyr::mutate(ERA=sum(ER*27)/sum(IPouts), | ||
RA9=sum(R*27)/sum(IPouts), | ||
BAOpp=sum(H)/sum(BFP-BB-HBP-SH-SF), | ||
BABIP=sum(H-HR)/sum(BFP-BB-HBP-SO), | ||
OBPOpp=sum(H+BB+HBP)/sum(BFP-SH), | ||
uFIP=sum(13*HR + 3*(BB+HBP) - 2*SO)/sum(IPouts/3) | ||
) | ||
grouped_data %>% dplyr::ungroup() %>% dplyr::filter(stint==1) | ||
} | ||
|
||
#' Apply marcels for pitchers | ||
#' | ||
#' @param data A data frame with pitching stats, including seasonal averages. | ||
#' | ||
#' @param metric A string given the name of the metric to compute projections for, e.g. 'HR' | ||
#' | ||
#' @param age_adjustment_fun A callable to make the age adjustment | ||
#' | ||
#' @param metric_weights An array with the weights to give to the projected stats for the previous seasons. | ||
#' The ith elemnt is the weight for the season i years previous. The default is the c(5, 4, 3). | ||
#' | ||
#' @param playing_time_weights An array with the weights to be used for projecting playing time | ||
#' | ||
#' @return A data frame containg Marcel projections for 'metric'. The projection is given the generic | ||
#' name 'proj_value'. | ||
#' | ||
#' @seealso \code{\link{apply_marcel_pitching}}, | ||
#' \code{\link{get_team_projected_batting}}, | ||
#' \code{\link{export_marcels}} | ||
#' @export | ||
apply_marcel_pitching <- function(data, metric, age_adjustment_fun, | ||
metric_weights=c(3,2,1), | ||
playing_time_weights=c(0.5, 0.1, 0)) { | ||
|
||
sw <- sum(metric_weights) | ||
x_pt <- 0 | ||
x_metric <- 0 | ||
|
||
x_lgav_num <- 0 | ||
x_lgav_denom <- 0 | ||
proj_pt <- 75 + data$GS/data$G * 105 | ||
pebble <- 1e-6 | ||
|
||
for (idx in seq_along(metric_weights)) { | ||
metric_key <- sprintf('%s.%d', metric, idx) | ||
metric_av_key <- paste0(metric_key, ".SA") | ||
pt_key <- sprintf('%s.%d', "IPouts", idx) | ||
playing_time <- na.zero(data[[pt_key]]) | ||
sa_value <- na.zero(data[[metric_av_key]]) | ||
|
||
x_pt <- x_pt + playing_time * metric_weights[idx] | ||
x_metric <- x_metric + na.zero(data[[metric_key]]) * metric_weights[idx] | ||
x_lgav_num <- x_lgav_num + (sa_value * metric_weights[idx] * (playing_time + pebble)) | ||
x_lgav_denom <- x_lgav_denom + (metric_weights[idx] * (playing_time+ pebble)) | ||
proj_pt <- proj_pt + playing_time_weights[[idx]] * playing_time | ||
} | ||
|
||
data$age_adj <- sapply(data$Age+1, age_adjustment_fun) | ||
x_lgav <- x_lgav_num / x_lgav_denom | ||
|
||
# 134 IPouts * 6 = 804 IPouts ~ 800 IPouts | ||
data.frame(playerID=data$playerID, | ||
yearID=data$yearID, | ||
projectedYearID=data$yearID+1, | ||
age_adj=data$age_adj, | ||
x_metric=x_metric, | ||
x_pt=x_pt, x_lgav=x_lgav, proj_pt=proj_pt) %>% | ||
mutate(num=x_lgav*134*sw+x_metric, | ||
denom=x_pt+134*sw, | ||
proj_rate_raw=num/denom, | ||
proj_rate=age_adj*proj_rate_raw, | ||
proj_value=proj_pt*proj_rate) | ||
|
||
} |
Empty file.
Oops, something went wrong.