Skip to content

Commit

Permalink
Merge pull request #1 from bdilday/initialize
Browse files Browse the repository at this point in the history
Initialize
  • Loading branch information
bdilday authored Mar 23, 2017
2 parents b637291 + 15f7672 commit 16072bd
Show file tree
Hide file tree
Showing 30 changed files with 1,081 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^.*\.Rproj$
^\.Rproj\.user$
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ vignettes/*.pdf
# Temporary files created by R markdown
*.utf8.md
*.knit.md
.Rproj.user
19 changes: 19 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Package: marcelR
Type: Package
Title: Compute Marcel Projections
Version: 1.0
Date: 2017-03-08
Authors@R: person("Ben", "Dilday", email = "ben.dilday.phd@gmail.com",
role = c("aut", "cre"))
Description: Compute Marcel Projections
License: MIT
Depends: R(>= 3.1.0)
Imports:
dplyr (>= 0.5.0),
Lahman (>= 5.0-0),
tidyr (>= 0.6.1),
lazyeval (>= 0.2.0),
readr (>= 1.0.0)
RoxygenNote: 6.0.1
Encoding: UTF-8
LazyData: true
16 changes: 16 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generated by roxygen2: do not edit by hand

export(apply_marcel_batting)
export(apply_marcel_pitching)
export(get_batting_stats)
export(get_games_for_year)
export(get_ipouts_for_year)
export(get_pa_for_year)
export(get_pitching_stats)
export(get_primary_pos)
export(get_seasonal_averages_batting)
export(get_seasonal_averages_pitching)
export(get_team_projected_batting)
export(get_team_projected_pitching)
export(get_team_projected_wins)
import(dplyr)
139 changes: 139 additions & 0 deletions R/marcelBatting.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#' Get seasonal averages for batting stats
#'
#' @param data A data frame containing batting stats. Should be similar to
#' Lahman::Batting
#' @return A data frame containing league averages for batting stats
#' @export
get_seasonal_averages_batting <- function(data) {

data %>%
dplyr::select(-stint, -lgID) %>%
tidyr::gather(key, value, -playerID, -yearID, -teamID, -PA) %>%
dplyr::mutate(value=as.numeric(value)) %>% group_by(key, yearID) %>%
dplyr::summarise(lgAv=sum(value, na.rm=TRUE)/sum(PA, na.rm=TRUE)) %>%
dplyr::ungroup() %>%
dplyr::select(yearID, key, lgAv) %>%
tidyr::spread(key, lgAv)
}

#' Get stats for batters
#'
#' Gets stats for batters. Pulls data from Lahman:::battingStats(),
#' aggregates over stints and appends primary position and age.
#'
#' @param PrimaryPosition A data frame containing primary position. If not provided,
#' it will be generated by the function.
#' @return A data frame containing batting stats, primary position, and age. Batting stats are aggregated over
#' all stints.
#' @seealso \code{\link{get_primary_pos}} \code{\link{combine_batter_stints}}
#' @export
get_batting_stats <- function(PrimaryPosition=NULL) {
if (is.null(PrimaryPosition)) {
PrimaryPosition <- get_primary_pos()
}

BattingStats <- combine_batter_stints(Lahman::battingStats()) %>%
merge(PrimaryPosition %>%
select(playerID, yearID, POS),
by=c("playerID", "yearID"))

BattingStats$Age <- get_age(BattingStats)
BattingStats
}

#' Combine batter stints
#'
#' Combine batting stats over all the stints for a given player and season.
#'
#' @param data A data frame containing batting stats. Should be similar to Batting from the Lahman package.
#' @seealso \code{\link{get_batting_stats}}
#' @return A data frame containing batting stats aggregated over all stints for a given player and season.
combine_batter_stints <- function(data) {

columns_to_sum <- c("G","PA","AB",
"H","X2B","X3B","HR",
"R","RBI"
,"SB","CS","BB","SO","IBB","HBP","SH","SF","GIDP")
grouped_data <- sum_stints(data, columns_to_sum) %>%
dplyr::mutate(OB=OBP*(PA-SH),
BIP=AB-SO-HR+SF,
HOBIP=H-HR,
OBP=sum(OB)/sum(PA-SH),
SLG=sum(TB)/sum(AB),
BABIP=sum(HOBIP)/sum(BIP))

grouped_data %>% dplyr::ungroup() %>% dplyr::filter(stint==1)
}

#' Apply marcels for batters
#'
#' @param data A data frame with batting stats, including seasonal averages.
#'
#' @param metric A string given the name of the metric to compute projections for, e.g. 'HR'
#'
#' @param age_adjustment_fun A callable to make the age adjustment
#'
#' @param metric_weights An array with the weights to give to the projected stats for the previous seasons.
#' The ith elemnt is the weight for the season i years previous. The default is the c(5, 4, 3).
#'
#' @param playing_time_weights An array with the weights to be used for projecting playing time
#'
#' @return A data frame containg Marcel projections for 'metric'. The projection is given the generic
#' name 'proj_value'.
#' @examples
#' a <- get_batting_stats() %>% filter(yearID>=2001, yearID<=2003) %>% filter(POS!="P")
#' b <- tbl_df(append_previous_years(a,
#' get_seasonal_averages_batting,
#' previous_years = 3))
#' mcl = apply_marcel_batting(b, "HR", age_adjustment)
#' mcl %>% filter(projectedYearID==2004, playerID=='beltrca01')
#'
#' playerID yearID projectedYearID age_adj x_metric x_pa x_av proj_pa
#' 1 beltrca01 2003 2004 1.006 318 7938 0.02867422 573.2
#' num denom proj_rate_raw proj_rate proj_value
#' 1 352.4091 9138 0.03856523 0.03879662 22.23822
#' @seealso \code{\link{apply_marcel_pitching}},
#' \code{\link{get_team_projected_batting}},
#' \code{\link{export_marcels}}
#' @export
apply_marcel_batting <- function(data, metric, age_adjustment_fun,
metric_weights=c(5,4,3),
playing_time_weights=c(0.5, 0.1, 0)
) {
sw <- sum(metric_weights)

x_metric <- 0
x_pa <- 0
x_av_num <- 0
x_av_denom <- 0
proj_pa <- 200
pebble <- 1e-6

for (idx in seq_along(metric_weights)) {
metric_key <- sprintf('%s.%d', metric, idx)
metric_av_key <- paste0(metric_key, ".SA")
pa_key <- sprintf('%s.%d', "PA", idx)
pa <- na.zero(data[[pa_key]])
sa_value <- na.zero(data[[metric_av_key]])

x_metric <- x_metric + na.zero(data[[metric_key]]) * metric_weights[idx]
x_pa <- x_pa + pa * metric_weights[idx]
x_av_num <- x_av_num + (sa_value * metric_weights[idx] * (pa + pebble))
x_av_denom <- x_av_denom + (metric_weights[idx] * (pa + pebble))
proj_pa <- proj_pa + playing_time_weights[[idx]] * pa
}
data$age_adj <- sapply(data$Age+1, age_adjustment_fun)

x_av <- x_av_num / x_av_denom
data.frame(playerID=data$playerID,
yearID=data$yearID,
projectedYearID=data$yearID+1,
age_adj=data$age_adj,
x_metric=x_metric,
x_pa=x_pa, x_av=x_av, proj_pa=proj_pa) %>%
mutate(num=x_av*100*sw+x_metric,
denom=x_pa+100*sw,
proj_rate_raw=num/denom,
proj_rate=age_adj*proj_rate_raw,
proj_value=proj_pa*proj_rate)
}
133 changes: 133 additions & 0 deletions R/marcelPitching.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#' Get seasonal averages for pitching stats
#'
#' @param data A data frame containing pitching stats. Should be similar to
#' Lahman::Pitching
#' @return A data frame containing league averages for pitching stats
#' @export
get_seasonal_averages_pitching <- function(data) {

data %>%
dplyr::select(-stint, -lgID) %>%
tidyr::gather(key, value, -playerID, -yearID, -teamID, -IPouts, -BFP) %>%
dplyr::mutate(value=as.numeric(value)) %>% group_by(key, yearID) %>%
dplyr::summarise(lgAv=sum(value, na.rm=TRUE)/sum(IPouts, na.rm=TRUE)) %>%
dplyr::ungroup() %>%
dplyr::select(yearID, key, lgAv) %>%
tidyr::spread(key, lgAv)
}

#' Get stats for pitchers
#'
#' Gets stats for pitchers. Pulls data from Lahman::Pitching,
#' aggregates over stints and appends primary position and age.
#'
#' @param PrimaryPosition A data frame containing primary position. If not provided,
#' it will be generated by the function.
#' @return A data frame containing pitching stats, primary position, and age. pitching stats are aggregated over
#' all stints.
#' @seealso \code{\link{get_primary_pos}} \code{\link{combine_pitcher_stints}}
#' @export
get_pitching_stats <- function(PrimaryPosition=NULL) {
if (is.null(PrimaryPosition)) {
PrimaryPosition <- get_primary_pos()
}

PitchingStats <- combine_pitcher_stints(Lahman::Pitching) %>%
merge(PrimaryPosition %>%
select(playerID, yearID, POS),
by=c("playerID", "yearID"))

PitchingStats$Age <- get_age(PitchingStats)
PitchingStats %>% mutate(PA=BFP)
}

#' Combine pitcher stints
#'
#' Combine pitching stats over all the stints for a given player and season.
#'
#' @param data A data frame containing pitching stats. Should be similar to Pitching from the Lahman package.
#' @seealso \code{\link{get_pitching_stats}}
#' @return A data frame conating pitching stats aggregated over all stints for a given player and season.
combine_pitcher_stints <- function(data) {

columns_to_sum <- c("W", "L" ,"G", "GS","CG",
"SHO","SV","IPouts","H",
"ER","HR", "BB",
"SO","IBB","WP","HBP",
"BK","BFP","GF","R","SH", "SF", "GIDP")

grouped_data <- sum_stints(data, columns_to_sum) %>%
dplyr::mutate(ERA=sum(ER*27)/sum(IPouts),
RA9=sum(R*27)/sum(IPouts),
BAOpp=sum(H)/sum(BFP-BB-HBP-SH-SF),
BABIP=sum(H-HR)/sum(BFP-BB-HBP-SO),
OBPOpp=sum(H+BB+HBP)/sum(BFP-SH),
uFIP=sum(13*HR + 3*(BB+HBP) - 2*SO)/sum(IPouts/3)
)
grouped_data %>% dplyr::ungroup() %>% dplyr::filter(stint==1)
}

#' Apply marcels for pitchers
#'
#' @param data A data frame with pitching stats, including seasonal averages.
#'
#' @param metric A string given the name of the metric to compute projections for, e.g. 'HR'
#'
#' @param age_adjustment_fun A callable to make the age adjustment
#'
#' @param metric_weights An array with the weights to give to the projected stats for the previous seasons.
#' The ith elemnt is the weight for the season i years previous. The default is the c(5, 4, 3).
#'
#' @param playing_time_weights An array with the weights to be used for projecting playing time
#'
#' @return A data frame containg Marcel projections for 'metric'. The projection is given the generic
#' name 'proj_value'.
#'
#' @seealso \code{\link{apply_marcel_pitching}},
#' \code{\link{get_team_projected_batting}},
#' \code{\link{export_marcels}}
#' @export
apply_marcel_pitching <- function(data, metric, age_adjustment_fun,
metric_weights=c(3,2,1),
playing_time_weights=c(0.5, 0.1, 0)) {

sw <- sum(metric_weights)
x_pt <- 0
x_metric <- 0

x_lgav_num <- 0
x_lgav_denom <- 0
proj_pt <- 75 + data$GS/data$G * 105
pebble <- 1e-6

for (idx in seq_along(metric_weights)) {
metric_key <- sprintf('%s.%d', metric, idx)
metric_av_key <- paste0(metric_key, ".SA")
pt_key <- sprintf('%s.%d', "IPouts", idx)
playing_time <- na.zero(data[[pt_key]])
sa_value <- na.zero(data[[metric_av_key]])

x_pt <- x_pt + playing_time * metric_weights[idx]
x_metric <- x_metric + na.zero(data[[metric_key]]) * metric_weights[idx]
x_lgav_num <- x_lgav_num + (sa_value * metric_weights[idx] * (playing_time + pebble))
x_lgav_denom <- x_lgav_denom + (metric_weights[idx] * (playing_time+ pebble))
proj_pt <- proj_pt + playing_time_weights[[idx]] * playing_time
}

data$age_adj <- sapply(data$Age+1, age_adjustment_fun)
x_lgav <- x_lgav_num / x_lgav_denom

# 134 IPouts * 6 = 804 IPouts ~ 800 IPouts
data.frame(playerID=data$playerID,
yearID=data$yearID,
projectedYearID=data$yearID+1,
age_adj=data$age_adj,
x_metric=x_metric,
x_pt=x_pt, x_lgav=x_lgav, proj_pt=proj_pt) %>%
mutate(num=x_lgav*134*sw+x_metric,
denom=x_pt+134*sw,
proj_rate_raw=num/denom,
proj_rate=age_adj*proj_rate_raw,
proj_value=proj_pt*proj_rate)

}
Empty file added R/marcelR-internal.R
Empty file.
Loading

0 comments on commit 16072bd

Please sign in to comment.