-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpunt_angles.R
261 lines (214 loc) · 8.97 KB
/
punt_angles.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#load in libraries
library(tidyverse)
library(ggthemes)
library(mgcv)
#load in PFF data for hangtimes
pff_dat <- read.csv('PFFScoutingData.csv') %>%
filter(!is.na(hangTime) & kickType %in% c('N','R','A')) %>%
filter(!is.na(kickType)) %>%
select(gameId, playId, hangTime)
#load in extra play data and filter out OOB, Fakes, and Blocked punts
punts <- read.csv('plays.csv') %>%
filter(specialTeamsPlayType == 'Punt') %>%
select(gameId, playId, specialTeamsPlayType, specialTeamsResult) %>%
filter(!specialTeamsResult %in% c('Out of Bounds','Non-Special Teams Result','Blocked Punt'))
#This loads in all the angles
punt_angles <- c(2018:2020) %>%
list() %>%
pmap_dfr(function(x){
print(x)
#load year
read.csv(paste0('tracking',x,'.csv')) %>%
#we only care about what the ball does
filter(displayName == 'football') %>%
#join extra data
left_join(punts) %>%
#this filters out OOB, Fakes, Blocks
filter(!is.na(specialTeamsResult)) %>%
#We don't care about any of this info
select(-jerseyNumber:-team, -a, -o, -dir, -nflId, -time, -dis, -displayName) %>%
select(-specialTeamsPlayType) %>%
#Do cleaning for each play
group_by(gameId, playId) %>%
mutate(
#There's a bunch of plays where the snap isn't marked so we can't work from there
has_snap = ifelse('ball_snap' %in% event, 1, 0),
#using the kinematics assumption, this will be our kick speed
max_s = max(s),
#this lets us get the yardline the ball was snapped from that we can feed into our model
yardline =
case_when(
playDirection == 'left' ~ round(x[frameId == 1] - 10), #for communication purposes we round to closest integer
playDirection == 'right' ~ round(110 - x[frameId == 1]) #the rounding shouldn't make too much of a difference
)
) %>%
#keep only plays with snaps marked
filter(has_snap == 1) %>%
#don't need this info anymore
select(-has_snap) %>%
#get the frame of the snap
mutate(
snap_frame = frameId[event == 'ball_snap']
) %>%
#look at the frames after the marked snap where the ball isn't still
#this will make sure we're only looking at when the ball is actually snapped
filter(frameId >= snap_frame & s != 0) %>%
#since the ball is already moving by now the frame that the punter catches the snap will be
#the frame where the ball is basically at a standstill (minimum speed). We'll only look for
#this in the second following the snap of the ball
mutate(
snap_catch_frame = first(frameId[s == min(s[frameId <= snap_frame + 10])])
) %>%
#Now we only want to look at the frames after the snap was caught
filter(frameId >= snap_catch_frame) %>%
mutate(
#the maximum speed the ball reaches in the next two seconds after this catch we'll say was the moment
#the ball was kicked, per our basic kinematics assumptions
kick_frame = first(frameId[s == max(s[frameId <= snap_catch_frame + 20])]),
#this is for convenience
event = ifelse(frameId == kick_frame, 'KICK', event)
) %>%
ungroup() %>%
#now we don't care about this frames
select(-snap_frame, -snap_catch_frame, -kick_frame) %>%
#we only want when the ball is marked as kicked, when it hits the ground, or when it is caught
filter(event %in% c('KICK','punt_land','punt_received','fair_catch')) %>%
#sometimes there will be multiple of these in a play, so we'll only look at the first one
group_by(gameId, playId) %>%
mutate(
row_num = row_number()
) %>%
ungroup() %>%
#now we'll only be left with two rows: one when the ball was kicked, and one when the ball landed/was caught
filter(row_num <= 2) %>%
#add the PFF data
left_join(pff_dat) %>%
#puts everything in one row because this is how my brain works
group_by(gameId, playId) %>%
mutate(
land_x = lead(x),
land_y = lead(y)
) %>%
ungroup() %>%
#now we get one play on a single row
filter(row_num == 1) %>%
select(-row_num) %>%
#Just a series of intermediate steps to get the final solution of the launch angle
mutate(
dx = (land_x - x)^2,
dy = (land_y - y)^2,
s_2 = max_s^2,
vz = sqrt(s_2 - (1/hangTime)^2*(dx+dy)),
phi = (asin(vz/max_s))*(180/pi),
height = vz*(hangTime/2)*3 #this is just for fun we don't use it for anything
)
})
#Congrats! You read the cleaning and calculating part of the code! You get a fun fact.
#The fun fact is that if air resistance played no part in a punt's path, 75% of punts
#would hit the big board in Jerry World! Anyways,
#get the modelling factors ready
ml_fit <- punt_angles %>%
#5% of the remaining punts were weirdly marked/had technology malfunctions so we got rid of them
filter(!is.na(phi)) %>%
#this only keeps the variables we want and one hot encodes them (if that's the right term)
select(max_s, yardline, phi, specialTeamsResult) %>%
mutate(
res = case_when(
specialTeamsResult == 'Fair Catch' ~ 0,
specialTeamsResult == 'Downed' ~ 1,
specialTeamsResult == 'Return' ~ 2,
specialTeamsResult == 'Touchback' ~ 3
)
) %>%
select(-specialTeamsResult) %>%
#this takes out muffs, of which there were 1% of the remaining sample
filter(!is.na(res))
#here we run things through the model based on what I read from the tutorial
#on how to do GAM multinomial logisitic regression from the mgcv documentation
yardline <- ml_fit$yardline
phi <- ml_fit$phi
max_s <- ml_fit$max_s
res <- ml_fit$res
#plz let this work
#small note that this has to be a GAM since the relationships are all nonlinear
plz <- gam(list(res~s(yardline)+s(phi)+s(max_s),
~s(yardline)+s(phi)+s(max_s),
~s(yardline)+s(phi)+s(max_s)),
family=multinom(K=3))
#some quick model checks
plot(plz, pages = 1)
gam.check(plz)
#We only care about the relationships between the variables and aren't really trying to predict anything out of sample
#so we don't need to split into training and testing sets
#For the purposes of the viz we'll make some fake data that are well within the distributions of each variable
plot_gr <- expand.grid(yardline = seq(40, 99, length=60), phi=seq(40,60,length=60),max_s=seq(15,25,length=60))
pred_gr <- predict(plz, newdata = plot_gr, type = 'response')
#for the purposes of the viz we'll only look at the most likely outcome
pred_cat <- apply(pred_gr,1,function(x) which(max(x)==x)[1])-1
#this just puts everything into a dataframe because this is how my brain works
plot_data <- data.frame(
yardline = plot_gr$yardline,
max_s = plot_gr$max_s,
phi = plot_gr$phi,
y = pred_cat
) %>%
mutate(
res = case_when(
y == 0 ~ 'Fair Catch',
y == 1 ~ 'Downed',
y == 2 ~ 'Return',
y == 3 ~ 'Touchback'
)
)
#To make the shiny app as simple as possible we put it into a csv.
#This will not be included in the GitHub because it is large
write.csv(plot_data, 'plot_data.csv')
#This makes the punt outcomes viz
ggplot(plot_data %>% filter(yardline == 57))+
geom_tile(mapping = aes(x = phi, y = max_s, fill = res))+
scale_fill_manual(values = c('Return'='red','Fair Catch'='forestgreen','Touchback'='darkblue','Downed'='violet'))+
theme_minimal()+
theme(
panel.background = element_rect(color = 'transparent'),
plot.title = element_text(hjust = 0.5)
)+
labs(
x = 'Phi (deg)',
y = 'Punt Speed (Yds/sec)',
title = 'Likeliest Punt Outcomes at the -43 Yardline',
fill = 'Punt Outcome'
)
ggsave('outcome.png', height = 7, width = 13)
#this makes the phi vs P(Fair Catch) viz
punt_angles %>%
mutate(
fc = ifelse(specialTeamsResult == 'Fair Catch', 1, 0)
) %>%
ggplot()+
geom_smooth(mapping = aes(x = phi, y = fc))+
theme_minimal()+
theme(
panel.background = element_rect(color = 'transparent'),
plot.title = element_text(hjust = 0.5)
)+
labs(
x = 'Phi (deg)',
y = 'Probability of Fair Catch',
title = 'Probability of Fair Catch v Launch Angle Phi'
)
ggsave('fc_v_phi.png', height = 7, width = 13)
#This makes the phi vs hang time viz
punt_angles %>%
ggplot()+
geom_smooth(mapping = aes(x = phi, y = hangTime))+
theme_minimal()+
theme(
panel.background = element_rect(color = 'transparent'),
plot.title = element_text(hjust = 0.5)
)+
labs(
x = 'Phi (deg)',
y = 'Hang Time (s)',
title = 'Hang Time v Launch Angle Phi'
)
ggsave('ht_v_phi.png', height = 7, width = 13)