# expected hits version
library(shiny)
library(ggplot2)
library(dplyr)
library(readr)
library(stringr)
# data is dataset sc2021_ip3 located in
# data folder of package
# turn off warnings
options(warn=-1)
#scip <- read_csv("sc2021_ip3.csv")
#chadwick <- read_csv("chadwick.csv")
sc2021_ip3 %>%
group_by(batter) %>%
summarize(N = n()) %>%
filter(N >= 200) %>%
inner_join(chadwick, c("batter" = "key_mlbam")) %>%
mutate(Name = paste(name_first, name_last)) %>%
arrange(name_last) -> S1
##############################################
setup_data <- function(sc, pid){
sc %>%
filter(batter == pid) %>%
arrange(Game_Date, game_pk, at_bat_number)
}
#############################################
moving_average_plot <- function(d,
width,
name,
type = "BA"){
require(RcppRoll)
d %>%
filter(is.na(estimated_ba) == FALSE) -> d
d %>%
mutate(Name = name,
Width = width,
Type = type,
IP_number = 1:n()) -> d
if(type == "BA"){
d$Outcome <- as.numeric(d$estimated_ba)
}
if(type == "WOBA"){
d$Outcome <- as.numeric(d$estimated_woba)
}
d$Roll_BA <-
roll_mean(d$Outcome,
n = width,
align = "center", fill = NA)
d$Roll_IP_Number <-
roll_mean(d$IP_number,
n = width,
align = "center", fill = NA)
d$AVG <- mean(d$Outcome,
na.rm = TRUE)
d$BLUE <- round(sum(abs(d$Roll_BA - d$AVG),
na.rm = TRUE), 2)
subtitle <- paste("Width = ", width,
", BLUE = ", d$BLUE, sep = "")
the_title <- paste(name, type)
d <- select(d,
Name, Width, Type,
Game_Date, game_pk, events,
IP_number, Outcome,
Roll_IP_Number, Roll_BA, AVG, BLUE)
plot1 <- ggplot(d, aes(x = Roll_IP_Number,
ymax = Roll_BA,
ymin = AVG)) +
geom_ribbon(fill = "blue") +
ylab("Rolling BA") +
theme(text = element_text(size = 18)) +
labs(title = the_title,
subtitle = subtitle) +
theme(plot.title = element_text(colour = "blue",
size = 18,
hjust = 0.5, vjust = 0.8, angle = 0),
plot.subtitle = element_text(color = "red",
size = 16,
hjust = 0.5, vjust = 0.8, angle = 0))
if(type == "BA"){
plot1 <- plot1 + ylab("Rolling BA")
}
if(type == "WOBA"){
plot1 <- plot1 + ylab("Rolling WOBA")
}
list(plot1 = plot1, S = d)
}
#############################################
get_id <- function(st){
names <- str_to_lower(unlist(str_split(str_squish(st), " ")))
if(length(names) == 3){
names <- c(paste(names[1], names[2]), names[3])
}
chadwick %>%
mutate(fname = str_to_lower(name_first),
lname = str_to_lower(name_last),
Name = paste(name_first,
name_last)) %>%
filter(fname == names[1],
lname == names[2]) %>%
select(key_mlbam, Name)
}
#############################################
moving_average_sim <- function(d, width,
ITER = 500,
name = "",
type = "BA"){
require(RcppRoll)
d %>%
filter(is.na(estimated_ba) == FALSE) -> d
d %>%
select(Game_Date, game_pk, events,
estimated_ba,
estimated_woba) %>%
mutate(IP_number = 1:n()) -> d
if(type == "BA"){
d$Outcome <- d$estimated_ba
}
if(type == "WOBA"){
d$Outcome <- d$estimated_woba
}
d$Roll_IP_Number <-
roll_mean(d$IP_number,
n = width,
align = "center", fill = NA)
d$AVG <- mean(d$Outcome,
na.rm = TRUE)
one_sim <- function(){
mixed_d <- slice(d, sample(1:n()))
mixed_roll <- roll_mean(mixed_d$Outcome,
n = width,
align = "center", fill = NA)
sum(abs(mixed_roll - d$AVG), na.rm = TRUE)
}
Simulated <- replicate(ITER, one_sim())
d$Roll_BA <-
roll_mean(d$Outcome,
n = width,
align = "center", fill = NA)
Observed <- sum(abs(d$Roll_BA - d$AVG),
na.rm = TRUE)
P_Value <- mean(Simulated >= Observed)
d_out <- data.frame(Simulated = Simulated,
Observed = Observed)
the_title <- paste("Simulated BLUE", name, type)
subtitle <- paste("Width = ", width,
", Obs BLUE = ",
round(Observed, 2),
", Tail-Prob = ", P_Value,
sep = "")
ggplot(d_out, aes(Simulated)) +
geom_histogram(bins = 10,
fill = "plum4",
color = "white") +
geom_vline(aes(xintercept = Observed),
size = 2, color = "red") +
theme(text = element_text(size = 18)) +
labs(title = the_title,
subtitle = subtitle) +
xlab("Simulated BLUE Measure") +
theme(plot.title = element_text(colour = "blue",
size = 18,
hjust = 0.5, vjust = 0.8, angle = 0),
plot.subtitle = element_text(color = "red",
size = 16,
hjust = 0.5, vjust = 0.8, angle = 0))
}
# shiny app
ui <- fluidPage(
theme = bslib::bs_theme(version = 4,
bootswatch = "superhero"),
h2("Streakiness in In-Play Batting Performance"),
fluidRow(
column(4, wellPanel(
# textInput("player_name", "2021 Batter Name:",
# value = "Bryce Harper"),
selectInput("player_name",
"Select 2021 Batter (at Least 200 BIP):",
S1$Name,
selected = "Bryce Harper"),
radioButtons("type", "Select Measure:",
c("BA", "WOBA"),
"BA", inline = TRUE),
sliderInput("width", "Width for Moving Average:",
min = 1, max = 50,
value = 20,
animate = FALSE),
downloadButton("downloadData", "Download Data")
)),
column(8,
tabsetPanel(type = "tabs",
tabPanel("Observed",
plotOutput("plot1",
height = "500px")
),
tabPanel("Simulated",
plotOutput("plot2",
height = "500px")
),
tabPanel("Description",
hr(),
p("This app displays moving averages
of in-play batting data for any
2021 batter of interest."),
p("One inputs the batter player,
the measure (either BA
or wOBA) and the width
for the moving average. These measures
are estimated values of BA and wOBA
based on the
launch angle and exit velocity
measurements."),
p("The Observed tab displays a graph of
the moving average against the in-play
number. The shaded region shows the
deviations of the moving average from
the overall average. The BLUE statistic
is the area of the shaded region and
measures the streakiness of the
hitting data."),
p("The Simulation tab shows results of
a simulation to assess the significance
of the observed streakiness. One
randomly permutes the measure values,
finds all the moving averages, and
computes the BLUE statistic.
One repeats this exercise 500 times and
collects the values of BLUE. A
histogram of the BLUE values is shown.
and the observed BLUE is shown as a
vertical line. The tail probability is
the probability the simulated BLUE is
at least as large as the observed value.
A small tail probablity indicates there
is more streakiness in the data than
one would anticipate by chance.")
)
)
)
)
)
server <- function(input, output, session) {
output$plot1 <- renderPlot({
id_info <- get_id(input$player_name)
# check for duplicate names
inner_join(id_info, sc2021_ip3,
by = c("key_mlbam" = "batter")) %>%
summarize(ID = first(key_mlbam)) %>% pull() -> ID
d <- setup_data(sc2021_ip3, ID)
moving_average_plot(d,
width = input$width,
name = input$player_name,
type = input$type)$plot1
}, res = 96)
output$plot2 <- renderPlot({
id_info <- get_id(input$player_name)
# check for duplicate names
inner_join(id_info, sc2021_ip3,
by = c("key_mlbam" = "batter")) %>%
summarize(ID = first(key_mlbam)) %>% pull() -> ID
d <- setup_data(sc2021_ip3, ID)
moving_average_sim(d,
width = input$width,
name = input$player_name,
type = input$type)
}, res = 96)
output$downloadData <- downloadHandler(
filename = "streak_output.csv",
content = function(file) {
id_info <- get_id(input$player_name)
# check for duplicate names
inner_join(id_info, sc2021_ip3,
by = c("key_mlbam" = "batter")) %>%
summarize(ID = first(key_mlbam)) %>% pull() -> ID
d <- setup_data(sc2021_ip3, ID)
out <- moving_average_plot(d,
width = input$width,
input$player_name,
type = input$type)
write.csv(out$S, file, row.names = FALSE)
}
)
}
shinyApp(ui = ui, server = server)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.