library(shiny)
library(ggplot2)
library(dplyr)
library(stringr)
library(Lahman)
library(BayesTestStreak)
# data is dataset retro2019 located in
# data folder of ShinyBaseball package
# turn off warnings
options(warn=-1)
retro2019 %>%
group_by(BAT_ID) %>%
summarize(AB = sum(AB_FL)) %>%
filter(AB >= 200) %>%
inner_join(People, c("BAT_ID" = "retroID")) %>%
mutate(Name = paste(nameFirst, nameLast)) %>%
arrange(nameLast) %>%
dplyr::select(Name, BAT_ID) -> S1
#############################################
bayes_factor_app <- function(retrodata,
retroid,
type = "H"){
require(Lahman)
filter(People, retroID == retroid) %>%
mutate(Name = paste(nameFirst, nameLast)) %>%
pull(Name) -> name
retrodata %>%
filter(BAT_ID == retroid) %>%
arrange(GAME_ID, INN_CT) %>%
mutate(Type = type,
AB_number = row_number()) -> d
if(type == "H"){
d$Outcome <- ifelse(d$EVENT_CD %in% 20:23, 1, 0)
}
if(type == "HR"){
d$Outcome <- ifelse(d$EVENT_CD == 23, 1, 0)
}
if(type == "not SO"){
d$Outcome <- ifelse(d$EVENT_CD == 3, 0, 1)
}
if(type == "SO"){
d$Outcome <- ifelse(d$EVENT_CD == 3, 1, 0)
}
bf_out <- bayes_factor_logK(d)
max_bf <- summarize(bf_out,
M = max(log_BF)) %>%
pull(M)
ggplot(bf_out,
aes(log_K, log_BF)) +
geom_line(linewidth = 2) +
geom_hline(yintercept = 0,
color = "red") +
theme(axis.text = element_text(size = rel(1.5))) +
theme(axis.title = element_text(size = rel(1.5))) +
xlab("log K") +
ylab("log Bayes Factor") +
labs(title = paste(name, type),
subtitle = paste("Max log Bayes Factor =",
round(max_bf, 2))) +
theme(plot.title = element_text(colour = "blue",
size = 18,
hjust = 0.5, vjust = 0.8, angle = 0),
plot.subtitle = element_text(colour = "red",
size = 16,
hjust = 0.5, vjust = 0.8, angle = 0))
}
spacings_sim <- function(retrodata,
retroid,
type = "H",
iter = 100){
find_spacings_geometric <- function(streak_data) {
# input is a retrosheet data frame with
# Outcome variable
n <- dim(streak_data)[1]
ab_success <- c((1:n)[streak_data$Outcome == 1],
n + 1)
df <- data.frame(Spacing =
diff(c(0, ab_success)) - 1)
df %>%
mutate(N = row_number(),
BAT_ID = streak_data$BAT_ID[1]) %>%
dplyr::select(BAT_ID, N, Spacing)
}
require(Lahman)
filter(People, retroID == retroid) %>%
mutate(Name = paste(nameFirst, nameLast)) %>%
pull(Name) -> name
retrodata %>%
filter(BAT_ID == retroid) %>%
arrange(GAME_ID, INN_CT) %>%
mutate(Type = type,
AB_number = row_number()) -> d
if(type == "H"){
d$Outcome <- ifelse(d$EVENT_CD %in% 20:23, 1, 0)
}
if(type == "HR"){
d$Outcome <- ifelse(d$EVENT_CD == 23, 1, 0)
}
if(type == "not SO"){
d$Outcome <- ifelse(d$EVENT_CD == 3, 0, 1)
}
if(type == "SO"){
d$Outcome <- ifelse(d$EVENT_CD == 3, 1, 0)
}
find_spacings_geometric(d) %>%
summarize(Obs = sum(Spacing ^ 2)) %>%
pull() -> Observed
one_sim <- function(d){
d$Outcome <- sample(d$Outcome)
find_spacings_geometric(d) %>%
summarize(Obs = sum(Spacing ^ 2)) %>%
pull()
}
S_data <- data.frame(Statistic =
replicate(iter, one_sim(d)))
p_value <- mean(S_data$Statistic >= Observed)
ggplot(S_data,
aes(Statistic)) +
geom_histogram(color = "white",
fill = "tan",
bins = 10) +
geom_vline(xintercept = Observed, linewidth = 2,
color = "red") +
theme(axis.text = element_text(size = rel(1.5))) +
theme(axis.title = element_text(size = rel(1.5))) +
xlab("Clumpiness Statistic") +
labs(title = paste(name, type),
subtitle = paste("P-Value =", p_value)) +
theme(plot.title = element_text(colour = "blue",
size = 18,
hjust = 0.5, vjust = 0.8, angle = 0),
plot.subtitle = element_text(colour = "red",
size = 16,
hjust = 0.5, vjust = 0.8, angle = 0))
}
geometric_plot_app <- function(retrodata,
retroid,
type = "H"){
find_spacings_geometric <- function(streak_data) {
# input is a retrosheet data frame with
# Outcome variable
n <- dim(streak_data)[1]
ab_success <- c((1:n)[streak_data$Outcome == 1],
n + 1)
df <- data.frame(Spacing =
diff(c(0, ab_success)) - 1)
df %>%
mutate(N = row_number(),
BAT_ID = streak_data$BAT_ID[1]) %>%
dplyr::select(BAT_ID, N, Spacing)
}
require(Lahman)
filter(People, retroID == retroid) %>%
mutate(Name = paste(nameFirst, nameLast)) %>%
pull(Name) -> name
retrodata %>%
filter(BAT_ID == retroid) %>%
arrange(GAME_ID, INN_CT) %>%
mutate(Type = type,
AB_number = row_number()) -> d
if(type == "H"){
d$Outcome <- ifelse(d$EVENT_CD %in% 20:23, 1, 0)
}
if(type == "HR"){
d$Outcome <- ifelse(d$EVENT_CD == 23, 1, 0)
}
if(type == "not SO"){
d$Outcome <- ifelse(d$EVENT_CD == 3, 0, 1)
}
if(type == "SO"){
d$Outcome <- ifelse(d$EVENT_CD == 3, 1, 0)
}
spacings_df <- find_spacings_geometric(d)
spacings_df %>%
group_by(Spacing) %>%
summarize(N = n()) -> spacings_table
ggplot(spacings_table,
aes(Spacing, log(N))) +
geom_point(size=3) +
stat_smooth(method=lm,
se=FALSE,
formula = "y ~ x",
linewidth=1,
color="black") +
stat_smooth(method=loess,
se=FALSE,
formula = "y ~ x",
linewidth=2) +
theme_minimal() +
theme(axis.text = element_text(size = rel(1.5))) +
theme(axis.title = element_text(size = rel(1.5))) +
xlab("Spacing") +
ylab("Log Frequency") +
ggtitle(paste(name, type)) +
theme(plot.title = element_text(colour = "blue",
size = 18,
hjust = 0.5, vjust = 0.8, angle = 0))
}
moving_average_plot <- function(retrodata,
width,
retroid,
type = "H"){
require(RcppRoll)
require(Lahman)
filter(People, retroID == retroid) %>%
mutate(Name = paste(nameFirst, nameLast)) %>%
pull(Name) -> name
retrodata %>%
filter(BAT_ID == retroid) %>%
arrange(GAME_ID, INN_CT) %>%
mutate(Width = width,
Type = type,
AB_number = row_number()) -> d
if(type == "H"){
d$Outcome <- ifelse(d$EVENT_CD %in% 20:23, 1, 0)
}
if(type == "HR"){
d$Outcome <- ifelse(d$EVENT_CD == 23, 1, 0)
}
if(type == "not SO"){
d$Outcome <- ifelse(d$EVENT_CD == 3, 0, 1)
}
if(type == "SO"){
d$Outcome <- ifelse(d$EVENT_CD == 3, 1, 0)
}
d$Roll_BA <-
roll_mean(d$Outcome,
n = width,
align = "center", fill = NA)
d$Roll_AB_Number <-
roll_mean(d$AB_number,
n = width,
align = "center", fill = NA)
d$AVG <- mean(d$Outcome,
na.rm = TRUE)
subtitle <- paste("Width = ", width)
the_title <- paste(name, type)
d <- dplyr::select(d,
BAT_ID, Width, Type,
GAME_ID, INN_CT, EVENT_CD,
AB_number, Outcome,
Roll_AB_Number, Roll_BA, AVG)
plot1 <- ggplot(d, aes(x = Roll_AB_Number,
ymax = Roll_BA,
ymin = AVG)) +
geom_ribbon(fill = "blue") +
ylab("Rolling BA") +
theme(text = element_text(size = 18)) +
labs(title = the_title,
subtitle = subtitle) +
theme(plot.title = element_text(colour = "blue",
size = 18,
hjust = 0.5, vjust = 0.8, angle = 0),
plot.subtitle = element_text(color = "red",
size = 16,
hjust = 0.5, vjust = 0.8, angle = 0))
if(type == "H"){
plot1 <- plot1 + ylab("Rolling BA")
}
if(type == "HR"){
plot1 <- plot1 + ylab("Rolling HR Rate")
}
if(type == "not SO"){
plot1 <- plot1 + ylab("Rolling not SO Rate")
}
if(type == "SO"){
plot1 <- plot1 + ylab("Rolling SO Rate")
}
list(plot1 = plot1, S = d)
}
#############################################
# shiny app
ui <- fluidPage(
theme = bslib::bs_theme(version = 4,
bootswatch = "superhero"),
h2("Streakiness in At-Bat Batting Performance"),
fluidRow(
column(4, wellPanel(
selectInput("player_name",
"Select 2021 Batter (at Least 200 AB):",
S1$Name,
selected = "Bryce Harper"),
radioButtons("type", "Select Measure:",
c("H", "HR", "SO", "not SO"),
"H", inline = TRUE),
sliderInput("width", "Width for Moving Average:",
min = 1, max = 80,
value = 20,
animate = FALSE),
downloadButton("downloadData", "Download Data")
)),
column(8,
tabsetPanel(type = "tabs",
tabPanel("Moving Averages",
plotOutput("plot1",
height = "500px")
),
tabPanel("Geometric Plot",
plotOutput("plot2",
height = "500px")
),
tabPanel("Simulation",
plotOutput("plot3",
height = "500px")
),
tabPanel("Bayes Factor",
plotOutput("plot4",
height = "500px")
),
tabPanel("Description",
hr(),
p("This app displays moving averages
and geometric plots of batting data for any
2021 batter of interest."),
p("One inputs the batter player,
the measure (either H, HR, SO,
or not SO) and the width
for the moving average."),
p("The Moving Average tab displays a graph of
the moving average against the at-bat
number. The shaded region shows the
deviations of the moving average from
the overall average."),
p("The Geometric Plot tab constructs a
geometric plot of the spacings between
successes. If the points follow a line
the spacings are approximately
Geometric distributed."),
p("The Simulation tab shows a histogram
of a clumpiness measure when the
outcomes are randomly permuted.")
)
)
)
)
)
server <- function(input, output, session) {
output$plot1 <- renderPlot({
player_id <- filter(S1,
Name == input$player_name) %>%
pull(BAT_ID)
moving_average_plot(filter(retro2019,
AB_FL == TRUE),
width = input$width,
retroid = player_id,
type = input$type)$plot1
}, res = 96)
output$plot2 <- renderPlot({
player_id <- filter(S1,
Name == input$player_name) %>%
pull(BAT_ID)
geometric_plot_app(filter(retro2019,
AB_FL == TRUE),
retroid = player_id,
type = input$type)
}, res = 96)
output$plot3 <- renderPlot({
player_id <- filter(S1,
Name == input$player_name) %>%
pull(BAT_ID)
spacings_sim(filter(retro2019,
AB_FL == TRUE),
retroid = player_id,
type = input$type)
}, res = 96)
output$plot4 <- renderPlot({
player_id <- filter(S1,
Name == input$player_name) %>%
pull(BAT_ID)
bayes_factor_app(filter(retro2019,
AB_FL == TRUE),
retroid = player_id,
type = input$type)
}, res = 96)
output$downloadData <- downloadHandler(
filename = "streak_output.csv",
content = function(file) {
player_id <- filter(S1,
Name == input$player_name) %>%
pull(BAT_ID)
out <- moving_average_plot(
filter(retro2019, AB_FL == TRUE),
width = input$width,
retroid = player_id,
type = input$type)
write.csv(out$S, file, row.names = FALSE)
}
)
}
shinyApp(ui = ui, server = server)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.