inst/shiny-examples/LogitHitRates/app.R

library(shiny)
library(ggplot2)
library(dplyr)
library(readr)
library(lubridate)
library(gridExtra)
library(scales)

# data is read from Github repository

# turn off warnings
options(warn=-1)

data_work <- function(){
  require(readr)
  require(dplyr)
  require(lubridate)

  sc_2021 <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/statcast2021.csv")
  sc_2022 <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/statcast_2022.csv")
  sc_2023 <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/statcast_2023.csv")
  sc_old <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/SC_BB_mini.csv")

  names(sc_old)[2] <- "Game_Date"

  hits <- c("single", "double", "triple",
            "home_run")
  sc_2021 %>%
    mutate(HR = ifelse(events == "home_run", 1, 0),
           H = ifelse(events %in% hits, 1, 0)) %>%
    select(game_year, Game_Date, launch_angle,
           launch_speed, events, HR, H) -> sc_2021
  sc_2022 %>%
    mutate(HR = ifelse(events == "home_run", 1, 0),
           H = ifelse(events %in% hits, 1, 0))  %>%
    select(game_year, Game_Date, launch_angle,
           launch_speed, events, HR, H) ->
    sc_2022
  sc_2023 %>%
    mutate(HR = ifelse(events == "home_run", 1, 0),
           H = ifelse(events %in% hits, 1, 0))  %>%
    select(game_year, Game_Date, launch_angle,
           launch_speed, events, HR, H) ->
    sc_2023
  sc <- rbind(sc_old, sc_2021, sc_2022, sc_2023)

  sc %>%
    mutate(Season = year(Game_Date))
}

logit_work <- function(sc, LA_breaks, LS_breaks,
                       season1, season2){
  library(dplyr)
  library(ggplot2)
  library(readr)
  library(lubridate)
  library(stringr)

  date2020_1 <- ""
  date2020_2 <- ""
  if(season1 == 2022) {
    date2020_1 <- sc %>%
      filter(game_year == 2022) %>%
      summarize(max(Game_Date)) %>%
      pull()
    date2020_1 <- paste(" through ", date2020_1)
  }
  if(season2 == 2022) {
    date2020_2 <- sc %>%
      filter(game_year == 2022) %>%
      summarize(max(Game_Date)) %>%
      pull()
    date2020_2 <- paste(" through ", date2020_2)
  }

  # some helper functions

  increasefont <- function (){
    theme(text = element_text(size = 16))
  }
  centertitle <- function (){
    theme(plot.title = element_text(
      colour = "white", size = 14,
      face = "bold",
      hjust = 0.5, vjust = 0.8, angle = 0),
      plot.subtitle = element_text(
        colour = "white", size = 12,
        face = "bold",
        hjust = 0.5, vjust = 0.8, angle = 0))
  }

  sc %>%
    mutate(LA = cut(launch_angle,
                    LA_breaks),
           LS = cut(launch_speed,
                    LS_breaks)) -> sc

  sc %>%
    filter(is.na(LA) == FALSE,
           is.na(LS) == FALSE) %>%
    group_by(Season, LA, LS) %>%
    summarize(N = n(),
              HR = sum(HR),
              H = sum(H),
              H_HR = H - HR,
              N_HR = N - HR,
              .groups = "drop") -> S

  sc %>%
    group_by(Season) %>%
    summarize(IP = n(),
              total_HR = sum(HR),
              IP_HR = IP - total_HR) -> S1

  inner_join(S, S1, by = "Season") -> S

  convert_string <- function(y){
    y1 <- gsub("[,(]", " ", y)
    y2 <- gsub("[][]", "", y1)
    y3 <- gsub("^ ", "", y2)
    mean(as.numeric(str_split(y3, " ")[[1]]))
  }

  S$la <- sapply(S$LA, convert_string)
  S$ls <- sapply(S$LS, convert_string)

  ###################### compare logits work

  S %>%
    mutate(p_inplay = N_HR / IP_HR,
           p_hit = H_HR / N_HR,
           logit_inplay = log(p_inplay) -
             log(1 - p_inplay),
           logit_hit = log(p_hit) -
             log(1 - p_hit)) -> S

  S %>%
    filter(Season == season1) %>%
    select(Season, la, ls,
           IP_HR, N_HR, H_HR,
           p_inplay, p_hit,
           logit_inplay, logit_hit) -> S1
  S %>%
    filter(Season == season2) %>%
    select(Season, la, ls,
           IP_HR, N_HR, H_HR,
           p_inplay, p_hit,
           logit_inplay, logit_hit) -> S2

  S12 <- inner_join(S1, S2,
                    by = c("la", "ls")) %>%
    mutate(diff_inplay = logit_inplay.y -
             logit_inplay.x,
           diff_hit = logit_hit.y -
             logit_hit.x,
           Z_inplay = diff_inplay /
             sqrt(1 / IP_HR.x / p_inplay.x / (1 - p_inplay.x) +
                  1 / IP_HR.y / p_inplay.y / (1 - p_inplay.y)),
           Z_hit = diff_hit /
             sqrt(1 / N_HR.x / p_hit.x / (1 - p_hit.x) +
                  1 / N_HR.y / p_hit.y / (1 - p_hit.y)))

  the_title = paste("Logit(", season2,
                    ") Minus Logit(", season1,
                    ")", sep = "")
  the_title2 = paste("Z Score comparing ", season2,
                    " and ", season1,
                    sep = "")

  xlim_lo <- min(LA_breaks) - diff(LA_breaks)[1] / 4
  xlim_hi <- max(LA_breaks) + diff(LA_breaks)[1] / 4
  ylim_lo <- min(LS_breaks) - diff(LS_breaks)[1] / 4
  ylim_hi <- max(LS_breaks) + diff(LS_breaks)[1] / 4

  plot5 <- ggplot(S1, aes(la, ls,
                  label = N_HR)) +
    geom_label(size = 6,
               fill = "red",
               color = "white") +
    xlim(xlim_lo, xlim_hi) +
    ylim(ylim_lo, ylim_hi) +
    labs(title = paste(season1, "In-Play (not HR) Counts",
                       date2020_1),
         subtitle = paste("Total In-Play =",
                          comma(S1$IP_HR[1]))) +
    centertitle() +
    increasefont() +
    xlab("Launch Angle") +
    ylab("Launch Speed") +
    geom_vline(xintercept = LA_breaks,
               color = "blue") +
    geom_hline(yintercept = LS_breaks,
               color = "blue") +
    scale_fill_manual(values =
                        c("darkorange2",
                          "dodgerblue")) +
    theme(plot.background = element_rect(fill = "grey25"),
          axis.text = element_text(color = "white"),
          axis.title = element_text(color = "white")) +
    theme(
      panel.background = element_rect(fill = "bisque",
                                      colour = "grey"))

  plot6 <- ggplot(S1, aes(la, ls,
                          label = H_HR)) +
    geom_label(size = 6,
               fill = "red",
               color = "white") +
    xlim(xlim_lo, xlim_hi) +
    ylim(ylim_lo, ylim_hi) +
    ggtitle(paste(season1, "Hit (not HR) Counts",
                  date2020_1)) +
    centertitle() +
    increasefont() +
    xlab("Launch Angle") +
    ylab("Launch Speed") +
    geom_vline(xintercept = LA_breaks,
               color = "blue") +
    geom_hline(yintercept = LS_breaks,
               color = "blue") +
    scale_fill_manual(values =
                        c("darkorange2",
                          "dodgerblue")) +
    theme(plot.background = element_rect(fill = "grey25"),
          axis.text = element_text(color = "white"),
          axis.title = element_text(color = "white")) +
    theme(
      panel.background = element_rect(fill = "bisque",
                                      colour = "grey"))

  plot7 <- ggplot(S2, aes(la, ls,
                          label = N_HR)) +
    geom_label(size = 6,
               fill = "red",
               color = "white") +
    xlim(xlim_lo, xlim_hi) +
    ylim(ylim_lo, ylim_hi) +
    labs(title = paste(season2, "In-Play (not HR) Counts",
                       date2020_2),
         subtitle = paste("Total In-Play =",
                          comma(S2$IP_HR[1]))) +
    centertitle() +
    increasefont() +
    xlab("Launch Angle") +
    ylab("Launch Speed") +
    geom_vline(xintercept = LA_breaks,
               color = "blue") +
    geom_hline(yintercept = LS_breaks,
               color = "blue") +
    scale_fill_manual(values =
                        c("darkorange2",
                          "dodgerblue")) +
    theme(plot.background = element_rect(fill = "grey25"),
          axis.text = element_text(color = "white"),
          axis.title = element_text(color = "white")) +
    theme(
      panel.background = element_rect(fill = "bisque",
                                      colour = "grey"))

  plot8 <- ggplot(S2, aes(la, ls,
                          label = H_HR)) +
    geom_label(size = 6,
               fill = "red",
               color = "white") +
    xlim(xlim_lo, xlim_hi) +
    ylim(ylim_lo, ylim_hi) +
    ggtitle(paste(season2, "Hit (not HR) Counts",
                  date2020_2)) +
    centertitle() +
    increasefont() +
    xlab("Launch Angle") +
    ylab("Launch Speed") +
    geom_vline(xintercept = LA_breaks,
               color = "blue") +
    geom_hline(yintercept = LS_breaks,
               color = "blue") +
    scale_fill_manual(values =
                        c("darkorange2",
                          "dodgerblue")) +
    theme(plot.background = element_rect(fill = "grey25"),
          axis.text = element_text(color = "white"),
          axis.title = element_text(color = "white")) +
    theme(
      panel.background = element_rect(fill = "bisque",
                                      colour = "grey"))

  plot1 <- ggplot(S12, aes(la, ls,
                           label = round(diff_inplay, 2))) +
    geom_label(size = 6,
               aes(fill = diff_inplay > 0),
               color = "white") +
    theme(legend.position = "none") +
    xlim(xlim_lo, xlim_hi) +
    ylim(ylim_lo, ylim_hi) +
    ggtitle(paste("In-Play Rates:", the_title)) +
    centertitle() +
    increasefont() +
    xlab("Launch Angle") +
    ylab("Launch Speed") +
    geom_vline(xintercept = LA_breaks,
               color = "blue") +
    geom_hline(yintercept = LS_breaks,
               color = "blue") +
    scale_fill_manual(values =
                        c("darkorange2",
                          "dodgerblue")) +
    theme(plot.background = element_rect(fill = "grey25"),
          axis.text = element_text(color = "white"),
          axis.title = element_text(color = "white")) +
    theme(
      panel.background = element_rect(fill = "bisque",
                                      colour = "grey"))

  plot2 <- ggplot(S12, aes(la, ls,
                           label = round(diff_hit, 2))) +
    geom_label(size = 6,
               aes(fill = diff_hit > 0),
               color = "white") +
    theme(legend.position = "none") +
    xlim(xlim_lo, xlim_hi) +
    ylim(ylim_lo, ylim_hi) +
    ggtitle(paste("Hit Rates:", the_title)) +
    centertitle() +
    increasefont() +
    xlab("Launch Angle") +
    ylab("Launch Speed") +
    geom_vline(xintercept = LA_breaks,
               color = "blue") +
    geom_hline(yintercept = LS_breaks,
               color = "blue") +
    scale_fill_manual(values =
                        c("darkorange2",
                          "dodgerblue")) +
    theme(plot.background = element_rect(fill = "grey25"),
          axis.text = element_text(color = "white"),
          axis.title = element_text(color = "white")) +
    theme(
      panel.background = element_rect(fill = "bisque",
                                      colour = "grey"))

  plot3 <- ggplot(S12, aes(la, ls,
                           label = round(Z_inplay, 2))) +
    geom_label(size = 6,
               aes(fill = Z_inplay > 0),
               color = "white") +
    theme(legend.position = "none") +
    xlim(xlim_lo, xlim_hi) +
    ylim(ylim_lo, ylim_hi) +
    ggtitle(paste("In-Play Rates:", the_title2)) +
    centertitle() +
    increasefont() +
    xlab("Launch Angle") +
    ylab("Launch Speed") +
    geom_vline(xintercept = LA_breaks,
               color = "blue") +
    geom_hline(yintercept = LS_breaks,
               color = "blue") +
    scale_fill_manual(values =
                        c("darkorange2",
                          "dodgerblue")) +
    theme(plot.background = element_rect(fill = "grey25"),
          axis.text = element_text(color = "white"),
          axis.title = element_text(color = "white")) +
    theme(
      panel.background = element_rect(fill = "bisque",
                                      colour = "grey"))

  plot4 <- ggplot(S12, aes(la, ls,
                           label = round(Z_hit, 2))) +
    geom_label(size = 6,
               aes(fill = Z_hit > 0),
               color = "white") +
    theme(legend.position = "none") +
    xlim(xlim_lo, xlim_hi) +
    ylim(ylim_lo, ylim_hi) +
    ggtitle(paste("Hit Rates:", the_title2)) +
    centertitle() +
    increasefont() +
    xlab("Launch Angle") +
    ylab("Launch Speed") +
    geom_vline(xintercept = LA_breaks,
               color = "blue") +
    geom_hline(yintercept = LS_breaks,
               color = "blue") +
    scale_fill_manual(values =
                        c("darkorange2",
                          "dodgerblue")) +
    theme(plot.background = element_rect(fill = "grey25"),
          axis.text = element_text(color = "white"),
          axis.title = element_text(color = "white")) +
    theme(
      panel.background = element_rect(fill = "bisque",
                                      colour = "grey"))


 # M_inplay <- apply(matrix(round(S12$diff_inplay, 2),
 #                          length(LS_breaks) - 1,
 #                          length(LA_breaks) - 1),
 #                   2, rev)
 # dimnames(M_inplay)[[1]] <-
 #  (rev(LS_breaks) - diff(LS_breaks)[1] / 2)[-1]
 # dimnames(M_inplay)[[2]] <-
 #   (LA_breaks - diff(LA_breaks)[1] / 2)[-1]

  #M_hr <- apply(matrix(round(S12$diff_hr, 2),
  #                     length(LS_breaks) - 1,
  #                     length(LA_breaks) - 1),
  #              2, rev)
  #dimnames(M_hr)[[1]] <-
  #  (rev(LS_breaks) - diff(LS_breaks)[1] / 2)[-1]
  #dimnames(M_hr)[[2]] <-
  #  (LA_breaks - diff(LA_breaks)[1] / 2)[-1]

  list(S = S12,
       plot1 = plot1,
       plot2 = plot2,
       plot3 = plot3,
       plot4 = plot4,
       plot5 = plot5,
       plot6 = plot6,
       plot7 = plot7,
       plot8 = plot8)
}

# read in statcast dataset
sc <- data_work()

# shiny app
ui <- fluidPage(
  theme = bslib::bs_theme(version = 4,
                          bootswatch = "superhero"),
  fluidRow(
    column(4, wellPanel(
      h5("Hit Rates on Balls in Play"),
      radioButtons("year1",
                   label = "Select First Season:",
                   choices = c("2015", "2016", "2017",
                             "2018", "2019", "2020",
                             "2021", "2022", "2023"),
                           selected = "2022",
                           inline = TRUE),
      radioButtons("year2",
                   label = "Select Second Season:",
                   choices = c("2015", "2016", "2017",
                               "2018", "2019", "2020",
                               "2021", "2022", "2023"),
                   selected = "2023",
                   inline = TRUE),
      sliderInput("rX", "Range of Launch Angle:",
                  min = -20, max = 50,
                  value = c(20, 40)),
      numericInput("nX",
                  "Number of Groups for Launch Angle:",
                  value = 4, step = 1),
      sliderInput("rY", "Range of Launch Speed:",
                  min = 80, max = 120,
                  value = c(95, 110)),
      numericInput("nY",
                  "Number of Groups for Launch Speed:",
                   value = 3, step = 1),
      downloadButton("downloadData", "Download Rates")
    )),
    column(8,
           tabsetPanel(type = "tabs",
                       tabPanel("First Season",
                                plotOutput("plot1a",
                                           height = "670px")
                       ),
                       tabPanel("Second Season",
                                plotOutput("plot1b",
                                           height = "670px")
                       ),
                       tabPanel("Difference in Logits",
                                plotOutput("plot1",
                                           height = "670px")
                       ),
                       tabPanel("Z-Score",
                         plotOutput("plot2",
                         height = "670px")
           ))
      )
      )
)

server <- function(input, output, session) {
  output$plot1a <- renderPlot({
    step_LA <- diff(input$rX) / input$nX
    step_LS <- diff(input$rY) / input$nY
    LA_breaks <- seq(input$rX[1], input$rX[2],
                     by = step_LA)
    LS_breaks <- seq(input$rY[1], input$rY[2],
                     by = step_LS)
    out1 <- logit_work(sc, LA_breaks, LS_breaks,
                       as.numeric(input$year1),
                       as.numeric(input$year2))
    grid.arrange(out1$plot5,
                 out1$plot6)
  }, res = 96)

  output$plot1b <- renderPlot({
    step_LA <- diff(input$rX) / input$nX
    step_LS <- diff(input$rY) / input$nY
    LA_breaks <- seq(input$rX[1], input$rX[2],
                     by = step_LA)
    LS_breaks <- seq(input$rY[1], input$rY[2],
                     by = step_LS)
    out1 <- logit_work(sc, LA_breaks, LS_breaks,
                       as.numeric(input$year1),
                       as.numeric(input$year2))
    grid.arrange(out1$plot7,
                 out1$plot8)
  }, res = 96)

  output$plot1 <- renderPlot({
    step_LA <- diff(input$rX) / input$nX
    step_LS <- diff(input$rY) / input$nY
    LA_breaks <- seq(input$rX[1], input$rX[2],
                     by = step_LA)
    LS_breaks <- seq(input$rY[1], input$rY[2],
                     by = step_LS)
    out1 <- logit_work(sc, LA_breaks, LS_breaks,
                       as.numeric(input$year1),
                       as.numeric(input$year2))
    grid.arrange(out1$plot1,
                 out1$plot2)
  }, res = 96)

  output$plot2 <- renderPlot({
    step_LA <- diff(input$rX) / input$nX
    step_LS <- diff(input$rY) / input$nY
    LA_breaks <- seq(input$rX[1], input$rX[2],
                     by = step_LA)
    LS_breaks <- seq(input$rY[1], input$rY[2],
                     by = step_LS)
    out1 <- logit_work(sc, LA_breaks, LS_breaks,
                       as.numeric(input$year1),
                       as.numeric(input$year2))
    grid.arrange(out1$plot3,
                 out1$plot4)
  }, res = 96)

  output$downloadData <- downloadHandler(
    filename = "rates_output.csv",
    content = function(file) {
      step_LA <- diff(input$rX) / input$nX
      step_LS <- diff(input$rY) / input$nY
      LA_breaks <- seq(input$rX[1], input$rX[2],
                       by = step_LA)
      LS_breaks <- seq(input$rY[1], input$rY[2],
                       by = step_LS)
      out <- logit_work(sc, LA_breaks, LS_breaks,
                         as.numeric(input$year1),
                         as.numeric(input$year2))
      write.csv(out$S, file, row.names = FALSE)
    }
  )
}

shinyApp(ui = ui, server = server)
bayesball/ShinyBaseball documentation built on March 26, 2024, 9:26 a.m.