R/do_four_factors_df.R

Defines functions do_four_factors_df

Documented in do_four_factors_df

#' Four factors for teams
#' 
#' @aliases do_four_factors_df
#'
#' @description 
#' This function computes team's offense and defense four factors.
#' 
#' The four factors are the effective field goal percentage (EFGP), 
#' the turnover percentage (TOVP), the offensive rebound percentage (ORBP) 
#' and the free throws rate (FTRate). They are well defined at 
#' \url{http://www.rawbw.com/~deano/articles/20040601_roboscout.htm} and
#' \url{https://www.basketball-reference.com/about/factors.html}.
#' 
#' As a summary, EFGP is a measure of shooting efficiency; TOVP is
#' the percentage of possessions where the team missed the ball, see
#' \url{https://www.nba.com/thunder/news/stats101.html} to read about 
#' the 0.44 coefficient; ORBP measures how many rebounds were offensive 
#' from the total of available rebounds, and FTRate is a measure of 
#' how often a team gets to the line.
#' 
#' @usage 
#' do_four_factors_df(df_games, teams, data_team_reb_tov)
#' 
#' @param df_games Data frame with the games, players info, advanced stats and
#' eventually recoded teams names.
#' @param teams Teams names.
#' @param data_team_reb_tov Additional data with rebounds and turnovers 
#' directly assigned to teams. Nothing to do if NULL.
#' 
#' @details 
#' Instead of defining the Offensive and Defensive Rebound Percentage
#' as mentioned in the previous links, I have computed just the Offensive
#' Rebound Percentage for the team and for its rivals. This makes easier
#' to have four facets, one per factor, in the ggplot.
#' 
#' In order to establish the team rankings, we have to consider these facts:
#' In defense (accumulated statistics of the opponent teams to the team of interest), 
#' the best team in each factor is the one that allows the smallest EFGP, the biggest TOVP, 
#' the smallest ORBP and the smallest FTRate, respectively.
#' 
#' In offense (accumulated statistics of the team of interest), the best team in each factor 
#' is the one that has the biggest EFGP, the smallest TOVP, 
#' the biggest ORBP and the biggest FTRate, respectively. 
#' 
#' @return 
#' A list with two data frames, \code{df_rank} and \code{df_no_rank}. 
#' Both have the same columns:
#' \itemize{
#' \item Team: Team name.
#' \item Type: Either Defense or Offense.
#' \item EFGP, ORBP, TOVP and FTRate.
#' }
#' 
#' The \code{df_rank} data frame contains the team ranking label for 
#' each statistic between parentheses. Therefore, \code{df_no_rank} is used
#' to create the ggplot with the numerical values and \code{df_rank} is
#' used to add the ranking labels.
#' 
#' @author 
#' Guillermo Vinue
#' 
#' @seealso 
#' \code{\link{get_four_factors_plot}} 
#' 
#' @examples 
#' df <- do_join_games_bio("ACB", acb_games_1718, acb_players_1718)
#' 
#' df1 <- do_add_adv_stats(df)
#' 
#' # When only one team is selected the rankings between parentheses
#' # do not reflect the real rankings regarding all the league teams.
#' # The rankings are computed with respect to the number of teams 
#' # passed as an argument.
#' df_four_factors <- do_four_factors_df(df1, "Valencia", NULL)
#' 
#' @importFrom dplyr summarise bind_rows
#'
#' @export

do_four_factors_df <- function(df_games, teams, data_team_reb_tov) {
  GameID <- Day <- Game <- Team <- Player.x <- FG <- FGA <- ThreeP <- FT <- FTA <- NULL 
  DRB <- ORB <- TOV <- Type <- EFGP <- TOVP <- ORBP <- FTRate <- NULL
  period <- day <- game_code <- team <- drb <- orb <- tov <- NULL

  if (!is.null(data_team_reb_tov)) {
    data_add <- data_team_reb_tov %>%
      filter(period == "All") %>%
      select(Day = day, game_code, Team = team, DRB = drb, ORB = orb, TOV = tov) %>%
      mutate(Player.x = "Equipo", FG = 0, FGA = 0, ThreeP = 0, FT = 0, FTA = 0, .after = Team) 
  }else{
    data_add <- NULL
  }
  
  df5 <- data.frame()
  for (i in teams) {
    team_Game <- unique(df_games$Game[df_games$Team == i])
    
    if (!is.null(data_add)) {
      df2_0 <- df_games %>%
        filter(Game %in% team_Game) %>%
        select(Day, game_code, Team, Player.x, FG, FGA, ThreeP, FT, FTA, DRB, ORB, TOV) 
    
      data_add_0 <- data_add %>%
        filter(game_code %in% unique(df2_0$game_code))
      
      df2_1 <- bind_rows(df2_0, data_add_0) 
    }else{
      df2_1 <- df_games %>%
        filter(Game %in% team_Game) %>%
        select(Day, Game, Team, Player.x, FG, FGA, ThreeP, FT, FTA, DRB, ORB, TOV)
    }
    
    df2 <- df2_1 %>%
      group_by(Team) %>%
      mutate(Type = ifelse(Team == i, "Offense", "Defense")) %>%
      ungroup()
    
    df3 <- df2 %>%
      group_by(Type) %>%
      summarise(EFGP = (sum(FG) + 0.5 * sum(ThreeP)) / sum(FGA),
                TOVP = sum(TOV) / (sum(FGA) + 0.44 * sum(FTA) + sum(TOV)),
                ORB = sum(ORB),
                DRB = sum(DRB), 
                ORBP = NA,
                FTRate = sum(FTA) / sum(FGA)) %>%
      ungroup()
    df3$ORBP[1] <- df3$ORB[1] / (df3$ORB[1] + df3$DRB[2])
    df3$ORBP[2] <- df3$ORB[2] / (df3$ORB[2] + df3$DRB[1])
    
    df4 <- df3 %>%
      select(-ORB, -DRB) %>%
      mutate(EFGP = round(EFGP * 100, 2),
             TOVP = round(TOVP * 100, 2),
             ORBP = round(ORBP * 100, 2),
             FTRate = round(FTRate * 100, 2)) %>%
      mutate(Team = i) %>%
      select(Team, everything())
    
    # The DRBP (percentage of defensive rebounds) would be:
    #df4$ORBP[df4$Type == "Defense"] <- 100 - df4$ORBP[df4$Type == "Defense"]
    
    # Data frame with the four factors for each team, both defense and offense:
    df5 <- bind_rows(df5, df4) 
  }  
  
  # The next steps are to add the ranking label for each team in the corresponding factor.
  df6 <- df5 %>%
    filter(Type == "Defense") %>%
    # The best team is the one that allows the worst (smallest) field percentage:
    mutate(order_EFGP = Team[order(EFGP)]) %>%
    # The best team is the one that allows the biggest turnover percentage:
    mutate(order_TOVP = Team[order(TOVP, decreasing = TRUE)]) %>%
    # The best team is the one that allows the worst (smallest) offensive rebounding percentage:
    mutate(order_ORBP = Team[order(ORBP)]) %>%
    # The best team is the one that allows the worst (smallest) free throw rate:
    mutate(order_FTRate = Team[order(FTRate)])
  df6 <- as.data.frame(df6)
  
  for (i in teams) {
    # Find the position of the team in each of the order columns.
    orders_cols <- apply(df6[,7:10], 2, function(x){grep(i, x)})
    df6[df6$Team == i, 3:6] <- paste(df6[df6$Team == i, 3:6], 
                                        " (", orders_cols, ")", sep = "")
  }
  
  df7 <- df5 %>%
    filter(Type == "Offense") %>%
    # The best team is the one that has the best (biggest) field percentage:
    mutate(order_EFGP = Team[order(EFGP, decreasing = TRUE)]) %>%
    # The best team is the one that has the smallest turnover percentage:
    mutate(order_TOVP = Team[order(TOVP)]) %>%
    # The best team is the one that has the best (biggest) offensive rebounding percentage:
    mutate(order_ORBP = Team[order(ORBP, decreasing = TRUE)]) %>%
    # The best team is the one that has the best (biggest) free throw rate:
    mutate(order_FTRate = Team[order(FTRate, decreasing = TRUE)])
  df7 <- as.data.frame(df7)
  
  for (i in teams) {
    orders_cols <- apply(df7[,7:10], 2, function(x){grep(i, x)})
    df7[df7$Team == i, 3:6] <- paste(df7[df7$Team == i, 3:6], 
                                        " (", orders_cols, ")", sep = "")
  }
  
  # Data frame with the four factors for each team, both defense and offense and the ranking label:
  df8 <- bind_rows(df6, df7) %>%
    select(-contains("order")) %>%
    arrange(rev(Team)) 
  
  return(list(df_rank = df8, df_no_rank = df5)) 
}

Try the BAwiR package in your browser

Any scripts or data that you put into this service are public.

BAwiR documentation built on Feb. 27, 2026, 5:07 p.m.