R/game.R

Defines functions GetLineups .GetPlayers .AddPlayers .RemovePlayer .ProcessSubs .AddFullQuarterPlayers .AddPlayersNotInBoxScore

Documented in GetLineups

#' Lineups
#'
#' @return Data frame with 5 man lineups with play-by-play from NBA.com
#' @keywords lineups playbyplay
#' @export
#' @examples
#' # GetLineups(game.id = '0021300359')

GetLineups <- function(game.id) {

  # Get play by play first
  pbp <- GetPlayByPlay(GameID = game.id)

  # 18 is a useless play marker
  if (sum(pbp$EVENTMSGTYPE == 18) > 0) {
    pbp <- pbp[-which(pbp$EVENTMSGTYPE == 18), ]
  }

  # Add markers for when the 10 man lineups can change
  mark.periods <- c(1, which(pbp$EVENTMSGTYPE == 13 & pbp$EVENTMSGACTIONTYPE == 0))   # End of period

  # Sometimes end of period marker is missing
  if (length(mark.periods) < 5) {
    periods.present <- pbp[(mark.periods), ]
    periods.present <- periods.present[periods.present$PCTIMESTRING == '0:00', 'PERIOD']
    periods.missing <- unique(pbp$PERIOD)[!(unique(pbp$PERIOD) %in% periods.present)]
    
    for (p in periods.missing) {
      mark.periods <- sort(c(mark.periods, max(which(pbp$PERIOD == p))))
    }
  }
  
  # Check if duplicate "end of periods"
  period.diffs <- diff(mark.periods)
  while (sum(period.diffs <= 2) > 0) {
    bad.marker <- mark.periods[which(period.diffs <= 2) + 1][1]
    pbp <- pbp[-bad.marker, ]
    row.names(pbp) <- 1:nrow(pbp)

    mark.periods <- c(1, which(pbp$EVENTMSGTYPE == 13 & pbp$EVENTMSGACTIONTYPE == 0))   # End of period
    period.diffs <- diff(mark.periods)
  }

  # For some reason last play sometimes marked wrong
  if ((max(mark.periods) + 20) < nrow(pbp)) {
    mark.periods <- c(mark.periods, nrow(pbp))
  }

  markers <- which(pbp$EVENTMSGTYPE == 8)  # Subs

  # Remove subs that somehow happened after the end of the game
  markers <- markers[markers < max(mark.periods)]

  # Group subs based on consecutive actions
  ranges <- list(markers[1])
  last <- markers[1]
  last.time <- pbp[markers[1], 'PCTIMESTRING']
  for (i in 2:length(markers)) {
    if ((markers[i] == (last + 1)) & (pbp[markers[i], 'PCTIMESTRING'] == last.time)) {
      ranges[[length(ranges)]] <- c(ranges[[length(ranges)]], markers[i])
    } else {
      ranges[[length(ranges) + 1]] <- markers[i]
    }

    last <- markers[i]
    last.time <- pbp[markers[i], 'PCTIMESTRING']
  }

  # Remove consecutive markers at the same timestamp
  markers <- sapply(ranges, function(x) x[1])

  # Add periods into markers
  markers <- c(markers, mark.periods)
  markers <- markers[order(markers)]

  # Get team IDs
  home.id <- pbp[pbp$EVENTMSGTYPE == 8 & pbp$EVENTMSGACTIONTYPE == 0 & !is.na(pbp$HOMEDESCRIPTION), 'PLAYER1_TEAM_ID'][1]
  away.id <- pbp[pbp$EVENTMSGTYPE == 8 & pbp$EVENTMSGACTIONTYPE == 0 & !is.na(pbp$VISITORDESCRIPTION), 'PLAYER1_TEAM_ID'][1]

  # Go through substitutions and get preliminary lineups
  home.players <- .ProcessSubs(pbp, markers, mark.periods, ranges, 'Home')
  away.players <- .ProcessSubs(pbp, markers, mark.periods, ranges, 'Away')

  # Add players who played the entire quarter
  home.players <- .AddFullQuarterPlayers(home.players, game.id, markers, mark.periods, home.id, FALSE)
  away.players <- .AddFullQuarterPlayers(away.players, game.id, markers, mark.periods, away.id, FALSE)

  # Sometimes the full quarter isn't the right length. Add those players back in
  home.players <- .AddFullQuarterPlayers(home.players, game.id, markers, mark.periods, home.id, TRUE)
  away.players <- .AddFullQuarterPlayers(away.players, game.id, markers, mark.periods, away.id, TRUE)

  # Sometimes the player isn't in the box score. Add players who had an action back in
  home.players <- .AddPlayersNotInBoxScore(home.players, pbp, markers, mark.periods, home.id, 1)
  away.players <- .AddPlayersNotInBoxScore(away.players, pbp, markers, mark.periods, away.id, 1)

  # Getting desperate... add players in Team 2
  home.players <- .AddPlayersNotInBoxScore(home.players, pbp, markers, mark.periods, home.id, 2)
  away.players <- .AddPlayersNotInBoxScore(away.players, pbp, markers, mark.periods, away.id, 2)

  pbp[, c('H1', 'H2', 'H3', 'H4', 'H5', 'A1', 'A2', 'A3', 'A4', 'A5')] <- NA

  # Add player ids into pbp
  for (i in 1:length(home.players)) {
    length <- markers[i + 1] - markers[i] + 1
    pbp[markers[i]:markers[i + 1], c('H1', 'H2', 'H3', 'H4', 'H5')] <- rep(home.players[[i]], each = length)
    pbp[markers[i]:markers[i + 1], c('A1', 'A2', 'A3', 'A4', 'A5')] <- rep(away.players[[i]], each = length)
  }

  return(pbp)
}

# Get all the players in the lineups between the first and last indices
.GetPlayers <- function(lineups, first, last) {
  players <- c()
  for (i in first:last) {
    players <- union(players, lineups[[i]])
  }
  return(players)
}

# Add all the players to the lineups between the first and last indices
.AddPlayers <- function(players, lineups, first, last) {
  for (i in first:last) {
    lineups[[i]] <- union(lineups[[i]], players)
  }
  return(lineups)
}

# Remove player from the lineups between the first and last indices
.RemovePlayer <- function(player, lineups, first, last) {
  for (i in first:last) {
    if (player %in% lineups[[i]]) {
      lineups[[i]] <- lineups[[i]][lineups[[i]] != player]
    }
  }
  return(lineups)
}

# Input:    pbp - NBA play by play data frame
#           markers - rows of pbp that signify changes
#           mark.periods - rows of pbp for start and end of quarters
#           ranges - list of consecutive subs
#           team - either 'Home' or 'Away'
# Output:   List of arrays of players at each time
.ProcessSubs <- function(pbp, markers, mark.periods, ranges, team) {
  
  # Keep track of players that are in the lineup between each marker
  players <- lapply(markers[-1], function(x) NULL)
  
  # Create act column corresponding to home or away team
  if (team == 'Home') {
    actions <- pbp$HOMEDESCRIPTION
  } else {
    actions <- pbp$VISITORDESCRIPTION
  }
  
  subs <- grep('SUB:', actions)                               # Find plays with subs
  for (j in 1:length(ranges)) {
    
    # Get subs in that range
    current.subs <- subs[subs %in% ranges[[j]]]
    
    # Only proceed if there was at least 1 sub
    if (length(current.subs) > 0) {
      
      # Get one sub
      i <- current.subs[1]
      
      # Compute the group this sub happened between, and the first and last group of the period
      group <- length(markers[markers <= i]) - 1
      i.last <- min(mark.periods[mark.periods > i])
      group.last <- length(markers[markers <= i.last]) - 1
      i.first <- max(mark.periods[mark.periods <= i])
      group.first <- length(markers[markers <= i.first])
      
      # Get the players subbed in and the players subbed out
      players.in <- pbp[current.subs, 'PLAYER2_ID']
      players.out <- pbp[current.subs, 'PLAYER1_ID']
      
      # Remove players subbed in and out
      overlap <- intersect(players.in, players.out)
      for (player in overlap) {
        players.in <- players.in[-match(player, players.in)]
        players.out <- players.out[-match(player, players.out)]
      }
      
      # Loop through subs
      if (length(players.in) > 0) {
        for (k in 1:length(players.in)) {
          
          player.in <- players.in[k]
          player.out <- players.out[k]
          
          # If we don't have the player that subbed out at all, add him in the whole quarter up to now
          if (!(player.out %in% .GetPlayers(players, group.first, group))) {
            players <- .AddPlayers(player.out, players, group.first, group)
          }
          
          # Remove the player subbed out for the remainder of the quarter, and add the guy subbed in
          players <- .RemovePlayer(player.out, players, group + 1, group.last)
          players <- .AddPlayers(player.in, players, group + 1, group.last)
        }
      }
    }
  }
  
  return(players)
}

# Input:    players - list of vectors of players in
#           game.id - game ID to get box score
#           markers - rows of pbp that signify changes
#           mark.periods - rows of pbp for start and end of quarters
#           team.id - id of team to match with box scores
# Output:   List of arrays of players at each time
.AddFullQuarterPlayers <- function(players, game.id, markers, mark.periods, team.id, wrong.time) {

  # Get # players in each range; if not 10, then use the box score
  i <- length(mark.periods)
  num.players <- sapply(players, function(x) length(x))

  # Add players who were in the entire quarter
  while ((length(num.players[num.players != 5]) > 0) & (i > 1)) {

    # Decrement the period
    i <- i - 1

    # Get box score for period and split into home and away
    box.score <- GetBoxScore(GameID = game.id, RangeType = 1, StartPeriod = i, EndPeriod = i)
    box.score <- box.score[box.score$TEAM_ID == team.id, ]
    
    if (wrong.time) {
      box.score$MIN <- TimeToSeconds(box.score$MIN)
      max.time <- round(sum(box.score$MIN) / 5)
      box.score <- box.score[box.score$MIN == max.time, ]
    } else {
      if (i <= 4) {
        box.score <- box.score[box.score$MIN == '12:00', ]
      } else {
        box.score <- box.score[box.score$MIN == '5:00', ]
      }
    }
    
    # Get range corresponding to period in players list
    group.first <- length(markers[markers <= mark.periods[i]])
    group.last <- length(markers[markers <= mark.periods[i + 1]]) - 1

    # If home players haven't played in the quarter, add them to the whole thing
    for (player in box.score$PLAYER_ID) {
      if (!(player %in% .GetPlayers(players, group.first, group.last))) {
        players <- .AddPlayers(player, players, group.first, group.last)
      }
    }

    # Update the player counts
    num.players <- sapply(players, function(x) length(x))
  }

  return(players)
}

# Input:    players - list of vectors of players in
#           pbp - NBA play by play data frame
#           markers - rows of pbp that signify changes
#           mark.periods - rows of pbp for start and end of quarters
#           team.id - id of team to match with box scores
# Output:   List of arrays of players at each time
.AddPlayersNotInBoxScore <- function(players, pbp, markers, mark.periods, team.id, team.num) {

  # Get # players in each range; if not 10, then use the box score
  i <- length(mark.periods)
  num.players <- sapply(players, function(x) length(x))

  # Add players who were in the entire quarter
  while ((length(num.players[num.players != 5]) > 0) & (i > 1)) {

    # Decrement the period
    i <- i - 1

    # Get range corresponding to period in players list
    group.first <- length(markers[markers <= mark.periods[i]])
    group.last <- length(markers[markers <= mark.periods[i + 1]]) - 1

    q.players <- players[group.first:group.last]
    q.num <- sapply(q.players, function(x) length(x))

    # If less than 5 players, find a missing player
    if (mean(q.num) < 5) {

      # Get players we already have
      existing.players <- unique(paste(unlist(q.players)))

      # Get plays with presence actions
      actions <- pbp[mark.periods[i]:mark.periods[i + 1], ]

      if (team.num == 1) {
        actions <- actions[!is.na(actions$PLAYER1_TEAM_ID) & (actions$PLAYER1_TEAM_ID == team.id), ]
        actions <- actions[actions$EVENTMSGTYPE %in% c(1, 2, 4), ]
        potential.players <- unique(actions$PLAYER1_ID)
      } else {
        actions <- actions[!is.na(actions$PLAYER2_TEAM_ID) & (actions$PLAYER2_TEAM_ID == team.id), ]
        # actions <- actions[actions$EVENTMSGTYPE %in% c(1, 2, 4), ]
        potential.players <- unique(actions$PLAYER2_ID)
      }

      potential.players <- potential.players[!(potential.players %in% existing.players)]

      if (length(potential.players) == (5 - mean(q.num))) {
        for (player in potential.players) {
          players <- .AddPlayers(player, players, group.first, group.last)
        }
      }

      # Update the player counts
      num.players <- sapply(players, function(x) length(x))
    }
  }

  return(players)
}
ccagrawal/nbaTools documentation built on Jan. 25, 2020, 11:49 p.m.