# R/build_dataset_for_playoff_model.R

#' @title Build Dataset For Playoff Model
#' @description Step 1 of building the playoff-model dataset: gets team
#'   statistics for all teams in one season by delegating to
#'   \code{get_base_dataset()}, which queries the stats.nba.com
#'   \code{leaguedashteamstats} endpoint.
#' @param season Value for the \code{Season} query parameter, e.g.
#'   \code{'2018-19'}.
#' @param base_url Endpoint URL that the query string is appended to.
#' @param per_mode Value for the \code{PerMode} query parameter.
#' @param plus_minus Value for the \code{PlusMinus} query parameter.
#' @param pace_adjust Value for the \code{PaceAdjust} query parameter.
#' @param rank Value for the \code{Rank} query parameter.
#' @param season_type Value for the \code{SeasonType} query parameter. It is
#'   pasted into the URL as-is, so it must already be URL-encoded.
#' @param outcome Value for the \code{Outcome} query parameter.
#' @param location Value for the \code{Location} query parameter.
#' @param month Value for the \code{Month} query parameter.
#' @param season_segment Value for the \code{SeasonSegment} query parameter.
#' @param date_from Value for the \code{DateFrom} query parameter.
#' @param date_to Value for the \code{DateTo} query parameter.
#' @param opp_team_id Value for the \code{OpponentTeamID} query parameter.
#' @param vs_conf Value for the \code{VsConference} query parameter.
#' @param vs_div Value for the \code{VsDivision} query parameter.
#' @param game_segment Value for the \code{GameSegment} query parameter.
#' @param period Value for the \code{Period} query parameter.
#' @param last_n_games Value for the \code{LastNGames} query parameter.
#' @return The data frame returned by \code{get_base_dataset()}.
#' @keywords nba.com api
#' @importFrom magrittr %>%
#' @export build_dataset_for_playoff_model_step1
#' @export get_record_against_winning_teams
#' @examples
#' \dontrun{
#' build_dataset_for_playoff_model_step1(season = '2018-19')
#' }
build_dataset_for_playoff_model_step1 <- function(season,
                                            base_url = 'https://stats.nba.com/stats/leaguedashteamstats?',
                                            per_mode = 'PerGame', plus_minus = 'N',
                                            pace_adjust = 'N', rank = 'N', season_type = 'Regular%20Season',
                                            outcome = '', location = '', month = 0, season_segment = '',
                                            date_from = '', date_to = '', opp_team_id = 0, vs_conf = '',
                                            vs_div = '', game_segment = '', period = 0, last_n_games = 0) {

  # Forward every query option explicitly. Previously only `season` was passed
  # on, so any non-default argument given by the caller was silently ignored.
  team_stats_df <- get_base_dataset(season = season,
                                    base_url = base_url,
                                    per_mode = per_mode,
                                    plus_minus = plus_minus,
                                    pace_adjust = pace_adjust,
                                    rank = rank,
                                    season_type = season_type,
                                    outcome = outcome,
                                    location = location,
                                    month = month,
                                    season_segment = season_segment,
                                    date_from = date_from,
                                    date_to = date_to,
                                    opp_team_id = opp_team_id,
                                    vs_conf = vs_conf,
                                    vs_div = vs_div,
                                    game_segment = game_segment,
                                    period = period,
                                    last_n_games = last_n_games)

  # Disabled follow-up steps, kept for reference:
  # write.csv(team_stats_df, file = 'base_team_stats.csv')
  # team_stats_df <- get_record_against_winning_teams(team_stats_df)

  return(team_stats_df)

}

# Column predicate: TRUE only when `x` is a character vector in which every
# element can be coerced to numeric without producing NA.
#
# Returns a single logical. A character vector that already contains NA
# yields FALSE, because the coerced vector then contains NA as well.
numericcharacters <- function(x) {
  # Check the type first and short-circuit with `&&`: non-character input
  # (e.g. a list column) is rejected without attempting as.numeric(), which
  # can raise an error on values it cannot coerce.
  is.character(x) && !anyNA(suppressWarnings(as.numeric(x)))
}

# Fetch team statistics for one season and combine them into one data frame.
#
# Sends one request per measure type ('Base', 'Advanced', 'Misc',
# 'Four%20Factors', 'Scoring', 'Opponent') to `base_url`, turns the first
# result set of each response into a data frame with lower-cased column
# names, drops the columns listed below from each table, inner-joins the six
# tables on `team_id` and `team_name`, and converts every character column
# that is fully numeric to numeric.
#
# Arguments:
#   season    Value for the `Season` query parameter.
#   base_url  Endpoint URL; the query string is appended directly to it.
#   others    Each is pasted verbatim into the query parameter named in the
#             paste0() call below. Nothing is URL-encoded here, so values
#             must already be encoded (see the `season_type` default).
#
# Returns the joined data frame. Stops with an error when a request returns
# no rows.
get_base_dataset <- function(season,
                             base_url = 'https://stats.nba.com/stats/leaguedashteamstats?',
                             per_mode = 'PerGame', plus_minus = 'N',
                             pace_adjust = 'N', rank = 'N', season_type = 'Regular%20Season',
                             outcome = '', location = '', month = 0, season_segment = '',
                             date_from = '', date_to = '', opp_team_id = 0, vs_conf = '',
                             vs_div = '', game_segment = '', period = 0, last_n_games = 0) {

  all_measure_type <- c('Base','Advanced','Misc','Four%20Factors','Scoring','Opponent')

  # Everything after MeasureType is identical for every request, so build
  # that part of the query string once instead of on every iteration.
  shared_query <- paste0('&PerMode=', per_mode,
                         '&PlusMinus=', plus_minus,
                         '&PaceAdjust=', pace_adjust,
                         '&Rank=', rank,
                         '&Season=', season,
                         '&SeasonType=', season_type,
                         '&Outcome=', outcome,
                         '&Location=', location,
                         '&Month=', month,
                         '&SeasonSegment=', season_segment,
                         '&DateFrom=', date_from,
                         '&DateTo=', date_to,
                         '&OpponentTeamID=', opp_team_id,
                         '&VsConference=', vs_conf,
                         '&VsDivision=', vs_div,
                         '&GameSegment=', game_segment,
                         '&Period=', period,
                         '&LastNGames=', last_n_games)

  team_stats_lst <- vector('list', length(all_measure_type))

  for (i in seq_along(all_measure_type)) {

    team_stats_url <- paste0(base_url,
                             'MeasureType=', all_measure_type[i],
                             shared_query)

    # Progress output goes through message() so callers can silence it with
    # suppressMessages().
    message(team_stats_url)
    message(all_measure_type[i])

    team_stats_json <-
      curl::curl(team_stats_url) %>%
      jsonlite::fromJSON()

    team_stats_rows <- team_stats_json$resultSets$rowSet[[1]]

    # Without rows the column names below cannot be assigned; fail here with
    # a message that says which request came back empty.
    if (length(team_stats_rows) == 0) {
      stop('No rows returned for MeasureType=', all_measure_type[i],
           ' and Season=', season, call. = FALSE)
    }

    team_stats_df <- data.frame(team_stats_rows, stringsAsFactors = FALSE)
    names(team_stats_df) <- tolower(team_stats_json$resultSets$headers[[1]])

    team_stats_lst[[i]] <- team_stats_df

    # Random pause between requests.
    Sys.sleep(stats::runif(n = 1, min = 0.5, max = 2))

  }

  # Only the base table keeps gp, w, l and w_pct; the other tables drop them
  # before the join (presumably because they repeat the base values -- TODO
  # confirm against the API response).
  team_stats_base <- team_stats_lst[[1]] %>%
    dplyr::select(-dplyr::contains('rank'), -cfid, -cfparams) %>%
    dplyr::rename(min_per_game = min)

  team_stats_adv <- team_stats_lst[[2]] %>%
    dplyr::select(-dplyr::contains('rank'), -cfid, -cfparams, -gp, -w, -l, -w_pct) %>%
    dplyr::rename(total_min = min)

  team_stats_misc <- team_stats_lst[[3]] %>%
    dplyr::select(-dplyr::contains('rank'), -cfid, -cfparams, -gp, -w, -l, -w_pct, -min)

  team_stats_four_fact <- team_stats_lst[[4]] %>%
    dplyr::select(-dplyr::contains('rank'), -cfid, -cfparams, -gp, -w, -l, -w_pct,
                  -min, -oreb_pct, -tm_tov_pct, -efg_pct)

  team_stats_scoring <- team_stats_lst[[5]] %>%
    dplyr::select(-dplyr::contains('rank'), -cfid, -cfparams, -gp, -w, -l, -w_pct, -min)

  team_stats_opp <- team_stats_lst[[6]] %>%
    dplyr::select(-dplyr::contains('rank'), -cfid, -cfparams, -gp, -w, -l, -w_pct, -min) %>%
    dplyr::rename(plus_minus_opp = plus_minus)

  # Join left to right in this fixed order; the order decides which side of
  # a name clash receives which suffix.
  join_keys <- c('team_id', 'team_name')

  team_stats_df <- Reduce(
    function(joined, next_tbl) dplyr::inner_join(joined, next_tbl, by = join_keys),
    list(team_stats_base, team_stats_adv, team_stats_misc,
         team_stats_four_fact, team_stats_scoring, team_stats_opp)
  ) %>%
    dplyr::mutate_if(numericcharacters, as.numeric)

  return(team_stats_df)

}
# emilykuehler/basketballstatsR documentation built on May 31, 2019, 10:01 a.m.