R/stats.R

Defines functions fp_agg_data fp_get_stats_pos

Documented in fp_agg_data fp_get_stats_pos

#' NFL Positional Statistics
#'
#' @param pos \code{Charcater}. Specific position you want to return. Default will return all
#'            all offensive postions. Available options include
#'            \itemize{
#'             \item \code{"QB"}
#'             \item \code{"RB"}
#'             \item \code{"WR"}
#'             \item \code{"TE"}
#'             \item \code{"K"}
#'             \item \code{"DST"}
#'            }
#' @param season \code{Numeric}. The NFL season. If missing it will return the current year's season.
#'               Supported season only go back to \code{2013}
#'
#' @return a tibble
#' @export
#'
#' @note \itemize{
#'       \item If you query a single week all players that were on a bye that week are not returned
#'        }
#'
#' @examples
#'
#' Stats_QB <- fp_get_stats_pos("qb")
#' Stats_RB <- fp_get_stats_pos("rb")
#' Stats_WR <- fp_get_stats_pos("wr")
#' Stats_TE <- fp_get_stats_pos("te")
#' Stats_K <- fp_get_stats_pos("k")
#' Stats_DST <- fp_get_stats_pos("dst")
#'

fp_get_stats_pos <- function(pos) {
  #retrieve stats for year by selected pos
  cat(paste0("Scraping ", toupper(pos), " Stats ", sep = ""))
  FP_2013 <- fp_get_stats_data(pos, 2013)
  FP_2014 <- fp_get_stats_data(pos, 2014)
  FP_2015 <- fp_get_stats_data(pos, 2015)
  FP_2016 <- fp_get_stats_data(pos, 2016)
  FP_2017 <- fp_get_stats_data(pos, 2017)
  FP_2018 <- fp_get_stats_data(pos, 2018)
  FP_2019 <- fp_get_stats_data(pos, 2019)
  FP_2020 <- fp_get_stats_data(pos, 2020)

  #combine data to 1 set
  out <- rbind(FP_2013, FP_2014, FP_2015, FP_2016, FP_2017, FP_2018, FP_2019, FP_2020) %>% filter(team != "FA")

}

#' Aggregate Stats
#'
#' @param pos \code{Charcater}. Aggregates all Positions into 1 data set
#'            \itemize{
#'             \item \code{"QB"}
#'             \item \code{"RB"}
#'             \item \code{"WR"}
#'             \item \code{"TE"}
#'             \item \code{"K"}
#'             \item \code{"DST"}
#'            }

#' @return a tibble
#' @export
#'
#' @examples
#'
#' Stats_QB <- fp_get_stats_pos("qb")
#' Stats_RB <- fp_get_stats_pos("rb")
#' Stats_WR <- fp_get_stats_pos("wr")
#' Stats_TE <- fp_get_stats_pos("te")
#' Stats_K <- fp_get_stats_pos("k")
#' Stats_DST <- fp_get_stats_pos("dst")
#'
#'

fp_agg_data <- function() {
  Stats_QB <- fp_get_stats_pos("qb");cat("", sep = "\n  ")
  Stats_RB <- fp_get_stats_pos("rb");cat("", sep = "\n  ")
  Stats_WR <- fp_get_stats_pos("wr");cat("", sep = "\n  ")
  Stats_TE <- fp_get_stats_pos("te");cat("", sep = "\n  ")
  Stats_K <- fp_get_stats_pos("k");cat("", sep = "\n  ")
  Stats_DST <- fp_get_stats_pos("dst")



  stat_cols = c(season = "season", data_src = "data_src", player = "player",id = "id",src_id = "fantasypro_id",
                team = "team", position = "position",  rank = "rank",
                pass_att = "passing_att", pass_comp = "passing_cmp", pass_yds = "passing_yds",
                pass_tds = "passing_td",  pass_int = "passing_int", pass_comp_pct = "passing_pct",
                pass_avg = "passing_y_a", sacks = "passing_sacks",
                rush_att = "rushing_att", rush_yds = "rushing_yds",rush_tds = "rushing_td",
                rec = "Receiving Rec", rec_tgt = "Receiving Tgt", rec_yds_g = "Receiving Yds/g", games = "GP",
                rec_yds = "Receiving Yds", rec_avg = "Receiving Avg", rec_tds = "Receiving TD",
                fumbles_lost = "fl",
                fg = "fg", fg_att = "fga",
                fg_0019 = "x1-19", fg_2029 = "x20-29", fg_3039 = "x30-39", fg_4049 = "x40-49",fg_50 = "x50",
                xp = "xpm", xp_att = "xpt",
                dst_int = "int", dst_fum_rec = "fr", dst_fum_force = "ff", dst_sacks = "sack", dst_td = "def_td",
                dst_safety = "sfty") %>%
    plyr::ldply(., data.frame) %>%
    rename(., match = .id, raw = X..i..)

  combined_pos <- plyr::rbind.fill(Stats_QB, Stats_RB, Stats_WR, Stats_TE, Stats_K, Stats_DST)

  combined_pos$player <- gsub(".", "", combined_pos$player) %>% gsub("-II","", .) %>%
    gsub("-III","", .) %>% gsub("-IIII","", .) %>% gsub("-JR","", .) %>% tolower()
  combined_pos <- mutate(combined_pos, id =ffanalytics:::player_ids$id[match(combined_pos$fantasypro_id, ffanalytics:::player_ids$fantasypro_id)]) %>%
    select(-c(team, pos)) %>%
    add_player_info() %>%
    select(matches(stat_cols$raw))

  names(combined_pos) <-  stat_cols$match[match(names(combined_pos), stat_cols$raw)]


  combined_pos <- combined_pos
  out <- combined_pos
}


fp_get_stats_data <- function(pos=c("qb", "rb", "wr", "te", "k"),
                              season = c(2013,2014,2015,2016,2017,2018,2019,2020)) {
  pos=tolower(pos)
  base_url="https://www.fantasypros.com/nfl/stats/"

  url=paste0(base_url,pos,".php?&year=",season)
  #cat(paste0("Scraping ", toupper(pos), " Season Stats from"), url, sep = "\n  ")
  cat(paste0(season, sep = " "))

  fp_html <- xml2::read_html(url)
  fp_stats <- rvest::html_table(fp_html, fill = TRUE)[[1]]

  if (pos %in% c("qb", "rb", "wr", "te")) {
    first_row_names <- fp_stats[1 , ]
    second_row_names <- fp_stats[2 , ]

    new_row_names <- paste(first_row_names, second_row_names, sep = "_")
    new_row_names <- gsub("^_|MISC_", "", new_row_names, ignore.case = TRUE)

    fp_stats <- fp_stats[3:nrow(fp_stats), ]
    names(fp_stats) <- new_row_names
  }

  fp_stats$player <- gsub("\\s\\(.*", "", fp_stats$Player, perl = TRUE)
  fp_stats$team <- gsub(".*\\s|\\(|\\)", "", fp_stats$Player)
  fp_stats$pos <- toupper(pos)
  fp_stats$Player <- NULL
  fp_stats$Season <- season

  # this gets columns that are already numbers
  num_cols <- which(grepl("player|team|pos", names(fp_stats), ignore.case = TRUE))
  already_numeric <- which(sapply(1:ncol(fp_stats), function(i) class(fp_stats[, i])) != "character")

  num_cols <- sort(unique(c(num_cols, already_numeric)))
  num_cols <- setdiff(1:ncol(fp_stats), num_cols)
  num_cols <- names(fp_stats)[num_cols]
  fp_stats[num_cols][] <- lapply(fp_stats[num_cols], readr::parse_number)

  if (pos != "dst") {
    fp_stats$fantasypro_id <- gsub(" ", "-", fp_stats$player) %>% gsub("-II","", .) %>%
      gsub("-III","", .) %>% gsub("-IIII","", .) %>% gsub("-JR","", .) %>% gsub(".","", .) %>% tolower()
  }
  if (pos == "dst") { fp_stats$fantasypro_id <- tolower(paste0(gsub("([A-Za-z]+).*", "\\1",
                                                  fp_stats$player),"-defense")) }
  fp_stats <- dplyr::select(fp_stats, Season, player, fantasypro_id, pos, team, dplyr::everything()) %>%
    janitor::clean_names() %>% as_tibble(fp_stats)
}
RandalMorris/AnalyticsFootball documentation built on Dec. 18, 2021, 9:52 a.m.