# R/scrape_funcs.R
#
# Defines functions scrape_data scrape_source
#
# Documented in scrape_data scrape_source

#' Scrape data from multiple sources and multiple positions
#'
#' This function scrapes data from multiple sources and multiple positions and
#' returns a list of \link{tibble}s with the results. Results contain raw data
#' from the sources.
#'
#' When \code{"NumberFire"} is requested together with any of \code{"DL"},
#' \code{"LB"} or \code{"DB"}, an additional \code{"IDP"} position is scraped
#' from the requested sources. Its rows are split by their \code{pos} column and
#' appended to the tables of the matching positions; the \code{"IDP"} entry
#' itself is not part of the returned list.
#'
#' @param src the sources that data should be scraped from. Should be one or more
#' of \code{c("CBS", "ESPN", "FantasyData", "FantasyPros", "FantasySharks",
#' "FFToday", "FleaFlicker", "NumberFire", "Yahoo", "FantasyFootballNerd", "NFL",
#' "RTSports","Walterfootball")}
#' @param pos the positions that data should be scraped for. Should be one or more
#' of \code{c("QB", "RB", "WR", "TE", "K", "DST", "DL", "LB", "DB")}
#' @param season The season for which data should be scraped. Should be set to the
#' current season.
#' @param week The week for which data should be scraped. Set to 0 to get season
#' data.
#' @return A list with one element per requested position, named by position.
#' Each element row-binds the non-\code{NULL} results of the individual sources
#' and carries a \code{data_src} column identifying the source. Where a
#' \code{site_src} column is present and not \code{NA}, \code{data_src} is
#' rewritten as \code{"<data_src>: <site_src>"}. The \code{season} and
#' \code{week} arguments are stored as attributes of the list.
#' @export
scrape_data <- function(
  src = c("CBS", "ESPN", "FantasyData", "FantasyPros", "FantasySharks", "FFToday",
          "FleaFlicker", "NumberFire", "Yahoo", "FantasyFootballNerd", "NFL",
          "RTSports","Walterfootball"),
  pos = c("QB", "RB", "WR", "TE", "K", "DST", "DL", "LB", "DB"),
  season = 2018, week = 0) {

  src <- match.arg(src, several.ok = TRUE)
  pos <- match.arg(pos, several.ok = TRUE)

  # Scalar condition, so use the short-circuiting && rather than elementwise &.
  if ("NumberFire" %in% src && any(c("DL", "LB", "DB") %in% pos)) {
    pos <- c(pos, "IDP")
  }

  # Name the positions by themselves so every list built from them is keyed
  # by position.
  names(pos) <- pos

  # Scrape source by source (outer) and position by position (inner), then
  # flip the nesting so the result is keyed by position first. Sources that
  # return NULL for a position are dropped before the tables are stacked.
  src_data <- map(projection_sources[src], function(source) {
    map(pos, function(p) scrape_source(source, season, week, p))
  }) %>%
    transpose() %>%
    map(discard, is.null) %>%
    map(bind_rows, .id = "data_src")

  # Distribute the combined IDP table over the individual position tables.
  if ("IDP" %in% names(src_data)) {
    idp_data <- split(src_data$IDP, src_data$IDP$pos)
    for (p in names(idp_data)) {
      src_data[[p]] <- bind_rows(list(src_data[[p]], idp_data[[p]]))
    }
  }

  # Where a table records the originating site, fold it into data_src.
  src_data <- map(src_data, function(tbl) {
    if (!"site_src" %in% names(tbl)) {
      return(tbl)
    }
    mutate(
      tbl,
      data_src = if_else(
        is.na(site_src), data_src, paste(data_src, site_src, sep = ": ")
      )
    )
  })

  # Keep only the positions that were asked for; this also drops the helper
  # "IDP" entry and any position tables created solely by the IDP split.
  src_data <- src_data[setdiff(pos, "IDP")]
  attr(src_data, "season") <- season
  attr(src_data, "week") <- week

  src_data
}

#' Scrape data for a specific position from a single source
#'
#' Prints the position and the URL reported by the source's \code{get_url()}
#' method, then performs the scrape. A source whose class includes
#' \code{"html_source"} is scraped through the object returned by its
#' \code{open_session()} method; \code{"json_source"} and \code{"xlsx_source"}
#' objects are scraped by calling their \code{scrape()} method directly.
#'
#' @param src a source object providing \code{get_url()} and \code{scrape()}
#' (and \code{open_session()} for html sources).
#' @param season the season passed on to the source.
#' @param week the week passed on to the source.
#' @param position the position passed on to the source.
#' @return Whatever the source's scrape call returns.
#' @export
scrape_source <- function(src, season, week, position) {
  known_types <- c("html_source", "json_source", "xlsx_source")
  source_kind <- intersect(known_types, class(src))

  # Progress output: announce the position and the URL about to be requested.
  target_url <- src$get_url(season, week, position)
  cat("Scraping", position, "projections from \n", target_url, "\n")

  # Only html sources need a session; every other recognised type falls
  # through to the default branch.
  scraped <- switch(
    source_kind,
    "html_source" = src$open_session(season, week, position)$scrape(),
    src$scrape(season, week, position)
  )
  scraped
}
# MrDAndersen/ffwebscrape documentation built on May 22, 2019, 1:51 p.m.