# R/oddscrapr.R

# Defines functions oddscrapr

#' oddscrapr
#'
#' Scrapes betting lines for any season of college basketball.
#'
#' Takes inputs for beginning and end of season and scrapes www.pickmonitor.com for the NCAA Basketball betting lines of every date between those dates (inclusive).
#'
#' @author Kurt Wirth
#'
#' @param x The beginning date of data scraping, in the format
#' "yyyy/mm/dd".
#'
#' @param y The ending date of data scraping, in the format
#' "yyyy/mm/dd".
#'
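#' @return A tidy dataframe with the following variables for all games within
#' the date range: AwayTeamName, HomeTeamName, MoneyLine, Line, Line_Profit,
#' OverUnder, Profit_OverUnder and Date. The same dataframe is also assigned to
#' `BettingLines` in the global environment.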
#'
#' @examples
#' \dontrun{oddscrapr("2017/11/10", "2018/04/02")}
#' @export

library(rvest)

library(dplyr)

library(stringr)

# Scrape NCAA Basketball betting lines from www.pickmonitor.com for every day
# from `x` to `y` (inclusive), issuing one page request per day.
#
# x, y: start and end dates in any form as.Date() accepts (e.g. "yyyy/mm/dd").
#
# Returns (invisibly) a data frame with one row per game and the columns
# AwayTeamName, HomeTeamName, MoneyLine, Line, Line_Profit, OverUnder,
# Profit_OverUnder and Date. For backward compatibility the same data frame is
# also assigned to `BettingLines` in the global environment.
#
# A day whose page cannot be fetched or parsed is reported with message() and
# skipped: it contributes no rows and never aborts the remaining days.
oddscrapr <- function(x, y) {
  # seq() itself raises an error when the start date is after the end date.
  days <- format(seq(as.Date(x), as.Date(y), by = "day"), "%Y-%m-%d")

  # Fetch and parse the odds table of one day, given as a "yyyy-mm-dd" string.
  scrape_day <- function(day) {
    page <- paste0("https://www.pickmonitor.com/past-odds/", day)
    cells <- rvest::html_text(
      rvest::html_nodes(rvest::read_html(page), xpath = "//td | //h3")
    )

    heading <- which(cells == "NCAA Basketball")
    if (length(heading) == 0L) {
      stop("no NCAA Basketball section on ", page, call. = FALSE)
    }
    first <- heading[1L] + 1L
    if (first > length(cells)) {
      stop("NCAA Basketball section on ", page, " is empty", call. = FALSE)
    }

    # The section runs until the next cell made only of letters, spaces and
    # hyphens (presumably the next sport's heading); a lone "-" is a table
    # cell, not a heading. With no later heading, it runs to the last cell.
    rest <- cells[first:length(cells)]
    next_heading <- which(grepl("^[A-Za-z -]+$", rest) & rest != "-")
    last <- if (length(next_heading) > 0L) {
      first + next_heading[1L] - 2L
    } else {
      length(cells)
    }
    if (last < first) {
      stop("NCAA Basketball section on ", page, " is empty", call. = FALSE)
    }

    # assumes every game occupies exactly five consecutive cells -- TODO confirm
    games <- as.data.frame(
      matrix(cells[first:last], ncol = 5, byrow = TRUE),
      stringsAsFactors = FALSE
    )

    games %>%
      dplyr::select(-V1) %>%
      tidyr::separate(
        V2,
        into = c("AwayTeamName", "HomeTeamName"),
        sep = "vs"
      ) %>%
      tidyr::separate(V4, into = c("Line", "Line_Profit"), sep = " ") %>%
      tidyr::separate(
        V5,
        into = c("OverUnder", "Profit_OverUnder"),
        sep = " "
      ) %>%
      dplyr::mutate(
        # Digits are stripped from the team cells before trimming whitespace.
        AwayTeamName = stringr::str_trim(gsub("[0-9]+", "", AwayTeamName)),
        HomeTeamName = stringr::str_trim(gsub("[0-9]+", "", HomeTeamName)),
        Line = as.numeric(Line),
        Line_Profit = as.numeric(Line_Profit),
        OverUnder = as.numeric(OverUnder),
        Profit_OverUnder = as.numeric(Profit_OverUnder),
        Date = as.Date(day)
      ) %>%
      dplyr::rename("MoneyLine" = V3)
  }

  # Each day yields its own result (NULL on failure), so a failed day can
  # neither reuse the previous day's rows nor stop the run.
  daily <- lapply(days, function(day) {
    tryCatch(
      scrape_day(day),
      error = function(e) {
        message("Skipping ", day, ": ", conditionMessage(e))
        NULL
      }
    )
  })

  # Bind once at the end; the leading empty data.frame keeps the result a
  # plain data.frame even when every day was skipped.
  df <- dplyr::bind_rows(
    c(list(data.frame()), Filter(Negate(is.null), daily))
  )

  assign("BettingLines", df, envir = globalenv())
  invisible(df)
}
# kurtawirth/ncaahoopsscraper documentation built on Feb. 7, 2021, 1:24 a.m.