####################### Function To Get a Schedule by Date #####################
#' Get Master Schedule
#'
#' Gets schedule for all games on a given date
#'
#' @param date Date for which to get schedule (YYYY-MM-DD)
#' @return A data frame of the day's schedule of games
#' @export
get_master_schedule <- function(date) {
### Error Testing
if(is.na(date)) {
stop("date is missing with no default")
}
tmp <- try(as.Date(date))
if(class(tmp) == "try-error") {
stop("Please enter valid date in the form YYYY-MM-DD")
}
date_ <- gsub("-", "", as.character(date))
url <- paste0("https://www.espn.com/mens-college-basketball/schedule/_/date/", date_)
z <- XML::readHTMLTable(RCurl::getURL(url))
if(length(z) == 0) {
cat("No games on", as.character(date), "\n")
return(NULL)
} else if(length(z) > 1) {
schedule <- as.data.frame(z[[1]])[,c(1,2)]
completed <- as.data.frame(z[[2]][, 1:3])
names(completed) <- c("away", "home", "result")
names(schedule) <- c("away", "home")
} else {
### No Games Scheduled
if(z[[1]][1,1] == "No games scheduled") {
cat("No games on", as.character(date), "\n")
return(NULL)
}
completed <- as.data.frame(z[[1]][,1:3])
names(completed) <- c("away", "home", "result")
schedule <- NA
}
ix_real <-
which(completed$result != 'Forfeit' &
completed$result != "Canceled" &
completed$result != "Postponed")
n_canceled <- sum(grepl("Canceled", completed$result))
n_postponed <- sum(grepl("Postponed", completed$result))
n_forfeit <- sum(grepl("Forfeit", completed$result))
completed <- dplyr::filter(completed, result != 'Forfeit', result != "Canceled", result != "Postponed")
n_completed <- nrow(completed)
### Extract Ranking
ranking <- function(team) {
rank <- gsub("[^0-9]", "", gsub("Men's Basketball.*$", '', team))
return(ifelse(rank == "", NA, rank))
}
### Clean Team Name
clean <- function(team) {
team <- gsub('\\u00c2', '', team)
team <- gsub('\\u00c3\\u00a9','\\u00e9', team)
team <- gsub('@','', team)
team <- gsub('Â', '', team)
team <- gsub("[#0-9]", "", team)
# team <- gsub("\\s[A-Z]*-*[A-Z]*$", "", team)
team <- gsub("TA&M", "", team)
team <- gsub("W&M", "", team)
team <- gsub("\\s*$", "", gsub("^\\s*", "", team))
team <- gsub("Men's Basketball.*$", '', team)
team <- gsub('^[^[:alnum:]]*', '', team)
return(team)
}
if(any(!is.na(schedule[1]))) {
schedule <- dplyr::mutate(schedule,
"away" = as.character(sapply(schedule$away, clean)),
"home" = as.character(sapply(schedule$home, clean)),
"away_rank" = as.numeric(sapply(schedule$away, ranking)),
"home_rank" = as.numeric(sapply(schedule$home, ranking)),
"away_score" = NA,
"home_score" = NA)
}
x <- RCurl::getURL(url)
in_progress <- strsplit(x, "/mens-college-basketball/game\\?gameId=")[[1]]
if(date == Sys.Date() & length(in_progress) != 1) {
ix <- grepl('class=\"Schedule__liveLink ', in_progress) | grepl('^\\d+\">\\d+:\\d+\\s?PM', in_progress)
ix_completed <- !ix
in_progress <- in_progress[ix]
in_progress <- suppressWarnings(as.numeric(unname(sapply(in_progress, function(y){ substring(y, 1, 9) }))))
in_progress <- in_progress[!is.na(in_progress) & !duplicated(in_progress)]
x <- RCurl::getURL(url)
x <- strsplit(x, "/mens-college-basketball/game\\?gameId=")[[1]]
x <- x[ix_completed]
x <- suppressWarnings(as.numeric(unname(sapply(x, function(y){ substring(y, 1, 9) }))))
x <- x[!is.na(x) & !duplicated(x)]
} else {
ix <- -1
in_progress <- in_progress[ix]
in_progress <- suppressWarnings(as.numeric(unname(sapply(in_progress, function(y){ substring(y, 1, 9) }))))
in_progress <- in_progress[!is.na(in_progress) & !duplicated(in_progress)]
x <- strsplit(x, "/mens-college-basketball/game/_/gameId/")
x <- suppressWarnings(as.numeric(unname(sapply(x, function(y){ substring(y, 1, 9) }))))
x <- x[-1]
}
n_scheduled <- 0
if(any(class(schedule) == 'data.frame')) {
n_scheduled <- nrow(schedule)
}
# x <- strsplit(x, "/mens-college-basketball/game/_/gameId/")
# x <- suppressWarnings(as.numeric(unname(sapply(x, function(y){ substring(y, 1, 9) }))))
# x <- x[-1]
# if(date == Sys.Date()) {
# x <- c(in_progress, x)
# }
x <- c(in_progress, x)
x <- x[!is.na(x) & !duplicated(x)]
if(n_scheduled > 0) {
ix_real <- c(1:n_scheduled, n_scheduled + ix_real)
}
x <- x[ix_real]
### Add in Completed Games
find_anchor <- function(team) {
cleaned <- clean(team)
team <- gsub("[#0-9]", "", team)
team <- gsub("\\s*$", "", gsub("^\\s*", "", team))
anchor <- unlist(strsplit(team, ""))[-c(1:(nchar(cleaned) + 1))]
return(paste0(anchor, collapse = ""))
}
completed <- dplyr::mutate(completed,
"away" = as.character(sapply(away, clean)),
"home" = as.character(sapply(home, clean)),
"result" = as.character(result),
"away_rank" = as.numeric(sapply(completed$away, ranking)),
"home_rank" = as.numeric(sapply(completed$home, ranking)),
"away_anchor" = sapply(completed$away, find_anchor),
"away_score" = NA,
"home_score" = NA)
completed$result <- gsub('\\d+:\\d\\d (A|P)M', '', completed$result)
winners <- unname(sapply(completed$result, function(y) { gsub("\\s[0-9]*.*", "", y) }))
losers <- unname(sapply(completed$result, function(y) { gsub('\\s*$', '', gsub('\\(OT\\)', '', gsub('[0-9]*', '', gsub(".*,\\s", "", y)))) }))
scores <- as.numeric(gsub("[^0-9]", "", gsub("\\(.*\\)", "", unlist(strsplit(completed$result, ",")))))
scores[completed$result == 'LIVE' | grepl('PM', completed$result)] <- NA
if(length(scores) > 0) {
winning_scores <- scores[seq(1, length(scores) - 1, 2)]
losing_scores <- scores[seq(2, length(scores), 2)]
# index <- sapply(completed$away_anchor, function(y) { y %in% winners })
index <- c()
for(i in 1:length(winning_scores)) {
home_abbv <- ids$espn_abbrv[ids$team == completed$home[i] ]
away_abbv <- ids$espn_abbrv[ids$team == completed$away[i] ]
if(length(home_abbv) == 0) {
home_abbv <- '---'
}
if(length(away_abbv) == 0) {
away_abbv <- '---'
}
if(home_abbv == winners[i] | away_abbv == losers[i]) {
index <- c(index, F)
} else if(away_abbv == winners[i] | home_abbv == losers[i]) {
index <- c(index, T)
} else if(grepl(tolower(winners[i]), tolower(completed$home[i])) & !grepl(tolower(winners[i]), tolower(completed$away[i]))) {
index <- c(index, F)
} else if(!grepl(tolower(winners[i]), tolower(completed$home[i])) & grepl(tolower(winners[i]), tolower(completed$away[i]))) {
index <- c(index, T)
} else if(tolower(substr(winners[i], 1, 1)) == tolower(substr(completed$home[i], 1,1)) &
tolower(substr(winners[i], 1, 1)) != tolower(substr(completed$away[i], 1,1))) {
index <- c(index, F)
} else if(tolower(substr(winners[i], 1, 1)) != tolower(substr(completed$home[i], 1,1)) &
tolower(substr(winners[i], 1, 1)) == tolower(substr(completed$away[i], 1,1))) {
index <- c(index, T)
} else if(sum(tolower(unlist(strsplit(completed$home[i], ''))) %in% tolower(unlist(strsplit(winners[i], '')))) >
sum(tolower(unlist(strsplit(completed$away[i], ''))) %in% tolower(unlist(strsplit(winners[i], ''))))) {
index <- c(index, F)
} else {
index <- c(index, stringdist::stringdist(completed$home[i], winners[i]) > stringdist::stringdist(completed$away[i], winners[i]))
}
}
completed$home_score[index] <- losing_scores[index]
completed$home_score[!index] <- winning_scores[!index]
completed$away_score[!index] <- losing_scores[!index]
completed$away_score[index] <- winning_scores[index]
}
if(any(!is.na(schedule[1]))) {
schedule <- rbind(schedule, dplyr::select(completed, -away_anchor, -result))
}else{
schedule <- completed
}
schedule <- dplyr::filter(schedule, !is.na(home), !is.na(away))
schedule <- dplyr::mutate(schedule, "game_id" = x[1:nrow(schedule)])
schedule <- dplyr::select(schedule, game_id, away, home, away_rank, home_rank, away_score, home_score)
schedule$date <- as.Date(date)
return(schedule)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.