#' Scrape weekly NFL scores from Pro Football Reference
#'
#' For every requested season and week, downloads the weekly summary page
#' from pro-football-reference.com, locates the first occurrence of each
#' known team name in the page text and parses the number found a few
#' characters after that name as the team's score.
#'
#' @param ssn The starting year of the season from where we are scraping
#'   games. May be a vector of years; every year is scraped in turn.
#' @param wk_start The first week number of games being scraped
#' @param wk_stop The final week number of games being scraped
#'
#' @return A data frame with columns Team (character), Score, Year and Week
#'   (numeric), one row per team found per week, in the order the teams
#'   appear on the page. Rows whose score could not be parsed are dropped.
#'   When nothing is found, a zero-row data frame with the same four columns
#'   is returned.
#' @export
#' @importFrom magrittr %>%
#'
#' @examples
#' \dontrun{
#' # Requires network access (one request per week scraped).
#' G <- scrape_games(ssn = 2017, wk_stop = 17)
#' tail(G)
#' }
scrape_games <- function(ssn = 2021, wk_start = 1, wk_stop) {
  # Where we scrape from
  nfl_base <- "https://www.pro-football-reference.com/"
  # Teams involved from 2010 onward; we look for these specific strings
  nfl_teams <- c(
    "Green Bay Packers",
    "Chicago Bears",
    "Tennessee Titans",
    "Cleveland Browns",
    "Los Angeles Rams",
    "Carolina Panthers",
    "Washington Redskins",
    "Washington Football Team",
    "Philadelphia Eagles",
    "Buffalo Bills",
    "New York Jets",
    "Atlanta Falcons",
    "Minnesota Vikings",
    "Baltimore Ravens",
    "Miami Dolphins",
    "Kansas City Chiefs",
    "Jacksonville Jaguars",
    "Cincinnati Bengals",
    "Seattle Seahawks",
    "Indianapolis Colts",
    "Los Angeles Chargers",
    "Las Vegas Raiders",
    "San Francisco 49ers",
    "Tampa Bay Buccaneers",
    "New York Giants",
    "Dallas Cowboys",
    "Detroit Lions",
    "Arizona Cardinals",
    "Pittsburgh Steelers",
    "New England Patriots",
    "Houston Texans",
    "New Orleans Saints",
    "Denver Broncos",
    "Oakland Raiders",
    "St. Louis Rams",
    "San Diego Chargers"
  )
  # Character length of each team name, used to jump past the name
  name_len <- nchar(nfl_teams)
  # Number of characters skipped between the end of a name and its score
  score_step <- 3

  # Typed zero-row result, returned as-is when nothing was scraped
  empty_scores <- data.frame(
    Team = character(0),
    Score = numeric(0),
    Year = numeric(0),
    Week = numeric(0),
    stringsAsFactors = FALSE
  )

  # Collect one data frame per (year, week) and bind once at the end
  # instead of growing a matrix with rbind() on every team.
  weekly <- list()
  for (year in ssn) {
    for (week in wk_start:wk_stop) {
      # Build the URL of a specific week in a specific season
      url <- paste0(nfl_base, "years/", year, "/week_", week, ".htm")
      # Fetch the page and keep only the text of the #content node
      game <- rvest::read_html(url) %>%
        rvest::html_node("#content") %>%
        rvest::html_text()

      # Position of the first literal occurrence of each team name
      # (-1 when absent, NA when the page text itself is NA).
      hits <- vapply(
        nfl_teams,
        function(team) as.integer(regexpr(team, game, fixed = TRUE)),
        integer(1)
      )
      # which() drops both the -1 misses and any NA positions
      found <- which(hits > 0)
      if (length(found) == 0L) {
        next
      }
      # Keep teams in the order they appear on the page
      found <- found[order(hits[found])]

      # Read the 4 characters starting score_step past the end of each
      # name; parse_number() pulls the number out of surrounding garbage.
      score_loc <- hits[found] + name_len[found] + score_step
      score_text <- substring(game, score_loc, score_loc + 3)
      score <- readr::parse_number(score_text)

      weekly[[length(weekly) + 1L]] <- data.frame(
        Team = nfl_teams[found],
        # as.numeric() strips the "problems" attribute readr may attach
        Score = as.numeric(score),
        Year = as.numeric(year),
        Week = as.numeric(week),
        stringsAsFactors = FALSE
      )
    }
  }

  if (length(weekly) == 0L) {
    return(empty_scores)
  }

  scores <- do.call(rbind, weekly)
  # Drop teams whose score text did not contain a number
  scores <- scores[!is.na(scores$Score), , drop = FALSE]
  rownames(scores) <- NULL
  scores
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.