knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)

Simulating NFL Playoff Teams by Game Results

National Football League

Playoff Seed Breakdown

Webscraping Functions

WeeklyScores <- function(Week=1){
  require(rvest)
  require(tidyverse)
  url <- paste("http://www.nfl.com/schedules/2016/REG",Week,sep="")
  html <- read_html(url)
  NFL <- html %>% 
    html_nodes(".schedules-list-date,.time,.away,.home") %>% 
    html_text()
  NFL <- gsub("\r","",gsub("\n","",gsub("\t","",NFL)))
  Games <- length(which(NFL=="FINAL"))
  for (i in 1:Games){
    Date <- ifelse(NFL[(8*(i-1)+1)]!="FINAL",NFL[(8*(i-1)+1)],Date)
    NFL <- if(i==1) {NFL} else if(NFL[(8*(i-1)+1)]==Date) {NFL} else 
      {c(NFL[1:(8*(i-1))],Date,NFL[(8*(i-1)+1):(length(NFL))])}
  }
  tmp <- data.frame(matrix(NFL,nrow=Games,byrow=TRUE))
  tmp <- tmp %>%
    mutate(Date=as.character(X1),AwayTeam=as.character(X3),
    AwayScore=as.numeric(as.character(X5)),HomeTeam=as.character(X8),
    HomeScore=as.numeric(as.character(X6))) %>%
    select(Date,AwayTeam,AwayScore,HomeTeam,HomeScore)
  tmp
}

Webscraping Functions

Statistics functions

Tiebreaking functions

NFLSim

NFLSim <- function(Games=NULL,sims=100,data=WeeklyUpdate()){
  require(rvest)
  require(tidyverse)
  rawscore <- data %>% 
      gather(key="ID",value="Team",c(3,5)) %>% 
      mutate(Score=ifelse(ID=="AwayTeam",AwayScore,HomeScore)) %>% 
      select(Team,Score)
  rawagainst <- data %>% 
      gather(key="ID",value="Team",c(3,5)) %>% 
      mutate(Score=ifelse(ID=="AwayTeam",HomeScore,AwayScore)) %>% 
      select(Team,Score)
  statsscore <- rawscore %>% 
        group_by(Team) %>% 
        summarise(mean=mean(Score,na.rm=TRUE),sd=sd(Score,na.rm=TRUE))
  statsagainst <- rawagainst %>% 
        group_by(Team) %>% 
        summarise(mean=mean(Score,na.rm=TRUE),sd=sd(Score,na.rm=TRUE))
  tmp <- data
  x <- Games

NFLSim cont.

  if (is.null(x)==FALSE){
    y <- gregexpr("s[^a-z]*[0-9]+",x)
    Spot <- as.vector(y[[1]])
    Lens <- attr(y[[1]],"match.length")
    z <- matrix(c(Spot,Lens),nrow=2,byrow=TRUE)
    z[2,] <- apply(z,2,function(x){x[2]=x[1]+x[2]})
    z[2,ncol(z)] <- nchar(x)
    a <- as.vector(c(0,z))
    b <- 0
    for (i in 1:(length(a)-1)) {
      b[i] <- substr(x,a[i]+1,a[i+1])
    }
    b[seq_along(b)[seq_along(b) %% 2 ==0]] <- 
      gsub("[^0-9]","",b[seq_along(b) %% 2 == 0])
    c <- data.frame(matrix(b,ncol=4,byrow=TRUE))
    colnames(c) <- c("AwayTeam","AwayScore","HomeTeam","HomeScore")

NFLSim cont.

    for (j in 1:nrow(c)){
      if (length(which(data$AwayTeam==c$AwayTeam[1] & 
                  data$HomeTeam==c$HomeTeam[1] & 
                  is.na(data$AwayScore))) == 0) {
        stop(paste(
        c$AwayTeam[1]," at ",c$HomeTeam[1],
        " is not a game still remaining to be played.",sep=""))
      }
      tmp$AwayScore[tmp$AwayTeam==c$AwayTeam[j] & 
                    tmp$HomeTeam==c$HomeTeam[j] & 
                    is.na(tmp$AwayScore)] <- c$AwayScore[j]
      tmp$HomeScore[tmp$AwayTeam==c$AwayTeam[j] & 
                    tmp$HomeTeam==c$HomeTeam[j] & 
                    is.na(tmp$HomeScore)] <- c$HomeScore[j]
    }
  }

NFLSim cont.

  for (s in 1:sims){
    tmp$AwayScore <- as.numeric(apply(dat[,3:6],1,function(x){
      ifelse(is.na(x[2]),
        ifelse(max(round(rnorm(1,
          mean=mean(c(statsscore$mean[statsscore$Team==x[1]],statsagainst$mean[statsagainst$Team==x[3]])),
          sd=sqrt((statsscore$sd[statsscore$Team==x[1]]^2+(statsagainst$sd[statsagainst$Team==x[3]])^2)/4))),0)==1,
          2,
          max(round(rnorm(1,
          mean=mean(c(statsscore$mean[statsscore$Team==x[1]],statsagainst$mean[statsagainst$Team==x[3]])),
          sd=sqrt((statsscore$sd[statsscore$Team==x[1]]^2+(statsagainst$sd[statsagainst$Team==x[3]])^2)/4))),0)),x[2])}))
    tmp$HomeScore <- as.numeric(apply(dat[,3:6],1,function(x){
      ifelse(is.na(x[4]),
        ifelse(max(round(rnorm(1,
          mean=mean(c(statsscore$mean[statsscore$Team==x[3]],statsagainst$mean[statsagainst$Team==x[1]])),
          sd=sqrt((statsscore$sd[statsscore$Team==x[3]]^2+(statsagainst$sd[statsagainst$Team==x[1]])^2)/4))),0)==1,
          2,
          max(round(rnorm(1,
          mean=mean(c(statsscore$mean[statsscore$Team==x[3]],statsagainst$mean[statsagainst$Team==x[1]])),
          sd=sqrt((statsscore$sd[statsscore$Team==x[3]]^2+(statsagainst$sd[statsagainst$Team==x[1]])^2)/4))),0)),x[4])}))
    tm3 <- FinalRank(scores=tmp)
    y <- tm3 %>% select(Team,ConfRank)
    if (s==1) {x<-y} else {x<-full_join(x,y,by="Team")}
    tmp <- data
  }
  n <- c(1:sims)
  colnames(x) <- c("Team",sapply(n,function(x){paste("Sim",x,sep="")}))
  x
}

Example

set.seed(513)
library(devtools)
install_github("mstuart2097/STAT585Final")
library(ProjectNFL)
dat <- WeeklyUpdate(16)
tmp <- NFLSim(data=dat)
head(tmp[,c(1:11)])

Graph Function

SeedPlot <- function(TeamX="Bills",Sim=NFLSim(data=WeeklyUpdate()),Plotly=TRUE){
  require(tidyverse)
  require(plotly)
  t <- Sim %>% filter(Team==TeamX) %>% select(-Team)
  count <- sapply(c(1:16),function(x){length(which(t==x))})
  num <- data.frame(count)
  num$seed <- c(1:16)
  gg <- ggplot(num,aes(x=seed,y=count))+geom_bar(stat="identity")+
    xlab("Seed")+ylab("Frequency")+ggtitle(paste("Seeding Plot of",TeamX))+
    theme(plot.title = element_text(hjust = 0.5))
  if(Plotly==TRUE) {ggplotly(gg)} else {gg}
}

Graph Example

SeedPlot(TeamX="Raiders",Sim=tmp,Plotly=TRUE)

Thank You!



mstuart2097/STAT585Final documentation built on May 23, 2019, 8:16 a.m.