\tableofcontents
library(tidyverse)
games <- readr::read_csv('data/afl.csv', col_types = cols( Date = col_character(), AwayTeam = col_character(), AwayScore = col_double(), HomeTeam = col_character(), HomeScore = col_double(), IsNeutralSite = col_logical()) ) games <- data.frame(games, stringsAsFactors = FALSE) games$IsNeutralSite <- FALSE games$Date <- lubridate::parse_date_time(games$Date, 'ymd IM p', tz = 'Australia/Melbourne') games$GameId <- seq_len(nrow(games)) gameCount <- nrow(games) print(gameCount)
Get list of AFL games. Games have the time of the games, the names of the teams involved, and the final score of the game. Additionally, a game ID is generated for each game. There are r gameCount
games.
games$GameTotal <- games$AwayScore + games$HomeScore games$HomeMOV <- games$HomeScore - games$AwayScore # MOV = Margin of Victory
For each game, the GameTotal (sum of both scores) and HomeMOV (Margin Of Victory for the home team) are calculated.
source('R/BradleyTerry.R') bradleyTerryResults <- bradley_terry(games$GameId, games$HomeTeam, games$AwayTeam, games$HomeScore, games$AwayScore) bradleyTerryRatings <- bradleyTerryResults$teamStrengths ratingsDisplay <- data.frame(Team = names(bradleyTerryResults$teamStrengths), Rating = bradleyTerryResults$teamStrengths, stringsAsFactors = FALSE) rownames(ratingsDisplay) <- NULL knitr::kable(ratingsDisplay, caption = '2018 AFL Team Ratings')
The team ratings, per the Bradley Terry method, can be found above.
source('R/SetupGames.R') sseMinFunction <- function(homeStrength, awayStrength, homeCoef, awayCoef, homeAdvCoef){ return(homeAdvCoef + (homeStrength * homeCoef) + (awayStrength * awayCoef)) } f <- function(p, teamStrengths, games){ # p = PARAMETERS # p[1] = Home Field Advantage Coefficient # p[2] = Home Strength Coefficient # p[3] = Away Strength Coefficient g <- games %>% mutate(HomeStrength = teamStrengths[HomeTeamId], AwayStrength = teamStrengths[AwayTeamId], SseMinResult = sseMinFunction( homeStrength = HomeStrength, awayStrength = AwayStrength, homeAdvCoef = p[1], homeCoef = p[2], awayCoef = p[3]), MarginOfVictoryErrSq = (HomeMarginOfVictory - SseMinResult) ** 2) result <- sum(g$MarginOfVictoryErrSq) return(result) }
source('R/SetupGames.R') g <- setup_games(gameIds = games$GameId, homeTeamIds = games$HomeTeam, awayTeamIds = games$AwayTeam, homeScores = games$HomeScore, awayScores = games$AwayScore, isNeutralSite = FALSE)
p <- rep(1, times = 3) coefficientOptimization <- nlm(f, p, teamStrengths = bradleyTerryRatings, games = g) coefs <- list(HomeFieldAdv = coefficientOptimization$estimate[1], HomeStr = coefficientOptimization$estimate[2], AwayStr = coefficientOptimization$estimate[3]) print(coefs)
g <- g %>% mutate(EstimatedSpread = sseMinFunction(homeStrength = as.numeric(bradleyTerryRatings[HomeTeamId]), awayStrength = as.numeric(bradleyTerryRatings[AwayTeamId]), homeAdvCoef = as.numeric(coefs['HomeFieldAdv']), homeCoef = as.numeric(coefs['HomeStr']), awayCoef = as.numeric(coefs['AwayStr'])))
m <- lm(formula = HomeMarginOfVictory ~ EstimatedSpread, data = g) stdDev <- summary(m)$sigma g <- g %>% mutate(HomeWinProb = 1 - pnorm(0, mean = EstimatedSpread, sd = stdDev), PredictedResult = ifelse(HomeWinProb > 0.5, 1, 0), IsResultPredicted = ifelse(GameResult == PredictedResult, 1, 0), PredictionError = EstimatedSpread - GameResult, ProbErrorSq = (GameResult - PredictedResult) ** 2, LogError = (GameResult * log(HomeWinProb)) + ((1-GameResult) * log(1 - HomeWinProb))) coefIntercept <- m$coefficients['(Intercept)'] coefSpread <- m$coefficients['EstimatedSpread']
benchmarks <- data.frame(RawAccuracy = mean(g$IsResultPredicted), RSQ = summary(m)$r.squared, RMSE = sqrt(mean(g$PredictionError ** 2)), MAE = mean(abs(g$PredictionError)), BrierScore = mean(g$ProbErrorSq), LogLoss = -1 * mean(g$LogError))
predictByIds <- function(homeTeamId, awayTeamId, homeSpread = 0){ homeStrength <- bradleyTerryRatings[as.character(homeTeamId)] awayStrength <- bradleyTerryRatings[as.character(awayTeamId)] return(predict(homeStrength, awayStrength, homeSpread)) } predict <- function(homeStrength, awayStrength, homeSpread = 0){ homeGoalsFavored <- -1 * homeSpread awayGoalsFavored <- -1 * homeGoalsFavored sseMinResult <- sseMinFunction(homeStrength = homeStrength, awayStrength = awayStrength, homeAdvCoef = as.numeric(coefs['HomeFieldAdv']), homeCoef = as.numeric(coefs['HomeStr']), awayCoef = as.numeric(coefs['AwayStr'])) predictedHomeSpread <- as.numeric(coefIntercept + (coefSpread * sseMinResult)) predictedAwaySpread <- -1 * predictedHomeSpread homeWinPct <- 1 - pnorm(homeGoalsFavored + ifelse(homeGoalsFavored%%1==0, 0.5, 0), mean = predictedHomeSpread, sd = stdDev) awayWinPct <- 1 - pnorm(awayGoalsFavored + ifelse(awayGoalsFavored%%1==0, 0.5, 0), mean = predictedAwaySpread, sd = stdDev) drawWinPct <- 1 - (homeWinPct + awayWinPct) result <- list(HomeSpread = homeSpread, HomeWinPct = homeWinPct, DrawWinPct = drawWinPct, AwayWinPct = awayWinPct) return(result) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.