# R/linear.R

#' Linear MOV Elo Ratings
#'
#' Calculates margin-of-victory (MOV) Elo ratings using a linear model that
#' uses information only from the MOV.
#'
#' @param winners Character vector or formula specifying the winners of each
#'   result
#' @param losers Character vector or formula specifying the losers of each
#'   result
#' @param margin Numeric vector or formula specifying the margin of victory,
#'   given as winner score - loser score
#' @param k.margin Numeric value of the learning rate to be applied to the MOV
#' @param scale.margin Numeric scaling factor applied in the expectation step
#'   for the MOV
#' @param data Data frame containing winner, loser, and margin variables if
#'   using a data/formula specification.
#' @param default Numeric value of the initial rating to assign to new
#'   competitors
#'
#' @return A data frame with one row per retained result and the columns
#'   \code{winner}, \code{loser}, \code{winner_margin},
#'   \code{winner_before_elo}, \code{loser_before_elo},
#'   \code{margin_prediction}, \code{winner_elo} and \code{loser_elo}, i.e.
#'   the Elo ratings before and after each event result. If no results remain
#'   (empty input, or every margin is missing) a zero-row data frame with the
#'   same columns is returned.
#'
#' @section Details:
#' Datasets should be ordered from first game result to last.
#' Competitors must be uniquely and consistently identified in the winner and
#' loser vectors.
#' Missing values in the MOV variable will be omitted and will throw a warning.
#' The winner, loser, and margin inputs must all have the same length;
#' otherwise an error is raised.
#'
#' The E-step for the linear model involves a proportional model for the MOV
#' in terms of the difference in player ratings. The expected margin for
#' player \eqn{i} against \eqn{j} is:
#' \deqn{\hat{MOV} = \frac{R_i - R_j}{\sigma_{margin}}}.
#' A reasonable choice for \eqn{\sigma_{margin}} is 10 to 30 times the
#' standard deviation of the MOV variable.
#' The U-step for the linear model involves updates based on the residual for
#' the MOV only. In terms of the \eqn{i}th player,
#' \deqn{R_{i+1} = R_i + K_{margin} (MOV_{ij} - \hat{MOV}_{ij})}.
#' The loser of each result receives the same adjustment with the opposite
#' sign. The unknown parameter in the update step is the constant learning
#' rate \eqn{K_{margin}}. A reasonable choice for \eqn{K_{margin}} is the
#' standard deviation of the MOV variable.
#' @examples
#' # Grand Slam MOV Elo Rating
#' ratings <- linear(
#'   ~ winner, ~ loser, ~ game_margin,
#'   data = atp_games, k.margin = 3, scale.margin = 75
#' )
#' @export
linear <- function(winners, losers, margin, k.margin, scale.margin, data, default = 1500) {
  if (missing(data)) {
    winners <- as.character(winners)
    losers <- as.character(losers)
  } else {
    # variables() is a package helper defined outside this block; it is used
    # here as returning the winner, loser, and margin vectors in that order.
    v <- variables(winners, losers, margin, data)
    winners <- as.character(v[[1]])
    losers <- as.character(v[[2]])
    margin <- v[[3]]
  }

  # data.frame() silently recycles inputs whose lengths divide evenly, which
  # would yield NA or misaligned ratings; fail loudly instead.
  if (length(winners) != length(losers) || length(winners) != length(margin)) {
    stop(
      "winners, losers, and margin must all have the same length.",
      call. = FALSE
    )
  }

  exclude <- is.na(margin)
  if (any(exclude)) {
    warning("Missing values in MOV found and will be excluded.")

    winners <- winners[!exclude]
    losers <- losers[!exclude]
    margin <- margin[!exclude]
  }

  nlength <- length(winners)

  # Preallocate plain vectors; writing into data-frame columns element by
  # element inside the loop copies the frame repeatedly.
  winner_before_elo <- numeric(nlength)
  loser_before_elo <- numeric(nlength)
  margin_prediction <- numeric(nlength)
  winner_after_elo <- numeric(nlength)
  loser_after_elo <- numeric(nlength)

  # Current rating of every competitor seen so far, keyed by name.
  ratings <- list()

  # seq_len() yields an empty sequence when there are no results, whereas
  # 1:nlength would iterate over c(1, 0) and index past the end.
  for (i in seq_len(nlength)) {
    cur_winner <- winners[i]
    cur_loser <- losers[i]

    # lookup() is a package helper defined outside this block; presumably it
    # returns the stored rating, or `default` for an unseen competitor
    # (TODO confirm against its definition).
    winner_elo <- lookup(cur_winner, ratings, default)
    loser_elo <- lookup(cur_loser, ratings, default)

    # E-step: expected margin is the rating gap on the MOV scale.
    expected_margin <- (winner_elo - loser_elo) / scale.margin

    # U-step: the winner moves by the scaled residual, the loser by its
    # negation, so the sum of the two ratings is unchanged.
    rating_change <- k.margin * (margin[i] - expected_margin)

    ratings[[cur_winner]] <- winner_elo + rating_change
    ratings[[cur_loser]] <- loser_elo - rating_change

    winner_before_elo[i] <- winner_elo
    loser_before_elo[i] <- loser_elo
    margin_prediction[i] <- expected_margin
    # Read the post-update values back from the store, as the original did.
    winner_after_elo[i] <- ratings[[cur_winner]]
    loser_after_elo[i] <- ratings[[cur_loser]]
  }

  data.frame(
    winner = winners,
    loser = losers,
    winner_margin = margin,
    winner_before_elo = winner_before_elo,
    loser_before_elo = loser_before_elo,
    margin_prediction = margin_prediction,
    winner_elo = winner_after_elo,
    loser_elo = loser_after_elo,
    stringsAsFactors = FALSE
  )
}
# GIGTennis/elomov documentation built on June 15, 2019, 12:01 a.m.