R/logistic.R

#' Logistic MOV Elo Ratings
#'
#' This function calculates MOV Elo ratings using a logistic model that combines information from both the win result and the margin of victory (MOV).
#'
#' @param winners Character vector or formula specifying the winners of each result
#' @param losers Character vector or formula specifying the losers of each result
#' @param margin Numeric vector or formula specifying the margin of victory, given as winner score - loser score
#' @param k.win Numeric value of the learning rate applied to the win result
#' @param scale.margin Numeric scaling factor applied in the expectation step for the MOV
#' @param scale.win Numeric scaling factor applied in the expectation step for the win prediction
#' @param alpha Numeric base rate for the logistic function
#' @param data Data frame containing the winner, loser, and margin variables when using a formula specification
#' @param default Numeric value of the initial rating to assign to new competitors
#'
#' @return A data frame with Elo ratings before and after each event result.
#'
#' @section Details:
#' Datasets should be ordered from first game result to last.
#' Competitors must be uniquely and consistently identified in the winner and loser vectors.
#' Results with missing values in the MOV variable are excluded, with a warning.
#' 
#' The E-step for the logistic model is a generalization of the standard Elo model. For the win outcome:
#' \deqn{\hat{W} = \frac{1}{1+\alpha^{(R_j - R_i)/\sigma_{win}}}}.
#' In the standard Elo system, \eqn{\sigma_{win} = 400} and \eqn{\alpha = 10}. 
#' The U-step for the logistic model updates ratings by the residual between a 0-1 logistic transformation of the MOV and the win prediction. For the \eqn{i}th player, 
#' \deqn{R_{i+1} = R_i + K_{win} (L(MOV_{ij}/\sigma_{margin}) - L((R_i - R_j)/\sigma_{win}))}, 
#' where \eqn{L(x) = 1/(1 + \alpha^{-x})}.
#' The unknown parameters are the scaling factors \eqn{\sigma_{margin}} and \eqn{\sigma_{win}}, the base rate \eqn{\alpha}, and the learning rate \eqn{K_{win}}. Choices for \eqn{\sigma_{margin}} are on the scale of twice the standard deviation of the MOV, in keeping with the scale of the win exponent. Typical values for \eqn{K_{win}} are three or more times the value of \eqn{K} in the standard Elo system. A reasonable choice for \eqn{\alpha} is an integer between 2 and 10. 
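#'
#' As a rough illustration of a single update (plain R arithmetic with made-up ratings, not part of the package API), suppose player \eqn{i} is rated 1550, player \eqn{j} is rated 1500, and \eqn{i} wins by a margin of 4 with \eqn{\alpha = 10}, \eqn{\sigma_{win} = 400}, \eqn{\sigma_{margin} = 4}, and \eqn{K_{win} = 60}:
#' \preformatted{
#' w_hat  <- 1 / (1 + 10^(-(1550 - 1500) / 400))  # E-step win expectation, about 0.57
#' mov    <- 1 / (1 + 10^(-4 / 4))                # logistic transform of the MOV, about 0.91
#' update <- 60 * (mov - w_hat)                   # U-step, about 20 points added to player i
#' }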
#' @examples
#' # Grand Slam MOV Elo Rating
#' ratings <- logistic(~ winner, ~ loser, ~ game_margin, data = atp_games,
#'                     alpha = 10, k.win = 60, scale.margin = 4, scale.win = 400)
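#'
#' # A minimal sketch on hypothetical toy data (the two results below are made up),
#' # re-using the same parameter choices; handy for inspecting the returned data frame
#' toy <- data.frame(winner = c("A", "B"), loser = c("B", "C"), game_margin = c(6, 2))
#' ratings_toy <- logistic(~ winner, ~ loser, ~ game_margin, data = toy, alpha = 10,
#'                         k.win = 60, scale.margin = 4, scale.win = 400)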
#' @export
logistic <- function(winners, losers, margin, k.win, scale.margin, scale.win, alpha, data, default = 1500) {

	   # running list of each competitor's current rating, keyed by competitor name
	   ratings <- list()
	   
	   if (missing(data)) {
	     winners <- as.character(winners)
	     losers <- as.character(losers)
	   } else {
	     v <- variables(winners, losers, margin, data)
	     winners <- as.character(v[[1]])
	     losers <- as.character(v[[2]])
	     margin <- v[[3]]
	   }
	   
	   if (any(is.na(margin))) {
	     warning("Missing values in MOV found and will be excluded.")
	     exclude <- is.na(margin)
	     winners <- winners[!exclude]
	     losers <- losers[!exclude]
	     margin <- margin[!exclude]
	   }
	   
	   nlength <- length(winners)
		
	   # pre-allocate the output data frame, one row per game result
	   results <- data.frame(
	     winner = winners,
	     loser = losers,
	     winner_margin = margin,
	     winner_before_elo = numeric(nlength),
	     loser_before_elo = numeric(nlength),
	     win_prediction = numeric(nlength),
	     winner_elo = numeric(nlength),
	     loser_elo = numeric(nlength),
	     stringsAsFactors = FALSE
	   )
              			
	
	   # walk through the results in order, updating ratings after each game
	   for (i in seq_len(nlength)) {
		
	    cur_winner <- winners[i]
	    cur_loser <- losers[i]
	
	    # current pre-game ratings; unseen competitors start at `default`
	    winner_elo <- lookup(cur_winner, ratings, default)
	    loser_elo <- lookup(cur_loser, ratings, default)
	
	    # E-step: win expectation and 0-1 logistic transform of the observed margin
	    winner_prob <- 1/(1 + alpha^(-(winner_elo - loser_elo)/scale.win))
	    margin_prob <- 1/(1 + alpha^(-margin[i]/scale.margin))
	    	    
	    # U-step: shared update from the margin residual, applied symmetrically
	    winner_update <- k.win * (margin_prob - winner_prob)
	    loser_update <- -winner_update
	    	     	   
	    # store the updated ratings and record this result's before/after ratings
	    ratings[[cur_winner]] <- winner_elo + winner_update
	    ratings[[cur_loser]] <- loser_elo + loser_update
	    results$win_prediction[i] <- winner_prob
	    results$winner_elo[i] <- ratings[[cur_winner]]
	    results$loser_elo[i] <- ratings[[cur_loser]]
	    results$winner_before_elo[i] <- winner_elo
	    results$loser_before_elo[i] <- loser_elo
	    
	  }
	  
	  results
}