Nothing
#' @title ELO calculation using pit stop data
#' @description Function to calculate ELO ratings from a pit stop dataset
#' @details
#' The function calculates ELO ratings, using the position of each team's pit stop in each round.
#'
#' By default, the \strong{median} stat is used because it better reflects global pit stop performance across teams.
#' It is less sensitive to extreme real data values and outliers, but provides more stability than the mean. For example, a single poor pit stop does not significantly affect efficiency.
#'
#' Meanwhile, the \strong{mean} is more affected by small sample sizes or outliers, such as penalties served by a driver during pit stops, but it is more sensitive to pit stop mistakes.
#'
#' The last statistic uses the best pit stop result by team, ignoring consistency and reflecting maximum peak of performance potential.
#'
#' On the other hand, two methodologies can be applied for ELO calculation: \strong{sequential} or \strong{batch}.
#'
#' In the \strong{sequential approach}, the ELO is updated after each match intra-event, so depends on the order in which matchups are processed.
#' This changes future expectations within the same race, reinforcing early leadership while reducing later gains for the leader and smoothing final differences.
#' Consequently, this approach is more suitable for analyzing individual races.
#'
#' In contrast, the \strong{batch approach} updates the ELO only after the entire event has finished. Race matchups are not considered immediately; instead they are aggregated afterward, making long-term comparisons more fair, neutral and independent of temporal order.
#'
#' \code{k} parameter controls the magnitude of change in each ELO cycle calculation.
#'
#' \code{c} establishes the win probability ratio between two teams based on a specific difference in rating points.
#'
#' \code{d} is the expected probability to win (sensitivity).
#'
#' Default values \code{k}, \code{c} and \code{d} used are based on Hvattum and Arntzen (2010). With these default values, a 400 point ELO difference implies that the stronger team has 10 times more chances of winning than the weaker team.
#' @param pits_data Tibble data generated by the pits() function
#' @param stat_fun Type of stat used in ELO calculations: median (1, by default), mean (2) and min value (3) position
#' @param calc Type of calculation: batch (1, by default) or sequential (2)
#' @param k Weight factor magnitude (velocity) to change ELO ratings (by default, 20)
#' @param c Base factor that establishes the win probability ratio between two teams for a given rating difference in the score ELO calculation (by default, 10)
#' @param d Scaling factor amplitude that determines the sensitivity of the expected win probability to rating differences in the score ELO calculation (by default, 400)
#' @param fml Team family mode. Collapse the different names of the same team structure (by default TRUE, enabled)
#' @param elo ELO provided (if is omitted, 1000 will be used for all by default)
#' @return A tibble containing the ELO calculations
#' @examples
#' \donttest{
#' pitstop_data_elo_example <- tibble::tibble(
#' Pos. = 1:11,
#' Team = c("Team_3","Team_1","Team_1","Team_1","Team_1",
#' "Team_2","Team_1","Team_3","Team_3","Team_2","Team_3"),
#' Driver = rep("Driver", 11),
#' "Time (sec)" = c(2.18, 2.21, 2.24, 3, 4, 4.04, 4.07, 4.08, 7.88, 8.88, 14.54),
#' Lap = 1:11,
#' Points = rep(0, 11),
#' Round = rep(0, 11),
#' Year = rep(2026, 11))
#' pitelo(pitstop_data_elo_example)
#' pitelo(pitstop_data_elo_example, stat_fun = 2)
#' pitelo(pitstop_data_elo_example, stat_fun = 3)
#' }
#' @references Elo, A. E. (1978). \emph{The rating of chessplayers, past and present}. Arco Publishing. ISBN-10: 0668047216.
#'
#' Glickman, M. E., & Jones, A. C. (1999). Rating the chess rating system. \emph{CHANCE-BERLIN THEN NEW YORK-}, 12, 21-28.
#'
#' Hvattum, L. M., & Arntzen, H. (2010). Using ELO ratings for match result prediction in association football. \emph{International Journal of Forecasting}, 26(3), 460--470. \doi{10.1016/j.ijforecast.2009.10.002}.
#' @importFrom dplyr filter count slice pull mutate recode group_by summarise distinct arrange inner_join desc
#' @importFrom tibble tibble
#' @importFrom stats median setNames
#' @export
pitelo <- function(pits_data, stat_fun = 1, calc = 1, k=20, c=10, d=400, fml = TRUE,elo=NULL) {
### 0.a Checking arguments
if (is.null(stat_fun)) {
stat_fun <- 1 # default
}
if (stat_fun == 1) {
message("Using default median stats")
} else if (stat_fun == 2) {
message("Using mean stats")
} else if (stat_fun == 3) {
message("Using min stats")
} else {
stop("stat_fun argument must be 1, 2 or 3")
}
param1 <- switch(as.character(stat_fun),
"1" = median, # median, default (similar to mean, but you lost sensibility to real extreme data, but more stable than mean for outliers -for one bad pitstop not down your efficiency-
"2" = mean, # mean: global pitstop pace. This penalty teams without data
"3" = min) # min (IGNORES consistency, it reflects max-potential, peak-potential)
if (is.null(calc)) {
calc <- 1 # default
}
if (calc == 1) {
message("Using default batch calc")
} else if (calc == 2) {
message("Using sequential calc")
} else {
stop("calc argument must be 1 or 2")
}
### 0.b PREPROCESSING RAW DATA (CONVERSION TEAM NAMES FROM SAME FAMILY TEAM)
if (fml) {
message("Team family mode enabled")
if (!"Year" %in% names(pits_data)) {
stop("Column 'Year' is required in pits_data")
}
team_family <- list(
c("Toro Rosso","AlphaTauri", "RB", "Racing Bulls"),
c("Renault", "Alpine"),
c("Sauber", "Alfa Romeo", "Audi"),
c("Force India", "Racing Point", "Aston Martin"))
latest_year <- max(pits_data$Year, na.rm = TRUE)
team_map <- unlist(lapply(team_family, function(group) {
tmp <- pits_data %>%
filter(Team %in% group, Year == latest_year) %>%
count(Team)
if (nrow(tmp) == 0) {
return(setNames(group, group))
}
last_team <- tmp %>%
slice(1) %>%
pull(Team)
setNames(rep(last_team, length(group)), group)
}))
pits_data2 <- pits_data %>%
mutate(
Team = recode(Team, !!!team_map)
)
} else {
message("Team family mode disabled")
pits_data2 <- pits_data
}
### 0.c ELO base value
if (is.null(elo) || !is.data.frame(elo)) {
elo <- data.frame(
Team = unique(pits_data2$Team),
Rating = 1000
)
message("elo_data is missing or is not a data.frame \nUsing ELO default value (1000)")
}
### 1. Data observations race by race
team_race <- pits_data2 %>%
group_by(Year, Round, Team) %>%
summarise(
team_score = param1(Pos.), # mean: global pitstop pace. This penalty teams without data or teams with drivers penalties in pits to serve
.groups = "drop" # min (IGNORES consistency, it refects max-potential, peak-potential)
) # median, default (similar to mean, but you lost sensibility to real extreme data, but more stable than mean for outliers -for one bad pitstop or driver penalty not down a lot your efficiency-
### 3. ELO function
elo_update <- function(RA, RB, S, K = k) { # k: Velocity update. Small time differences, must be low
EA <- 1 / (1 + c ^ ((RB - RA) / d)) # expected probability, sensibility model, how much i points i must win?
RA + K * (S - EA) # Standard values based from: https://doi.org/10.1016/j.ijforecast.2009.10.002
}
### 4. Order
schedule <- team_race %>%
distinct(Year, Round) %>%
arrange(Year, Round)
### 5. race by race ELO calc (argument)
if (calc == 1) {
### loop carreras
for (ki in 1:nrow(schedule)) {
y <- schedule$Year[ki]
r <- schedule$Round[ki]
race <- team_race %>%
filter(Year == y, Round == r)
pairwise <- race %>%
inner_join(
race,
by = c("Year", "Round"),
relationship = "many-to-many"
) %>%
filter(Team.x < Team.y) %>%
mutate(
S = ifelse(team_score.x < team_score.y, 1, 0)
)
### ratings PRE-carrera
old_ratings <- elo$Rating
names(old_ratings) <- elo$Team
### acumulador de cambios
delta <- setNames(rep(0, nrow(elo)), elo$Team)
### recorrer duelos
for (i in 1:nrow(pairwise)) {
A <- pairwise$Team.x[i]
B <- pairwise$Team.y[i]
S <- pairwise$S[i]
RA <- old_ratings[A]
RB <- old_ratings[B]
### calcular nuevos ratings hipotéticos
new_A <- elo_update(RA, RB, S, K = k)
new_B <- elo_update(RB, RA, 1 - S, K = k)
### convertir a cambios (IMPORTANTÍSIMO)
change_A <- new_A - RA
change_B <- new_B - RB
### acumular
delta[A] <- delta[A] + change_A
delta[B] <- delta[B] + change_B
}
### aplicar TODO al final
elo$Rating <- elo$Rating + delta
}
} else if (calc == 2) {
for (ki in 1:nrow(schedule)) {
y <- schedule$Year[ki]
r <- schedule$Round[ki]
race <- team_race %>%
filter(Year == y, Round == r)
pairwise <- race %>%
inner_join(
race,
by = c("Year", "Round"),
relationship = "many-to-many"
) %>%
filter(Team.x < Team.y) %>%
mutate(
S = ifelse(team_score.x < team_score.y, 1, 0)
)
for (i in 1:nrow(pairwise)) {
A <- pairwise$Team.x[i]
B <- pairwise$Team.y[i]
S <- pairwise$S[i]
RA <- elo$Rating[elo$Team == A]
RB <- elo$Rating[elo$Team == B]
elo$Rating[elo$Team == A] <- elo_update(RA, RB, S, K = k)
elo$Rating[elo$Team == B] <- elo_update(RB, RA, 1 - S, K = k)
}
}
}
elo_final <- elo %>%
arrange(desc(Rating))
return(elo_final)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.