knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(sysmod)
pacman::p_load(tidyverse, optimx, caret, lubridate)

Variable Dose-response

Data initialisation

example_data <- data.frame("training_load" = c(rnorm(100, mean = 1000, sd = 150),
                                               rep(0,50)),
                           "rest" = c(rnorm(100, mean= 2, sd=1),
                                      rep(1,50)),
                           "perf" = c(seq(from = 10, to = 30, length.out = 100),
                                      rep(0,50)) * rnorm(150, 1, 0.05),
                           "datetime" = seq(ISOdate(2020, 1, 1), by = "day", length.out = 150))

A simulated data set with 150 observations, including training sessions, rest days and performances is used for the purpose of functions presentation. A gaussian noise is applied to the simulated performances. Initial values of gain and time constants are settled according to the values found in the litterature.

data <- example_data
target <- "perf"
vars <- list("input" = example_data$training_load, "time" = example_data$rest)
k1 = 0.1
k3 = 0.01
tau1 = 40
tau2 = 20
tau3 = 5

Adaptations calculation

adaptation_fn <- function(data, k1, tau1, vars){

  adapt_val <- vector(length=nrow(data))

  # Make the function return aberration if the time constant takes a negative or 0 value
  if (is.na(tau1)) adapt_val <- rep(-9000, nrow(data))
  else if(tau1 == 0) adapt_val <- rep(-9000, nrow(data))
  else {
    adapt_val[1] <- 0
    for(i in 2:nrow(data)){
      adapt_val[i] <- (k1*vars[["input"]][i-1] + adapt_val[i-1])*exp(-vars[["time"]][i]/(tau1))
    }
  }
  return(adapt_val)
}

adaptations <- adaptation_fn(data = example_data, k1 = 0.5, tau1 = 40, vars = list("input" = example_data$training_load, "time" = example_data$rest))
plot(adaptations)

Fatigue calculation

k2i_fn <- function(data, k3, tau3, vars){

  k2i_val <- vector(length=nrow(data))

  if (is.na(tau3)) k2i_val <- rep(-9000, nrow(data))
  else if(tau3==0) k2i_val <- rep(-9000, nrow(data))
  else {
    k2i_val[1] <- 0
    for(i in 2:nrow(data)){
      k2i_val[i] <- (k3*vars[["input"]][i-1] + k2i_val[i-1])*exp(-vars[["time"]][i]/(tau3))
    }
  }
  return(k2i_val)
}
fatigue_fn <- function(data, k3, tau2, tau3, vars){
  fat <- vector(length=nrow(data))
  if (is.na(tau3) | is.na(tau2)) apt <- rep(-9000, nrow(data))
  else if (tau3==0 | tau2==0) fatigue <- rep(-9000, nrow(data))
  else {
    fat[1] <- 0
    k2i <- k2i_fn(data, k3, tau3, vars)
    for(i in 2:nrow(data)){
      fat[i] <- (k2i[i-1]*vars[["input"]][i-1]+fat[i-1])*exp(-vars[["time"]][i]/(tau2))
    }
  }
  return(fat)
}

fatigue <- fatigue_fn(data = example_data, k3 = 0.1, tau2 = 10, tau3 = 5, vars = list("input" = example_data$training_load, "time" = example_data$rest))
plot(fatigue)

The model

Equation

Eq. :

$$\hat{p}^n = p^{*} + k_1 \: \sum_{i=1}^{n-1} w_i \: e^{\frac{-(n-i)}{\tau1}} \: - \: \sum_{i=1}^{n-1} k^{i}_2 \: w_i \cdot e^{\frac{-(n-i)}{\tau_2}}$$

init_perf <- function(data, target){
  return(data %>% dplyr::filter(all_of(target) != 0) %>%
           data.table::first() %>%
           dplyr::select(all_of(target)) %>%
           as.numeric())
}
real_perf <- function(data, target){
  res <- NULL
  res <- data[,target]
  res[is.na(res)] <- 0
  return(res)
}

Parameter estimates

perf_model <- function(data, P0, k1, k3, tau1, tau2, tau3, vars, target){

  apt <- adaptation_fn(data, k1, tau1, vars)
  fat <- fatigue_fn(data, k3, tau3, tau2, vars)
  res <- vector(length = length(fat))
  P0 <- P0
  obs <- real_perf(data, target)

  for(i in 1:length(fat)){
    ifelse(obs[i] != 0,
           res[i] <- P0 + apt[i] - fat[i],
           res[i] <- 0)
  }
  return(res)
}
RSS <- function(theta, data, target, vars){
  y <- real_perf(data, target)
  y_hat <- perf_model(data, P0=theta[1], k1=theta[2], k3=theta[3], tau1=theta[4], tau2=theta[5], tau3=theta[6], vars, target)
  diff <- rep(0, length=length(y))

  for(i in 1:length(y)){
    if(y[i]!=0){
      diff[i] <- y[i]-y_hat[i]
    }
  }
  rss <- sum((diff)^2)
  return(rss)
}

Main function

These functions being presented, we can model the performance according to the variable dose-response model (Busso, 2003).

Performance modelling within a simple 80 / 20 data split for model validation

P0_init <- init_perf(data = data, target = target)
theta_init <- c(P0_init = init_perf(data = example_data, target = "perf"), k1_init = 0.5, k3_init = 0.1, tau1_init = 40, tau2_init = 20, tau3_init = 5)
lower <- c(P0_init - 0.10 * P0_init, 0, 0, 10, 1, 1)
upper <- c(P0_init, 1, 1, 80, 40, 10)
model_results <- sysmod(data = example_data,
       vars = list("input" = example_data$training_load, "time" = example_data$rest),
       target = "perf", date_ID = "datetime",
       specify = list("theta_init" = theta_init, "lower" = lower, "upper" = upper, "optim.method" = "nlm"),
       validation.method = "simple",
       specs = list("initialWindow" = 0.8, "horizon" = 0.2, "fixedWindow" = FALSE))
res <- data.frame("RMSE" = model_results[["rmse_vec"]],
                  "MAE" = model_results[["MAE_vec"]],
                  "Rsquared" = model_results[["Rsq_vec"]])
knitr::kable(x = res, format = "simple", digits = 3)

Performance modelling within a time series cross-validation.

 model_results_TSCV <- sysmod(data = example_data,
      vars = list("input" = example_data$training_load, "time" = example_data$rest),
      target = "perf", date_ID = "datetime",
      specify = list("theta_init" = theta_init, "lower" = lower, "upper" = upper, "optim.method" = "nlm"),
      validation.method = "TS-CV",
      specs = list("initialWindow" = 50, "horizon" = 15, "fixedWindow" = FALSE))
res_TSCV <- data.frame("RMSE" = mean(model_results_TSCV[["rmse_vec"]]),
                  "MAE" = mean(model_results_TSCV[["MAE_vec"]]),
                  "Rsquared" = mean(model_results_TSCV[["Rsq_vec"]]))
knitr::kable(x = res_TSCV, format = "simple", digits = 3)
rmse_train <- c(model_results_TSCV$dfs %>%
  filter(base == "train") %>%
  group_by(folder) %>%
  summarise(rmse = caret::RMSE(pred = predicted, obs= perf)) %>%
  dplyr::select(rmse)) %>%
  unlist() %>% as.numeric()

rmse_test <- c(model_results_TSCV$dfs %>%
  filter(base == "test") %>%
  group_by(folder) %>%
  summarise(rmse = caret::RMSE(pred = predicted, obs= perf)) %>%
  dplyr::select(rmse)) %>%
  unlist() %>% as.numeric()


df_boxplot <- data.frame(rmse_train = rmse_train, 
                         rmse_test = rmse_test) %>%
  pivot_longer(cols = c("rmse_train", "rmse_test"), names_to = "base") %>%
  mutate(base = as.factor(base))
df_boxplot$base <- factor(df_boxplot$base, levels = c("rmse_train", "rmse_test"))


ggplot(df_boxplot, mapping = aes(x = base, y = value, colour = base)) +
  geom_boxplot() +
  scale_colour_discrete(name = "Distributions") +
  ylab("RMSE") +
  theme_classic()
df_to_plot2 <- model_results_TSCV$dfs %>% filter(folder == max(folder)) %>%
  mutate("model" = "model")

ggplot(data=df_to_plot2, mapping = aes(x = datetime, y = perf, colour = base)) +
  geom_point() +
  scale_colour_manual(name = "data",
                      breaks = c("train", "test"),
                      values = c("black", "red"))+
  geom_line(data=df_to_plot2, aes(x = datetime, y = predicted, linetype = as.factor(model)), col = "red")+
  scale_linetype_discrete(name = "")+
  xlab(label = "date")+
  ylab(label = "performance")+
  theme_classic()

References

Busso, T. Variable dose-response relationship between exercise training and performance. Med. Sci. Sports Exerc. 35,1188–1195 (2003).



fimbach/sysmod documentation built on Jan. 1, 2021, 1:21 a.m.