Nothing
## ---- include = FALSE---------------------------------------------------------
library(knitr)
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE) # to supress R-CMD check
## to fold/hook the code
hook_output <- knit_hooks$get("output")
knit_hooks$set(output = function(x, options) {
lines <- options$output.lines
if (is.null(lines)) {
return(hook_output(x, options)) # pass to default hook
}
x <- unlist(strsplit(x, "\n"))
more <- "..."
if (length(lines) == 1) {
if (length(x) > lines) {
# truncate the output, but add ....
x <- c(head(x, lines), more)
}
} else {
x <- c(if (abs(lines[1]) > 1) more else NULL,
x[lines],
if (length(x) > lines[abs(length(lines))]) more else NULL
)
}
# paste these lines together
x <- paste(c(x, ""), collapse = "\n")
hook_output(x, options)
})
modern_r <- getRversion() >= "4.1.0"
## ----setup, message = F, warning = F------------------------------------------
library(injurytools)
library(dplyr)
library(stringr)
library(tidyr)
library(lme4)
library(pscl)
# library(glmmTMB)
library(MASS)
library(ggplot2)
library(gridExtra)
library(knitr)
## ----setup2, echo = F---------------------------------------------------------
# set the global theme of all plots
theme_set(theme_bw())
## ----datasetup1, warning = F--------------------------------------------------
## 17/18
df_exposures1718 <- prepare_exp(df_exposures0 =
raw_df_exposures |> filter(season == "17/18"),
player = "player_name",
date = "year",
time_expo = "minutes_played") |>
mutate(seasonb = date2season(date))
df_injuries1718 <- prepare_inj(df_injuries0 =
raw_df_injuries |> filter(season == "17/18"),
player = "player_name",
date_injured = "from",
date_recovered = "until")
injd1718 <- prepare_all(data_exposures = df_exposures1718,
data_injuries = df_injuries1718,
exp_unit = "matches_minutes")
## ----datasetup2, warning = F--------------------------------------------------
## 18/19
df_exposures1819 <- prepare_exp(df_exposures0 =
raw_df_exposures |> filter(season == "18/19"),
player = "player_name",
date = "year",
time_expo = "minutes_played") |>
mutate(seasonb = date2season(date))
df_injuries1819 <- prepare_inj(df_injuries0 =
raw_df_injuries |> filter(season == "18/19"),
player = "player_name",
date_injured = "from",
date_recovered = "until")
injd1819 <- prepare_all(data_exposures = df_exposures1819,
data_injuries = df_injuries1819,
exp_unit = "matches_minutes")
## ---- warning = F-------------------------------------------------------------
## calculate injury summary statistics
injds1718 <- injsummary(injd1718, quiet = T)
injds1819 <- injsummary(injd1819, quiet = T)
injds1718p <- injds1718$playerwise
injds1819p <- injds1819$playerwise
injdsp <- bind_rows("Season 17-18" = injds1718p,
"Season 18-19" = injds1819p,
.id = "season")
## ---- eval = F----------------------------------------------------------------
# ## plot
# p1 <- ggplot(data = injdsp) +
# geom_histogram(aes(x = injincidence, fill = season)) +
# facet_wrap(~season) +
# scale_fill_manual(name = "", values = c("#E7B800", "#2E9FDF")) +
# ylab("Number of players") +
# xlab("Incidence (number of injuries per 100 player-match)") +
# ggtitle("Histogram of injury incidence in each season") +
# theme(legend.position = "none")
#
# p2 <- ggplot(data = injdsp) +
# geom_histogram(aes(x = injburden, fill = season)) +
# facet_wrap(~season) +
# scale_fill_manual(name = "", values = c("#E7B800", "#2E9FDF")) +
# ylab("Number of players") +
# xlab("Burden (number of days lost due to injury per 100 player-match)") +
# ggtitle("Histogram of injury burden in each season") +
# theme(legend.position = "none")
#
# grid.arrange(p1, p2, ncol = 1)
## ---- echo = F, warning = F---------------------------------------------------
p1 <- ggplot(data = injdsp) +
geom_histogram(aes(x = injincidence, fill = season)) +
facet_wrap(~season) +
scale_fill_manual(name = "", values = c("#E7B800", "#2E9FDF")) +
ylab("Number of players") +
xlab("Incidence (number of injuries per 100 player-match)") +
ggtitle(bquote("Histogram of injury" ~ bold("incidence") ~ "in each season")) +
theme(legend.position = "none")
p2 <- ggplot(data = injdsp) +
geom_histogram(aes(x = injburden, fill = season)) +
facet_wrap(~season) +
scale_fill_manual(name = "", values = c("#E7B800", "#2E9FDF")) +
ylab("Number of players") +
xlab("Burden (number of days lost due to injury per 100 player-match)") +
ggtitle(bquote("Histogram of injury" ~ bold("burden") ~ "in each season")) +
theme(legend.position = "none")
## ---- eval = F----------------------------------------------------------------
# theme_counts <- theme(axis.text = element_text(size = rel(1.2)),
# axis.title = element_text(size = rel(1.3)),
# strip.text = element_text(size = rel(1.4)),
# plot.title = element_text(size = rel(1.4)),
# legend.text = element_text(size = rel(1.3)),
# legend.title = element_text(size = rel(1.3)))
# p1 <- p1 + theme_counts
# p2 <- p2 + theme_counts
## ---- warning = F, message = F, echo = F, fig.width = 10, fig.height = 6.8----
theme_counts <- theme(axis.text = element_text(size = rel(1.2)),
axis.title = element_text(size = rel(1.3)),
strip.text = element_text(size = rel(1.4)),
plot.title = element_text(size = rel(1.4)),
legend.text = element_text(size = rel(1.3)),
legend.title = element_text(size = rel(1.3)))
p1 <- p1 + theme_counts
p2 <- p2 + theme_counts
grid.arrange(p1, p2, ncol = 1)
## -----------------------------------------------------------------------------
## 17/18
df_exposures1718 <- prepare_exp(df_exposures0 =
raw_df_exposures |> filter(season == "17/18"),
player = "player_name",
date = "year",
time_expo = "minutes_played") |>
mutate(seasonb = date2season(date))
injds1718p <- injds1718p |>
mutate(seasonb = "2017/2018") |>
## join to have info, such as position, age, citizenship etc.
left_join(df_exposures1718, by = c("player" = "player",
"seasonb" = "seasonb")) |>
## create positionb column
## (so that the categories are: Attack, Defender, Goalkeeper and Midfield)
mutate(positionb = factor(str_split_i(position, "_", 1)))
## -----------------------------------------------------------------------------
## quit Goalkeepers
injds1718p <- dplyr::filter(injds1718p, positionb != "Goalkeeper") |>
droplevels()
## -----------------------------------------------------------------------------
incidence_glm_pois <- glm(ninjuries ~ positionb, # + offset(log(totalexpo))
offset = log(totalexpo),
data = injds1718p,
family = poisson)
## ---- eval = F----------------------------------------------------------------
# # incidence_glm_pois2 <- glmmTMB(formula = ninjuries ~ foot,
# # offset = log(totalexpo),
# # family = poisson(), data = injds1718p)
# # summary(incidence_glm_pois2)
## -----------------------------------------------------------------------------
df_exposures <- prepare_exp(df_exposures0 = raw_df_exposures,
player = "player_name",
date = "year",
time_expo = "minutes_played") |>
mutate(seasonb = date2season(date))
injdsp <- injdsp |>
mutate(seasonb = if_else(season == "Season 17-18", "2017/2018", "2018/2019")) |>
## join to have info, such as position, age, citizenship etc.
left_join(df_exposures, by = c("player" = "player",
"seasonb" = "seasonb")) |>
## create positionb column
## (so that the categories are: Attack, Defender, Goalkeeper and Midfield)
mutate(positionb = factor(str_split_i(position, "_", 1))) |>
droplevels()
## ---- eval = F----------------------------------------------------------------
# incidence_glmm_pois <- glmer(formula = ninjuries ~ positionb + (1 | player),
# offset = log(totalexpo),
# data = injdsp,
# family = poisson)
# # incidence_glmm_pois2 <- glmmTMB::glmmTMB(formula = ninjuries ~ positionb + (1 | player),
# # offset = log(totalexpo),
# # data = injdsp,
# # family = poisson)
## -----------------------------------------------------------------------------
burden_glm_pois <- glm(ndayslost ~ positionb, offset = log(totalexpo), ## or ~ foot
data = injds1718p,
family = poisson)
## -----------------------------------------------------------------------------
summary(burden_glm_pois)
## -----------------------------------------------------------------------------
cbind(estimate = exp(coef(burden_glm_pois)) * c(90*100, 1, 1),
exp(confint(burden_glm_pois)) * c(90*100, 1, 1)) |> # (to report per 100 player-matches)
kable()
## ---- warning = F-------------------------------------------------------------
burden_glm_nb <- glm.nb(ndayslost ~ positionb + offset(log(totalexpo)),
data = injds1718p)
## -----------------------------------------------------------------------------
summary(burden_glm_nb)
## -----------------------------------------------------------------------------
burden_zinfpois <- zeroinfl(ndayslost ~ positionb | positionb,
offset = log(totalexpo),
data = injds1718p,
link = "logit",
dist = "poisson",
trace = FALSE, EM = FALSE)
## Or:
# burden_zinfpois <- glmmTMB::glmmTMB(formula = ndayslost ~ 1 + positionb,
# offset = log(totalexpo),
# ziformula = ~ 1 + positionb,
# data = injds1718p,
# family = poisson)
## -----------------------------------------------------------------------------
summary(burden_zinfpois)
## -----------------------------------------------------------------------------
burden_zinfnb <- zeroinfl(ndayslost ~ positionb | positionb,
offset = log(totalexpo),
data = injds1718p,
link = "logit",
dist = "negbin",
trace = FALSE, EM = FALSE)
## Or:
# burden_zinfnb <- glmmTMB::glmmTMB(ndayslost ~ 1 + positionb, offset = log(totalexpo),
# ziformula = ~ 1 + positionb,
# data = injds1718p,
# family = nbinom2)
## -----------------------------------------------------------------------------
summary(burden_zinfnb)
## -----------------------------------------------------------------------------
## pois
## predprob: for each subj, prob of observing each value
phat_pois <- predprob(burden_glm_pois)
phat_pois_mn <- apply(phat_pois, 2, mean)
## nb
phat_nb <- predprob(burden_glm_nb)
phat_nb_mn <- apply(phat_nb, 2, mean)
## zinfpois
phat_zinfpois <- predprob(burden_zinfpois)
phat_zinfpois_mn <- apply(phat_zinfpois, 2, mean)
## zinfnb
phat_zinfnb <- predprob(burden_zinfnb)
phat_zinfnb_mn <- apply(phat_zinfnb, 2, mean)
## put in a data frame
idx <- seq(1, 62, length.out = 30)
df_probs <- data.frame(phat_pois_mn = phat_pois_mn[idx],
phat_nb_mn = phat_nb_mn[idx],
phat_zinfpois_mn = phat_zinfpois_mn[idx],
phat_zinfnb_mn = phat_zinfnb_mn[idx],
x= idx) |>
tidyr::gather(key = "prob_type", value = "value", -x) |>
mutate(prob_type = factor(prob_type))
## ---- fig.width = 9, fig.height = 4.8-----------------------------------------
ggplot(data = injds1718p) +
geom_histogram(aes(x = injburden/100, after_stat(density)),
breaks = seq(-0.5, 62, length.out = 30),
col = "black", alpha = 0.5) +
geom_point(data = df_probs, aes(x = x, y = value,
group = prob_type, col = prob_type)) +
geom_line(data = df_probs, aes(x = x, y = value,
group = prob_type, col = prob_type)) +
ylim(c(0, 0.3)) + xlab("Injury burden") + ylab("Density") +
scale_color_manual(name = "Model:",
labels = c("Negative Binomial", "Poisson",
"Zero-Inflated Negative Binomial",
"Zero-Inflated Poisson"),
values = c("darkblue", "chocolate", "purple", "red")) +
ggtitle("Histogram of injury burden in 2017/2018\nwith conditional Poisson, NB, ZIP and ZINB Densities") +
theme_counts +
theme(legend.position = c(0.7, 0.7))
## ---- echo = F, eval = F------------------------------------------------------
# ## using base R
# with(injds1718p, {
# hist(ndayslost, prob = TRUE, breaks = seq(-0.5, 316.5, length.out = 30),
# xlab = "Injury burden (number of injuries per player-season)",
# main = "Histogram of overall injury burden\nwith conditional Poisson, NB, ZIP and ZINB Densities")
# lines(x = idx, y = phat_pois_mn[idx], type = "b", lwd = 2, col = "black")
# lines(x = idx, y = phat_nb_mn[idx], type = "b", lwd = 2, col = "purple")
# lines(x = idx, y = phat_zinfpois_mn[idx], type = "b", lwd = 2, col = "red")
# lines(x = idx, y = phat_zinfnb_mn[idx], type = "b", lwd = 2, col = "blue")
# })
# legend(250, 0.026, c("Poisson", "NB", "ZIP","ZINB"), lty = 1,
# col = c("black", "purple", "red","blue"), lwd = 2)
#
## -----------------------------------------------------------------------------
models <- list("Poisson model" = burden_glm_pois,
"Negative binomial model" = burden_glm_nb,
"Zero-inflated Poisson model" = burden_zinfpois,
"Zero-inflated Negative Binomial model" = burden_zinfnb)
res_gof <- lapply(models, function(model) {
aic <- AIC(model)
bic <- BIC(model)
if (class(model)[[1]] == "zeroinfl") {
deviance <- -2*logLik(model)[[1]]
null_model <- update(model, .~ -positionb)
null_deviance <- -2*logLik(null_model)[[1]]
} else {
deviance <- model$deviance
null_deviance <- model$null.deviance
}
dev_expl <- (null_deviance - deviance)/null_deviance * 100
return(res = data.frame(aic = aic, bic = bic, dev_expl = dev_expl))
})
## -----------------------------------------------------------------------------
res_gof |>
bind_rows(.id = "model") |>
## arrange them according to dev_expl.
arrange(desc(dev_expl)) |>
knitr::kable(digits = 2,
col.names = c("Model", "AIC", "BIC", "Deviance Explained"))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.