Nothing
## ----include=FALSE------------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.align='center',
dpi = 92,
fig.retina = 2,
eval = requireNamespace("lmerTest")
)
options(tibble.print_min = 4L, tibble.print_max = 4L)
## ----warning=FALSE,message=FALSE----------------------------------------------
# Attach some packages
library(groupdata2)
library(dplyr)
library(knitr) # kable()
library(lmerTest) #lmer()
# Create data frame
df <- data.frame("participant" = factor(as.integer(
rep(c('1','2', '3', '4', '5',
'6', '7', '8', '9', '10'), 3))),
"age" = rep(c(20,23,27,21,32,31,43,21,34,32), 3),
"diagnosis" = factor(rep(c('a', 'b', 'a', 'b', 'b',
'a', 'a', 'a', 'b', 'b'), 3)),
"score" = c(10,24,15,35,24,14,11,16,33,29, # for 1st session
24,40,30,50,54,25,35,32,53,55, # for 2nd session
45,67,40,78,62,30,41,44,66,81)) # for 3rd session
# Order by participant
df <- df %>% arrange(participant)
# Remove index
rownames(df) <- NULL
# Add session info
df$session <- as.integer(rep(c('1','2', '3'), 10))
# Show the data frame
kable(df, align = 'c')
## -----------------------------------------------------------------------------
xpectr::set_test_seed(1) # For reproducibility
# Split data in 20/80 (percentage)
partition(df, p = 0.2, id_col = "participant") %>%
.[1] %>% # See only the test set
kable() # Pretty tables :)
## -----------------------------------------------------------------------------
xpectr::set_test_seed(1) # For reproducibility
# Split data in 20/80 (percentage)
parts <- partition(df, p = 0.2, id_col = "participant", cat_col = 'diagnosis')
test_set <- parts[[1]]
train_set <- parts[[2]]
# Show test_set
test_set %>% kable()
## -----------------------------------------------------------------------------
train_set %>%
count(diagnosis) %>%
kable(align = 'c')
## -----------------------------------------------------------------------------
train_set %>%
count(participant) %>%
kable(align = 'c')
## -----------------------------------------------------------------------------
xpectr::set_test_seed(1) # For reproducibility
train_set <- fold(train_set, k = 4, cat_col = 'diagnosis', id_col = 'participant')
# Order by .folds
train_set <- train_set %>% arrange(.folds)
train_set %>% kable()
## -----------------------------------------------------------------------------
train_set %>%
count(participant, diagnosis) %>%
kable(align = 'c')
## -----------------------------------------------------------------------------
# RMSE metric
rmse <- function(predictions, targets){
sqrt(mean((predictions - targets)^2))
}
crossvalidate <- function(data, k, model, dependent, random = FALSE){
# 'data' is the training set with the ".folds" column
# 'k' is the number of folds we have
# 'model' is a string describing a linear regression model formula
# 'dependent' is a string with the name of the score column we want to predict
# 'random' is a logical (TRUE/FALSE); do we have random effects in the model?
# Initialize empty list for recording performances
performances <- c()
# One iteration per fold
for (fold in 1:k){
# Create training set for this iteration
# Subset all the datapoints where .folds does not match the current fold
training_set <- data[data$.folds != fold,]
# Create test set for this iteration
# Subset all the datapoints where .folds matches the current fold
testing_set <- data[data$.folds == fold,]
## Train model
# If there is a random effect,
# use lmer() to train model
# else use lm()
if (isTRUE(random)){
# Train linear mixed effects model on training set
model <- lmer(model, training_set, REML = FALSE)
} else {
# Train linear model on training set
model <- lm(model, training_set)
}
## Test model
# Predict the dependent variable in the testing_set with the trained model
predicted <- predict(model, testing_set, allow.new.levels = TRUE)
# Get the Root Mean Square Error between the predicted and the observed
RMSE <- rmse(predicted, testing_set[[dependent]])
# Add the RMSE to the performance list
performances[fold] <- RMSE
}
# Return the mean of the recorded RMSEs
return(c('RMSE' = mean(performances)))
}
## ----eval=requireNamespace("broom")-------------------------------------------
lm(score ~ diagnosis, df) %>%
broom::tidy()
## -----------------------------------------------------------------------------
m0 <- 'score ~ 1 + (1 | participant)'
m1 <- 'score ~ diagnosis + (1 | participant)'
m2 <- 'score ~ diagnosis + age + (1 | participant)'
m3 <- 'score ~ diagnosis + session + (1 | participant)'
m4 <- 'score ~ diagnosis * session + (1 | participant)'
m5 <- 'score ~ diagnosis * session + age + (1 | participant)'
## ----message=FALSE------------------------------------------------------------
m0
crossvalidate(train_set, k = 4, model = m0, dependent = 'score', random = TRUE)
m1
crossvalidate(train_set, k = 4, model = m1, dependent = 'score', random = TRUE)
m2
crossvalidate(train_set, k = 4, model = m2, dependent = 'score', random = TRUE)
m3
crossvalidate(train_set, k = 4, model = m3, dependent = 'score', random = TRUE)
m4
crossvalidate(train_set, k = 4, model = m4, dependent = 'score', random = TRUE)
m5
crossvalidate(train_set, k = 4, model = m5, dependent = 'score', random = TRUE)
## -----------------------------------------------------------------------------
# Training the model on the full training set
model_m4 <- lmer(m4, train_set, REML = FALSE)
# Predict the dependent variable in the test set with the trained model
predicted <- predict(model_m4, test_set, allow.new.levels = TRUE)
# Get the Root Mean Square Error between the predicted and the observed
RMSE <- rmse(predicted, test_set[['score']])
RMSE
## -----------------------------------------------------------------------------
model_m4 %>%
summary()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.