knitr::opts_chunk$set(echo = TRUE, warning = FALSE, error = FALSE, message = FALSE, include = TRUE, fig.width = 11, fig.height = 5)
if(!require("rprojroot")) install.packages("rprojroot", dependencies = TRUE) library(rprojroot) root <- rprojroot::find_root_file(criterion = is_rstudio_project) root
# install.packages("gdata") # install.packages("stargazer") # install.packages("arm") # library(arm) # library(stargazer) # library(gdata) # library(RCurl) # install.packages("svglite") library(tidyverse) library(purrr) library(readxl) library(readr) library(svglite) library(ggplot2) library(gridExtra) library(grid) library(car) library(pastecs) library(psych) # library(Rcmdr)
library(citrulliner) data <- citrulliner::citrulline_data
head(data) levels(data$subject) levels(data$protocol) levels(data$time) levels(data$analyte %>% as.factor())
names(data) data <- data %>% mutate(concentration_log10 = log10(concentration)) %>% dplyr::select(subject, protocol, time, analyte, concentration_log10, concentration, duplicated ) data
## split for analytes data_by_analyte <- data %>% # as_tibble() %>% # gather(exam:numeracy, key = "measure", value = "value") %>% group_by(analyte, time, protocol) %>% nest() %>% print() data_by_analyte$data[[1]]
mutate()
data_by_analyte <- data_by_analyte %>% mutate(descriptives = map(data, stat.desc, basic = TRUE, norm = TRUE)) %>% print() data_by_analyte$descriptives[1]
get_shapiro_wilk <- function(df){ p_value <- df[20 , c(2,3)] return(p_value) } ## test function df <- data_by_analyte$descriptives[[1]] #df #pluck(df, normtest.p) get_shapiro_wilk(df = df)
data_by_analyte <- data_by_analyte %>% mutate(shap_wilk = map(descriptives, get_shapiro_wilk)) descriptives <- unnest(data_by_analyte, shap_wilk) descriptives[ , c("uni", "measure", "shap_wilk")]
descriptives[ , c("uni", "measure", "shap_wilk")] %>% ggplot(aes(x = measure, y = shap_wilk)) + geom_point(aes(colour = uni), size = 2) + geom_hline(yintercept= 0.05, linetype="dotted", size = 1.5)
library(foreign) #to load stata dataset library(lme4) #mlm functions # library(arm) #to calculate se's on mlm library(plyr) #to reshape data library(lmerTest) #to include p value in the mixed model summary library(lattice) #required for dotplot function
The protocols (protocol
) are nested within the subjects (each subject was exposed to every protocol). The sample times (time
) are nested within each protocol (during each protocol all sample times were used to get a biological sample). Each sample was measured for a number of biomarkers. For the full list see below:
`r levels(separate_grinta$analyte)
concentration
- Randomprotocol
, time
- FixedWe can assume that between subject variation can vary in a random fashion, so subject
can be considered as an additional random variable.
'lmer' function uses a particular syntax to specify the 'random' component of the model. It takes the following structure:
dependent variable ~ fixed effect variable + (random effect variable | grouping variable for random intercept)
In the following formula fixed effect is substituted with 1, as we're fitting a null model, i.e. there is no fixed effect. Similarly, at this stage we are only estimating random intercepts and hence we substitute random effect with 1 as well.
By default 'lmer' function selects estimates to optimise Restricted Maximum Likelihood (REML), we set this option to false in order to optimise the log-likelihood criterion.
## null model definition create_lme_null <- function(df){ model <- lmer(concentration_log10 ~ 1 + (1 | protocol), data = df, REML = FALSE) # In the following summary Random effects are separated from Fixed effects. Within the Random effects # summary Intercept Std.Dev corresponds to level 2 standard deviation and Residual Std.Dev # corresponds to subject-level standard deviation within each group. #result <- list(summary, model) return(model) }
nested <- separate_grinta %>% dplyr::select(subject, protocol, time, analyte, concentration_log10) %>% group_by(analyte) %>% nest() head(nested) ## to access the tibble and the individual tibbles in colomn 2 we can use the single [] followed by the double [[]] nested[1,2][[1]]
models <- nested %>% mutate( null_model = map(data, create_lme_null) ) head(models) models[1,3][[1]]
separate_grinta %>% # filter(analyte == "ifabp") %>% ggplot() + geom_point(aes(x=time, y = concentration_log10, colour = protocol, group = protocol), position = position_dodge(width = 0.5)) + geom_smooth(aes(x=time, y = concentration_log10, group = protocol, colour = protocol), method = "lm", se = FALSE) + facet_wrap(~ analyte)
##plot(models$mod) library(broom) models_unnest <- models %>% mutate( glance = map(null_model, broom::glance), tidy = map(null_model, broom::tidy), augment = map(null_model, broom::augment) ) ## str(head(models[1,5])) %>% head(models_unnest) tidy <- models_unnest %>% dplyr::select(tidy, analyte) %>% unnest() augment <- models_unnest %>% dplyr::select(augment, analyte) %>% unnest() glance <- models_unnest %>% dplyr::select(glance, analyte) %>% unnest() ## spread(analyte)
tidy %>% # filter(term == "sd_(Intercept).protocol") %>% ggplot(aes(x = analyte, y = estimate)) + geom_point() + facet_wrap(~ group)
To build a full model we gradually add terms to the existing null model: To recap the experimental design:
The protocols (protocol
) are nested within the subjects (each subject was exposed to every protocol). The sample times (time
) are nested within each protocol (during each protocol all sample times were used to get a biological sample). Each sample was measured for a number of biomarkers. For the full list see below:
`r levels(separate_grinta$analyte)
concentration
- Randomprotocol
, time
- FixedWe can assume that between subject variation can vary in a random fashion, so subject
can be considered as an additional random variable.
We can describe the full model as
library(nlme) create_full_model <- function(df){ #model <- lmer(concentration_log10 ~ protocol * time + (1 | subject) , data = df, REML = FALSE) model <- nlme::lme(concentration_log10 ~ protocol * time, data = df, method = "ML", na.action = "na.omit", random = ~ 1 | subject/protocol) # In the following summary Random effects are separated from Fixed effects. Within the Random effects # summary Intercept Std.Dev corresponds to level 2 standard deviation and Residual Std.Dev # corresponds to subject-level standard deviation within each group. #summary <- summary(model) #result <- list(summary, model) return(model) } x <- create_full_model(split_grinta[[1]]) broom::tidy(x) %>% as_tibble() anova(x) models <- nested %>% mutate( null_model = map(data, create_lme_null), anova_null = map(null_model, anova) ) %>% mutate( full_model = map(data, create_full_model), anova_full = map(full_model, anova) ) head(models) models_unnest <- models %>% mutate( glance_null = map(null_model, broom::glance), tidy_null = map(null_model, broom::tidy), augment_null = map(null_model, broom::augment), glance_full = map(full_model, broom::glance), tidy_full = map(full_model, broom::tidy), augment_full = map(full_model, broom::augment), glance_anova_full = map(anova_full, broom::glance) ) ## str(head(models[1,5])) %>% head(models_unnest[1,5][[1]]) glance_anova <- models_unnest %>% dplyr::select(glance_anova_full, analyte) %>% unnest()
Dag Marc
Dit deed jij:
MANOVA <- aov(ResultAssay ~ Time * Protocol + Error(VolunteerID/(Protocol*Time)), data = DF)
Kun je dit eens proberen?
MANOVA <- aov(ResultAssay ~ Time * Protocol + Error(VolunteerID/Protocol/Time), data = DF)
Groeten
Eric
Video : https://www.youtube.com/watch?v=nPdrWq_Sb-U by Erin Buchanan
library(pastecs) library(lme4) library(nlme) library(tidyverse) library(purrr)
Testing a general linear model (gls) vs a mixed effects model (lme)
## function to obtain intercepts for concentration, fixed intercepts model glsModel <- function(DF){ glsModels <- gls(concentration ~ 1, data = DF, method = "ML", na.action = "na.omit") summary(glsModels) }
lmeModel <- function(DF){ lmeModels <- lme(concentration ~ 1, data = DF, method = "ML", na.action = "na.omit", random = ~1|protocol) summary(lmeModels) }
Apply both of both (GLS and LME) models to the GRINTA! data
## test function glsModel(split_grinta[[1]]) ## apply model to all data GLS_MODEL_SUMMARY <- lapply(split_grinta, glsModel) GLS_MODEL_SUMMARY[[1]] for(i in 1: length(GLS_MODEL_SUMMARY)){ GLS_list <- list() GLS_list <- GLS_MODEL_SUMMARY[[i]]$tTable print(GLS_list) } GLS_df <- GLS_list %>% purrr::transpose() %>% dplyr::bind_rows()
LME Model Summary
LME_MODEL_SUMMARY <- lapply(split_grinta, lmeModel) for(i in 1: length(LME_MODEL_SUMMARY)){ LME_LIST <- LME_MODEL_SUMMARY[[i]]$tTable print(LME_LIST) } LME_MODEL_SUMMARY[[1]]
ANOVA_onModels <- function(DF){ glsModels <- gls(concentration ~ 1, data = DF, method = "ML", na.action = "na.omit") lmeModels <- lme(concentration ~ 1, data = DF, method = "ML", na.action = "na.omit", random = ~1|protocol) anova(glsModels,lmeModels) } ANOVA_Summary <- lapply(split_grinta, ANOVA_onModels) ANOVA_Summary[[6]] # str(ANOVA_Summary$Alanine$`p-value`[2]) for(i in 1: length(ANOVA_Summary)){ ANOVA_list <- list() length(ANOVA_list) <- levels(separate_grinta$analyte) %>% length() names(ANOVA_list) <- levels(separate_grinta$analyte) ANOVA_list[[i]] <- ANOVA_Summary[[i]]$`p-value`[2] } ANOVA_list[[1]] anova_df <- dplyr::bind_rows(ANOVA_list)
if the result is significant the model with lowest BIC and AIC is the best
The design is repeated measures
Now we know that we need to perform an LME
lmeModel_protocol <- function(DF) { lmeModels <- lme(concentration ~ protocol, data = DF, method = "ML", na.action = "na.omit", random = ~1|subject) summary(lmeModels) } LME_MODEL_Protocol_SUMMARY <- lapply(split_tidy, lmeModel_protocol) LME_MODEL_Protocol_SUMMARY #for(i in 1: length(LME_MODEL_Protocol_SUMMARY)){{ # LME_LIST[i] <- as.data.frame(LME_MODEL_Protocol_SUMMARY[[i]]$tTable[c(2:5), 5]) #} # LME_LIST #} #LME_LIST <- as.data.frame(LME_LIST)
from Field et al., 2012 R Code for Chapter 14 from the book: Field, A. P., Miles, J. N. V., & Field, Z. C. (2012). Discovering Statistics Using R: and Sex and Drugs and Rock 'N' Roll. London Sage (c) 2011 Andy P. Field, Jeremy N. V. Miles & Zoe C. Field
p1_vs_p2 <- c(0, 1, 0, 0) p1_vs_p4 <- c(0, 0, 1, 0) p1_vs_p6 <- c(0, 0, 0, 1) t0_vs_t1 <- c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0) t0_vs_t2 <- c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0) t0_vs_t3 <- c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0) t0_vs_t6 <- c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0) t0_vs_t24 <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1) separate_tempo$protocol <- as.factor(separate_tempo$protocol) levels(separate_tempo$protocol) separate_tempo$time <- as.factor(separate_tempo$time) levels(separate_tempo$time) contrasts(separate_tempo$protocol) <- cbind(control_vs_p2, control_vs_p4, control_vs_p6 ) contrasts(separate_tempo$time) <- cbind(t0_vs_t1, t0_vs_t2, t0_vs_t3, t0_vs_t6, t0_vs_t24) contrasts(tidy_tempo_amino_acids$protocol) contrasts(tidy_tempo_amino_acids$time)
multilevel_mixed <- function(DF){ baseline_model <- nlme::lme(concentration ~ 1, random = ~1|subject/protocol/time, data = DF, method = "ML") protocol_model <- update(baseline_model, .~. + protocol) time_model <- update(protocol_model, .~. + time) interaction_protocol_time_model <- update(time_model, .~. + protocol:time) anova <- anova(baseline_model, protocol_model, time_model, interaction_protocol_time_model) anova_df <- as_tibble(anova) anova_df$analyte <- DF$analyte[1] anova_df$model_description <- row.names(anova_df) anova_df$`p-value` <- round(anova_df$`p-value`, digits = 5) return(anova_df) } multilevel_mixed_summary <- function(DF){ baseline_model <- nlme::lme(concentration ~ 1, random = ~1|subject/protocol/time, data = DF, method = "ML") protocol_model <- update(baseline_model, .~. + protocol) time_model <- update(protocol_model, .~. + time) interaction_protocol_time_model <- update(time_model, .~. + protocol:time) summary <- summary(interaction_protocol_time_model) return(summary) }
names(tidy_tempo_amino_acids) tidy_tempo_amino_acids_saliva <- tidy_tempo_amino_acids %>% filter(matrix == "speeksel") %>% select(subject, protocol, time, analyte, concentration) %>% na.omit() tidy_tempo_amino_acids_serum <- tidy_tempo_amino_acids %>% filter(matrix == "serum") %>% select(subject, protocol, time, analyte, concentration) %>% na.omit() # spliting assays in a list split_saliva <- split(tidy_tempo_amino_acids_saliva, tidy_tempo_amino_acids_saliva$analyte) split_serum <- split(tidy_tempo_amino_acids_serum, tidy_tempo_amino_acids_serum$analyte)
#library(lme) #DF = ###### probability = 0.05 #summary <- summary_models(DF = DF) #summary$tTable #multilevel_test <- multilevel_mixed(DF = DF) #multilevel_test_summary <- multilevel_mixed_summary(DF = DF) #multilevel_test_summary multilevel_saline <- lapply(split_diagrams, purrr::safely(multilevel_mixed)) multilevel_serum <- lapply(split_serum, purrr::safely(multilevel_mixed)) results_nutricia_saline <- purrr::transpose(multilevel_saline) results_nutricia_serum <- purrr::transpose(multilevel_serum) all_results_nutricia <- dplyr::bind_rows(results_nutricia_saline$result, results_nutricia_serum$result) all_results_nutricia
get_contrasts <- function(DF, probability){ model <- multilevel_mixed(DF) summary <- multilevel_mixed_summary(DF) contrasts <- row.names(summary$tTable) index_contrasts <- c(1:length(contrasts)) model_params <- summary$call analyte <- as.character(summary$data[1,"analyte"]) results <- summary$tTable %>% as_tibble() %>% # dplyr::bind_cols(., index_contrasts) %>% mutate(contrasts = contrasts, analyte = analyte) %>% # filter(`p-value` < probability) %>% # select(contrasts, analyte, `p-value`) %>% arrange(`p-value`) return(results) }
(all_contrasts_saliva <- lapply(split_saliva, get_contrasts, probability = 0.05) %>% dplyr::bind_rows()) (all_contrasts_serum <- lapply(split_serum, get_contrasts, probability = 0.05) %>% dplyr::bind_rows()) (all_contrasts_nutricia <- dplyr::bind_rows(all_contrasts_saliva, all_contrasts_serum))
Filter for:
load(paste0(root, "/data/grinta_all_noNA.Rda")) head(new_grinta_all) new_grinta_all_serum <- new_grinta_all %>% filter(matrix == "serum") %>% select(subject, protocol, time, analyte, concentration) new_grinta_all_plasma <- new_grinta_all %>% filter(matrix == "plasma") %>% select(subject, protocol, time, analyte, concentration)
############## serum grinta_all_serum_split <- split(new_grinta_all_serum, droplevels(new_grinta_all_serum$analyte)) grinta_all_serum_split[[1]] all_results_grinta_serum <- lapply(grinta_all_serum_split, purrr::safely(multilevel_mixed)) all_results_grinta_serum_transposed <- purrr::transpose(all_results_grinta_serum) all_results_grinta_serum_transposed$result is_null <- all_results_grinta_serum_transposed$result == "NULL" p_values_grinta_serum <- all_results_grinta_serum_transposed$result[!is_null] %>% dplyr::bind_rows() ############# plasma grinta_all_plasma_split <- split(new_grinta_all_plasma, droplevels(new_grinta_all_plasma$analyte)) grinta_all_plasma_split[[2]] all_results_grinta_plasma <- lapply(grinta_all_plasma_split, purrr::safely(multilevel_mixed)) all_results_grinta_plasma_transposed <- purrr::transpose(all_results_grinta_plasma) all_results_grinta_plasma_transposed$result is_null <- all_results_grinta_plasma_transposed$result == "NULL" p_values_grinta_plasma <- all_results_grinta_plasma_transposed$result[!is_null] %>% dplyr::bind_rows() p_all_grinta <- dplyr::bind_rows(p_values_grinta_plasma, p_values_grinta_serum)
new_grinta_all$concentration_scaled <- as.numeric(scale(new_grinta_all$concentration)) new_grinta_all new_grinta_all %>% mutate(analyte_matrix = paste(analyte, matrix, sep = "_")) %>% group_by(protocol) %>% summarise(mean_conc = mean(concentration_scaled)) %>% ggplot(aes(x = protocol, y = mean_conc)) + geom_point() + geom_bar(stat = "identity")
new_grinta_all %>% mutate(analyte_matrix = paste(analyte, matrix, sep = "_")) %>% group_by(time) %>% summarise(mean_conc = mean(concentration_scaled)) %>% ggplot(aes(x = time, y = mean_conc)) + geom_point() + geom_bar(stat = "identity")
new_grinta_all %>% mutate(analyte_matrix = paste(analyte, matrix, sep = "_")) %>% group_by(subject) %>% summarise(mean_conc = mean(concentration_scaled)) %>% ggplot(aes(x = subject, y = mean_conc)) + geom_point() + geom_bar(stat = "identity")
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
You can also embed plots, for example:
plot(pressure)
Note that the echo = FALSE
parameter was added to the code chunk to prevent printing of the R code that generated the plot.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.