knitr::opts_chunk$set(echo = TRUE, 
                      warning = FALSE,
                      error = FALSE,
                      message = FALSE,
                      include = TRUE,
                      fig.width = 11,
                      fig.height = 5)
if(!require("rprojroot")) install.packages("rprojroot", dependencies = TRUE)
library(rprojroot)
root <- rprojroot::find_root_file(criterion = is_rstudio_project)
root
# install.packages("gdata")
# install.packages("stargazer")
# install.packages("arm")
# library(arm)
# library(stargazer)
# library(gdata)
# library(RCurl)
# install.packages("svglite")
library(tidyverse)
library(purrr)
library(readxl)
library(readr)
library(svglite)
library(ggplot2)
library(gridExtra)
library(grid)
library(car)
library(pastecs)
library(psych)
# library(Rcmdr)

Data

Data load

library(citrulliner)
data <- citrulliner::citrulline_data

Inpect data

head(data)
levels(data$subject)
levels(data$protocol)
levels(data$time)
levels(data$analyte %>% as.factor())

Select and rename Variables

names(data)
data <- data %>%
  mutate(concentration_log10 = log10(concentration)) %>%
  dplyr::select(subject,
         protocol,
         time,
         analyte,
         concentration_log10,
         concentration,
         duplicated
         )
data

List columns for each analyte

## split for analytes
data_by_analyte <- data %>%
#  as_tibble() %>%
#  gather(exam:numeracy, key = "measure", value = "value") %>%
  group_by(analyte, time, protocol) %>%
  nest() %>% print()

data_by_analyte$data[[1]]

Exploring assumptions

Adding a column with descriptive statistics with mutate()

data_by_analyte <- data_by_analyte %>%
  mutate(descriptives = map(data, 
                            stat.desc, 
                            basic = TRUE, 
                            norm = TRUE)) %>%
  print()
data_by_analyte$descriptives[1]

Getting the Shapiro-Wilk results: a function

get_shapiro_wilk <- function(df){
  p_value <- df[20 , c(2,3)]
  return(p_value)
}

## test function
df <- data_by_analyte$descriptives[[1]] 
#df
#pluck(df, normtest.p)

get_shapiro_wilk(df = df)

Apply function to nested table and unnest

data_by_analyte <- data_by_analyte %>%
  mutate(shap_wilk = map(descriptives, get_shapiro_wilk))

descriptives <- unnest(data_by_analyte, shap_wilk)

descriptives[ , c("uni", "measure", "shap_wilk")]

Plot results Shapiro-Wilk test

descriptives[ , c("uni", "measure", "shap_wilk")] %>%
  ggplot(aes(x = measure, y = shap_wilk)) +
  geom_point(aes(colour = uni), size = 2) +
  geom_hline(yintercept= 0.05, linetype="dotted", size = 1.5)

STATISTICAL ANALYSIS with Multilevel Models: after the book by Robson and Pevalin, 2016, "Multilevel Modeling in Plain Language"

library(foreign)    #to load stata dataset
library(lme4)       #mlm functions
# library(arm)      #to calculate se's on mlm
library(plyr)       #to reshape data
library(lmerTest)   #to include p value in the mixed model summary
library(lattice)    #required for dotplot function

The experimental design:

The protocols (protocol) are nested within the subjects (each subject was exposed to every protocol). The sample times (time) are nested within each protocol (during each protocol all sample times were used to get a biological sample). Each sample was measured for a number of biomarkers. For the full list see below:

`r levels(separate_grinta$analyte)

The variables in the diagrams data:

We can assume that between subject variation can vary in a random fashion, so subject can be considered as an additional random variable.

Models

Fit a linear model

Fit a null model with random intercepts.

'lmer' function uses a particular syntax to specify the 'random' component of the model. It takes the following structure:

dependent variable ~ fixed effect variable + (random effect variable | grouping variable for random intercept)

In the following formula fixed effect is substituted with 1, as we're fitting a null model, i.e. there is no fixed effect. Similarly, at this stage we are only estimating random intercepts and hence we substitute random effect with 1 as well.

By default 'lmer' function selects estimates to optimise Restricted Maximum Likelihood (REML), we set this option to false in order to optimise the log-likelihood criterion.

## null model definition
create_lme_null <- function(df){

model <- lmer(concentration_log10 ~ 1 + (1 | protocol), data = df, REML = FALSE)

# In the following summary Random effects are separated from Fixed effects. Within the Random effects 
# summary Intercept Std.Dev corresponds to level 2 standard deviation and Residual Std.Dev 
# corresponds to subject-level standard deviation within each group. 

#result <- list(summary, model)


return(model)

}

Create a nested table

nested <- separate_grinta %>% dplyr::select(subject, protocol, 
                                    time, analyte, 
                                    concentration_log10) %>%
  group_by(analyte) %>% nest()

head(nested)

## to access the tibble and the individual tibbles in colomn 2 we can use the single [] followed by the double [[]]
nested[1,2][[1]]

Applying null model to each row

models <- nested %>% 
  mutate(
    null_model = map(data, create_lme_null)
      )

head(models)


models[1,3][[1]]

Plotting the assumed model

separate_grinta %>% 
 # filter(analyte == "ifabp") %>%
  ggplot() +
  geom_point(aes(x=time, y = concentration_log10, colour = protocol, group = protocol),
             position = position_dodge(width = 0.5)) + 

 geom_smooth(aes(x=time, y = concentration_log10, group = protocol,
                 colour = protocol), method = "lm", se = FALSE) +
  facet_wrap(~ analyte)
##plot(models$mod)
library(broom)

models_unnest <- models %>% 
  mutate(

    glance = map(null_model, broom::glance),
    tidy = map(null_model, broom::tidy),
    augment = map(null_model, broom::augment)

  )

##  str(head(models[1,5])) %>%

head(models_unnest)


tidy <- models_unnest %>%
dplyr::select(tidy, analyte) %>% 
  unnest()  


augment <- models_unnest %>%
dplyr::select(augment, analyte) %>% 
  unnest()  


glance <- models_unnest %>%
dplyr::select(glance, analyte) %>% 
  unnest()  


##  spread(analyte)

Plot intercepts for each analyte

tidy %>% 
#  filter(term == "sd_(Intercept).protocol") %>%
  ggplot(aes(x = analyte, y = estimate)) +
  geom_point() +
  facet_wrap(~ group) 

Adding aditional terms to the model

To build a full model we gradually add terms to the existing null model: To recap the experimental design:

The protocols (protocol) are nested within the subjects (each subject was exposed to every protocol). The sample times (time) are nested within each protocol (during each protocol all sample times were used to get a biological sample). Each sample was measured for a number of biomarkers. For the full list see below:

`r levels(separate_grinta$analyte)

The variables in the diagrams data:

We can assume that between subject variation can vary in a random fashion, so subject can be considered as an additional random variable.

We can describe the full model as

library(nlme)

create_full_model <- function(df){

#model <- lmer(concentration_log10 ~ protocol * time + (1 | subject) , data = df, REML = FALSE)

model <-  nlme::lme(concentration_log10 ~ protocol * time, data = df, method = "ML", 
                    na.action = "na.omit", random = ~ 1 | subject/protocol) 



# In the following summary Random effects are separated from Fixed effects. Within the Random effects 
# summary Intercept Std.Dev corresponds to level 2 standard deviation and Residual Std.Dev 
# corresponds to subject-level standard deviation within each group. 

#summary <- summary(model)
#result <- list(summary, model)

return(model)

}


x <- create_full_model(split_grinta[[1]])

broom::tidy(x) %>% as_tibble()
anova(x)

models <- nested %>% 
  mutate(
    null_model = map(data, create_lme_null),
    anova_null = map(null_model, anova)
    ) %>%

    mutate(    
    full_model = map(data, create_full_model),
    anova_full = map(full_model, anova)

  )

head(models)



models_unnest <- models %>% 
  mutate(

    glance_null = map(null_model, broom::glance),
    tidy_null = map(null_model, broom::tidy),
  augment_null = map(null_model, broom::augment),
  glance_full = map(full_model, broom::glance),
  tidy_full = map(full_model, broom::tidy),
  augment_full = map(full_model, broom::augment),
  glance_anova_full = map(anova_full, broom::glance)

  )

##  str(head(models[1,5])) %>%

head(models_unnest[1,5][[1]])


glance_anova <- models_unnest %>%
dplyr::select(glance_anova_full, analyte) %>% 
  unnest() 

Dag Marc

Dit deed jij:

MANOVA <- aov(ResultAssay ~ Time * Protocol + Error(VolunteerID/(Protocol*Time)), data = DF)

Kun je dit eens proberen?

MANOVA <- aov(ResultAssay ~ Time * Protocol + Error(VolunteerID/Protocol/Time), data = DF)

Groeten

Eric

Linear mixed effects model (LME Models)

Video : https://www.youtube.com/watch?v=nPdrWq_Sb-U by Erin Buchanan

Packages

library(pastecs)
library(lme4)
library(nlme)
library(tidyverse)
library(purrr)

Model definition

Testing a general linear model (gls) vs a mixed effects model (lme)

GLS model definition

## function to obtain intercepts for concentration, fixed intercepts model
glsModel <- function(DF){

  glsModels <-  gls(concentration ~ 1, data = DF, method = "ML", 
                    na.action = "na.omit") 
  summary(glsModels)

}

Linear Mixed Effects Model (LME) definition, random intercepts per protocol

lmeModel <- function(DF){

  lmeModels <-  lme(concentration ~ 1, data = DF, method = "ML", 
                    na.action = "na.omit", random = ~1|protocol) 
  summary(lmeModels)

}

GRINTA! - analysis

Apply both of both (GLS and LME) models to the GRINTA! data

GLS Model Summary

## test function 
glsModel(split_grinta[[1]])

## apply model to all data
GLS_MODEL_SUMMARY <- lapply(split_grinta, glsModel)

GLS_MODEL_SUMMARY[[1]]

for(i in 1: length(GLS_MODEL_SUMMARY)){
GLS_list <- list()
GLS_list <- GLS_MODEL_SUMMARY[[i]]$tTable
  print(GLS_list)

}

GLS_df <- GLS_list %>% purrr::transpose() %>% dplyr::bind_rows()

LME Model Summary

LME_MODEL_SUMMARY <- lapply(split_grinta, lmeModel)

for(i in 1: length(LME_MODEL_SUMMARY)){
  LME_LIST <- LME_MODEL_SUMMARY[[i]]$tTable
  print(LME_LIST)

}
LME_MODEL_SUMMARY[[1]]

Comparing the gls and lme with ANOVA - TEMPO

ANOVA_onModels <- function(DF){

  glsModels <-  gls(concentration ~ 1, data = DF, method = "ML", 
                    na.action = "na.omit") 


  lmeModels <-  lme(concentration ~ 1, data = DF, method = "ML", 
                    na.action = "na.omit", random = ~1|protocol) 


  anova(glsModels,lmeModels)



}

ANOVA_Summary <- lapply(split_grinta, ANOVA_onModels) 

ANOVA_Summary[[6]]

# str(ANOVA_Summary$Alanine$`p-value`[2])


for(i in 1: length(ANOVA_Summary)){
 ANOVA_list <- list()
 length(ANOVA_list) <- levels(separate_grinta$analyte) %>% length()

 names(ANOVA_list) <- levels(separate_grinta$analyte)
     ANOVA_list[[i]] <- ANOVA_Summary[[i]]$`p-value`[2]
}

ANOVA_list[[1]]

anova_df <- dplyr::bind_rows(ANOVA_list)

if the result is significant the model with lowest BIC and AIC is the best

question: how about the hiarchy of the Grinta data set:

The design is repeated measures

Now we know that we need to perform an LME

Add protocol and time to the model

lmeModel_protocol <- function(DF)
{

  lmeModels <-  lme(concentration ~ protocol, data = DF, method = "ML", 
                    na.action = "na.omit", 
                    random = ~1|subject) 
  summary(lmeModels)

}

LME_MODEL_Protocol_SUMMARY <- lapply(split_tidy, lmeModel_protocol)

LME_MODEL_Protocol_SUMMARY

#for(i in 1: length(LME_MODEL_Protocol_SUMMARY)){{
#  LME_LIST[i] <- as.data.frame(LME_MODEL_Protocol_SUMMARY[[i]]$tTable[c(2:5), 5])

#}
#  LME_LIST
#}

#LME_LIST <- as.data.frame(LME_LIST)

Alternative analysis:

Andy Field: chapter GLM-5

from Field et al., 2012 R Code for Chapter 14 from the book: Field, A. P., Miles, J. N. V., & Field, Z. C. (2012). Discovering Statistics Using R: and Sex and Drugs and Rock 'N' Roll. London Sage (c) 2011 Andy P. Field, Jeremy N. V. Miles & Zoe C. Field

TEMPO STATISTICS

Contrasts

p1_vs_p2 <- c(0, 1, 0, 0)
p1_vs_p4 <- c(0, 0, 1, 0)
p1_vs_p6 <- c(0, 0, 0, 1)

t0_vs_t1  <- c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0)
t0_vs_t2  <- c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0)
t0_vs_t3  <- c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0)
t0_vs_t6  <- c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0)
t0_vs_t24 <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1)


separate_tempo$protocol <- as.factor(separate_tempo$protocol)
levels(separate_tempo$protocol)

separate_tempo$time <- as.factor(separate_tempo$time)
levels(separate_tempo$time)

contrasts(separate_tempo$protocol) <- cbind(control_vs_p2,
                                                control_vs_p4,
                                                control_vs_p6
                                                )

contrasts(separate_tempo$time) <- cbind(t0_vs_t1,
                                            t0_vs_t2,
                                            t0_vs_t3,
                                            t0_vs_t6,
                                            t0_vs_t24)

contrasts(tidy_tempo_amino_acids$protocol)
contrasts(tidy_tempo_amino_acids$time)

Building the mixed model model in a function

multilevel_mixed <- function(DF){

  baseline_model <- nlme::lme(concentration ~ 1, random = ~1|subject/protocol/time, 
                        data = DF, method = "ML")
  protocol_model <- update(baseline_model, .~. + protocol)
  time_model <- update(protocol_model, .~. + time)
  interaction_protocol_time_model <- update(time_model, .~. + protocol:time)

  anova <- anova(baseline_model, 
                 protocol_model,
                 time_model,
                 interaction_protocol_time_model)

  anova_df <- as_tibble(anova)
  anova_df$analyte <- DF$analyte[1]
  anova_df$model_description <- row.names(anova_df)
  anova_df$`p-value` <- round(anova_df$`p-value`, digits = 5)  

  return(anova_df)

  }


multilevel_mixed_summary <- function(DF){

  baseline_model <- nlme::lme(concentration ~ 1, random = ~1|subject/protocol/time, 
                       data = DF, method = "ML")
  protocol_model <- update(baseline_model, .~. + protocol)
  time_model <- update(protocol_model, .~. + time)
  interaction_protocol_time_model <- update(time_model, .~. + protocol:time)

  summary <- summary(interaction_protocol_time_model)

  return(summary)

  }

Select data

names(tidy_tempo_amino_acids)
tidy_tempo_amino_acids_saliva <- tidy_tempo_amino_acids %>%
  filter(matrix == "speeksel") %>%
  select(subject, protocol, time, analyte, concentration) %>%
  na.omit()

tidy_tempo_amino_acids_serum <- tidy_tempo_amino_acids %>%
  filter(matrix == "serum") %>%
  select(subject, protocol, time, analyte, concentration) %>%
  na.omit()


# spliting assays in a list
split_saliva <- split(tidy_tempo_amino_acids_saliva, 
                      tidy_tempo_amino_acids_saliva$analyte)


split_serum <- split(tidy_tempo_amino_acids_serum, 
                      tidy_tempo_amino_acids_serum$analyte)

Apply model to data

#library(lme)

#DF = ######
probability = 0.05

#summary <- summary_models(DF = DF)
#summary$tTable
#multilevel_test <- multilevel_mixed(DF = DF)
#multilevel_test_summary <- multilevel_mixed_summary(DF = DF)
#multilevel_test_summary

multilevel_saline <- lapply(split_diagrams, purrr::safely(multilevel_mixed))
multilevel_serum <- lapply(split_serum, purrr::safely(multilevel_mixed))

results_nutricia_saline <- purrr::transpose(multilevel_saline)
results_nutricia_serum <-  purrr::transpose(multilevel_serum)

all_results_nutricia <- dplyr::bind_rows(results_nutricia_saline$result,
                                         results_nutricia_serum$result)

all_results_nutricia

Get p-values for the contrasts

  get_contrasts <- function(DF, probability){

  model <- multilevel_mixed(DF)
  summary <- multilevel_mixed_summary(DF)
  contrasts <- row.names(summary$tTable)

  index_contrasts <- c(1:length(contrasts))

  model_params <- summary$call
  analyte <- as.character(summary$data[1,"analyte"])


  results <- summary$tTable %>%
    as_tibble() %>% 
  # dplyr::bind_cols(., index_contrasts) %>%
    mutate(contrasts = contrasts,
           analyte = analyte) %>%
#  filter(`p-value` < probability) %>%
#    select(contrasts, analyte, `p-value`) %>%
    arrange(`p-value`) 


 return(results)

 }    

Apply contrast function to list of assays

(all_contrasts_saliva <- lapply(split_saliva, get_contrasts, probability = 0.05) %>%
  dplyr::bind_rows())

(all_contrasts_serum <- lapply(split_serum, get_contrasts, probability = 0.05) %>%
  dplyr::bind_rows())

(all_contrasts_nutricia <- dplyr::bind_rows(all_contrasts_saliva, all_contrasts_serum))

Reanlyze GRINTA! dataset

Read GRINTA dataset

Filter for:

load(paste0(root, "/data/grinta_all_noNA.Rda"))
head(new_grinta_all)
new_grinta_all_serum <- new_grinta_all %>%
  filter(matrix == "serum") %>%
  select(subject, protocol, time, analyte, concentration)

new_grinta_all_plasma <- new_grinta_all %>%
  filter(matrix == "plasma") %>%
  select(subject, protocol, time, analyte, concentration)
############## serum
grinta_all_serum_split <- split(new_grinta_all_serum, droplevels(new_grinta_all_serum$analyte))
grinta_all_serum_split[[1]]

all_results_grinta_serum <- lapply(grinta_all_serum_split, 
                                   purrr::safely(multilevel_mixed))

all_results_grinta_serum_transposed <- purrr::transpose(all_results_grinta_serum)
all_results_grinta_serum_transposed$result

is_null <- all_results_grinta_serum_transposed$result == "NULL"

 p_values_grinta_serum <- all_results_grinta_serum_transposed$result[!is_null] %>%
   dplyr::bind_rows()

 ############# plasma


 grinta_all_plasma_split <- split(new_grinta_all_plasma, droplevels(new_grinta_all_plasma$analyte))
grinta_all_plasma_split[[2]]

all_results_grinta_plasma <- lapply(grinta_all_plasma_split, 
                                   purrr::safely(multilevel_mixed))

all_results_grinta_plasma_transposed <- purrr::transpose(all_results_grinta_plasma)
all_results_grinta_plasma_transposed$result

is_null <- all_results_grinta_plasma_transposed$result == "NULL"

 p_values_grinta_plasma <- all_results_grinta_plasma_transposed$result[!is_null] %>%
   dplyr::bind_rows()

p_all_grinta <- dplyr::bind_rows(p_values_grinta_plasma,
                                 p_values_grinta_serum) 

Graphical respresentation of the models

Main effect Protocol

new_grinta_all$concentration_scaled <- as.numeric(scale(new_grinta_all$concentration))
new_grinta_all



 new_grinta_all %>%
  mutate(analyte_matrix = paste(analyte, matrix, sep = "_")) %>%
  group_by(protocol) %>% 
  summarise(mean_conc = mean(concentration_scaled)) %>%
  ggplot(aes(x = protocol, y = mean_conc)) +
  geom_point() +
   geom_bar(stat = "identity")

Main effect time

 new_grinta_all %>%
  mutate(analyte_matrix = paste(analyte, matrix, sep = "_")) %>%
  group_by(time) %>% 
  summarise(mean_conc = mean(concentration_scaled)) %>%
  ggplot(aes(x = time, y = mean_conc)) +
  geom_point() +
   geom_bar(stat = "identity")

Main differences between subjects

 new_grinta_all %>%
  mutate(analyte_matrix = paste(analyte, matrix, sep = "_")) %>%
  group_by(subject) %>% 
  summarise(mean_conc = mean(concentration_scaled)) %>%
  ggplot(aes(x = subject, y = mean_conc)) +
  geom_point() +
   geom_bar(stat = "identity")

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)

Including Plots

You can also embed plots, for example:

plot(pressure)

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.



uashogeschoolutrecht/citrulliner documentation built on Dec. 12, 2020, 7:12 a.m.