data_prep <- function(input_tsv, roi_start, roi_end, sep='\t')
{
#' A function to process the tsv data from T1 modality
#' @param input_tsv the tsv file containing all the ROI measurement. The file should strictly contian these columns: "participant_id session_id group age_bl prevdemals_family_code sex", then following each column for each ROI
#' @param roi_start the first ROI name
#' @param roi_end the last ROI name
#' @param sep the separator for the tsv file, by default is '\t'
#'
#' @return a list of the preprocessed data from the tsv files
#' @export
#' @examples
#' WARNING: the first 7 columns should be 'participant_id session_id group age_bl prevdemals_family_code sex
input_tsv <- read.table(input_tsv, sep = sep, header = T, dec = ',')
#### Check out the two files contens are consitent
ses_info <- table(input_tsv$session_id)
cat("The session info comparison: ", ses_info, '\n')
print(ses_info)
cat('\n')
group_info <- table(input_tsv$group)
cat("The group info comparison: ")
print(group_info)
cat('\n')
age_info <- summary(as.numeric(as.character(input_tsv$age_bl)))
cat("The age_bl info: ")
print(age_info)
cat('\n')
summary(as.numeric(as.character(input_tsv$expected_years_aoo_bl)))
sex_info <- table(input_tsv$sex)
cat("The sex info: ")
print(sex_info)
cat('\n')
### checkout dimension
input_tsv_dim <- dim(input_tsv)
cat("The source data tsv dimension is: ")
print(input_tsv_dim)
cat('\n')
## simplifier les noms de participant_id
input_tsv$participant_id <- gsub('sub-PREVDEMALS', '', input_tsv$participant_id)
length(unique( input_tsv$participant_id))
length(unique(substr(input_tsv$participant_id, 6, 9)))
input_tsv$participant_id <- substr(input_tsv$participant_id, 6, 9)
### make sure the numerical input_tsv to be numerical
input_tsv[, setdiff(colnames(input_tsv), c("participant_id", 'prevdemals_family_code', "session_id", "group", "sex"))] <- apply(input_tsv[, setdiff(colnames(input_tsv), c("participant_id", 'prevdemals_family_code', "session_id", "group", "sex"))], 2, function(x)as.numeric(as.character(x))) # 2 means columns to run
## Suppression des colonnes non utiles
data_analyse <- input_tsv[, c(which(colnames(input_tsv) %in% c('participant_id', 'prevdemals_family_code', 'group', 'age_bl', 'sex')), match(roi_start, colnames(input_tsv)):match(roi_end, colnames(input_tsv)))]
## Make sure all the categorical factor to be character
data_analyse[, c("participant_id", 'prevdemals_family_code', "group", "sex")] <- apply(data_analyse[, c("participant_id", 'prevdemals_family_code', "group", "sex")], 2, as.character)
###############
#########Make sure there is no NA in the data
#################
NA_res <- apply(data_analyse, 2, function(x) sum(is.na(x)))# 0 OK
cat("Note, important, check out the number of every column, make sure it is 0: ")
print(NA_res)
cat('\n')
nom_ROIs_volume <- setdiff(colnames(data_analyse), c("participant_id", 'prevdemals_family_code', "group", "age_bl", "sex"))
### extract the MEAN from the nom_ROIs_volume
nom_ROIs_volume <- nom_ROIs_volume[which(regexpr('_Mean', nom_ROIs_volume, fixed = T) != (-1))]
###Redefine the data_analyses
data_analyse <- data_analyse[, c(c("participant_id", 'prevdemals_family_code', "group", "age_bl", "sex"), nom_ROIs_volume)]
###Compute the mean and std of the CN, PS, PT.
data_analyse_CN <- data_analyse[which(data_analyse$group %in% c('CN')), ]
data_analyse_PS <- data_analyse[which(data_analyse$group %in% c('PS')), ]
data_analyse_PT <- data_analyse[which(data_analyse$group %in% c('PT')), ]
data_analyse_CN_age_mean <- mean(data_analyse_CN[, c("age_bl")])
data_analyse_CN_age_std <- sd(data_analyse_CN[, c("age_bl")])
cat("The mean and stand deviation of CN: ")
print(data_analyse_CN_age_mean)
print(data_analyse_CN_age_std)
cat('\n')
sex_info_CN <- table(data_analyse_CN$sex)
cat("The sex info for CN: ")
print(sex_info_CN)
cat('\n')
data_analyse_PS_age_mean <- mean(data_analyse_PS[, c("age_bl")])
data_analyse_PS_age_std <- sd(data_analyse_PS[, c("age_bl")])
cat("The mean and stand deviation of PS: ")
print(data_analyse_PS_age_mean)
print(data_analyse_PS_age_std)
cat('\n')
sex_info_PS <- table(data_analyse_PS$sex)
cat("The sex info for PS: ")
print(sex_info_PS)
cat('\n')
data_analyse_PT_age_mean <- mean(data_analyse_PT[, c("age_bl")])
data_analyse_PT_age_std <- sd(data_analyse_PT[, c("age_bl")])
cat("The mean and stand deviation of PT: ")
print(data_analyse_PT_age_mean)
print(data_analyse_PT_age_std)
cat('\n')
sex_info_PT <- table(data_analyse_PT$sex)
cat("The sex info for PT: ")
print(sex_info_PT)
cat('\n')
return(list(data_analyse, nom_ROIs_volume))
}
##########
## QQPLOT
qqplot_data <- function (vec) # argument: vector of numbers
{
# following four lines from base R's qqline()
y <- quantile(vec[!is.na(vec)], c(0.25, 0.75))
x <- qnorm(c(0.25, 0.75))
slope <- diff(y)/diff(x)
int <- y[1L] - slope * x[1L]
return(data.frame(resids = vec,
slope = slope,
int = int))
}
qqplot_func <- function(data_validation, ROIs){
return(do.call('rbind', lapply(ROIs, function(var)
cbind(data.frame(var = gsub('.TIV', '', var)),
qqplot_data(data_validation$res[which(data_validation$var == var)])))))
}
##### FUNC to do group comparison
# Create a function to round the numerical data
format_arrondi <- function(x, arrondi = 2)format(round(x, arrondi), nsmall=arrondi, scientific=TRUE)
# Create a function to format the pvalue for the following result
format_pvalue <- function(pvalue, n.arrondi = 3) gsub(' ', '', ifelse(pvalue < 0.05, paste(format(ifelse(round(pvalue, n.arrondi) == 0, '<0.001', round(pvalue, n.arrondi)), nsmall=n.arrondi), '*', sep=''), format(round(pvalue, n.arrondi), nsmall=n.arrondi)))
## studentized residuals for lmer
StudentResid = function(fit){
#### This is a function to calculate the studentized residuals for mixed-effect linear model
res = residuals(fit)
H = hatvalues(fit)
sigma = summary(fit)$sigm
sres = sapply(1:length(res), function(i) res[[i]]/(sigma*sqrt(1-H[[i]])))
return(sres)
}
# this is a function to calculate the cohen's f value for fixed model
cohenD_fixed_model <- function(model_complet, model_emboite){
return( (summary(model_complet)$r.squared-summary(model_emboite)$r.squared)/(1-summary(model_complet)$r.squared))
}
## this is a function to calculate the cohen's f value for mixed model
# According to Cohen’s (1988) guidelines, f2f2≥ 0.02, f2f2≥ 0.15, and f2f2 ≥ 0.35 represent small, medium, and large effect sizes, respectively.
# this is a function to calculate the cohen's f value for mixed model
cohenD_mixed_model <- function(model_complet, model_emboite){
cohenf = (r.squaredGLMM(model_complet)[2]-r.squaredGLMM(model_emboite)[2])/r.squaredGLMM(model_complet)[2]
return(cohenf[1])
}
analysisPCA <- function(groupPalette, data_analyse, nom_ROIs_volume)
{
#### This is a function to run PCA for group factor
data_PCA <- data_analyse[, setdiff(colnames(data_analyse), c('age_bl', 'sex'))]
data_PCA$group <- factor(data_PCA$group) # encode a vector as a factor
res.pca_vol_normalise <- PCA(data_PCA[, c('group', nom_ROIs_volume)], scale.unit = TRUE, ncp = length(nom_ROIs_volume), quali.sup = 1, graph = F)
plot.PCA(res.pca_vol_normalise,
habillage = 1,
ellipse = coord.ellipse(cbind.data.frame(data_analyse$group,res.pca_vol_normalise$ind$coord), bary=T),
cex = 2, label = 'none', col.hab = groupPalette)
plot.PCA(res.pca_vol_normalise, axes=c(1, 2), choix="var", cex = 0.7)
return(res.pca_vol_normalise)
}
##################
###################
# Linear model function
#####################
######################
general_linear_model_with_interaction <- function(data, effets, ROIs, model = 'fixe', arrondi = 10, correction = 'BH'){
if(model == 'fixe'){
results_tableau <- do.call('rbind', lapply(ROIs, function(var){
model_complet <- lm(as.formula(paste(var, paste(effets, collapse = ' + '), sep = ' ~ ')), data = data)
results <- cbind(data.frame(var = gsub('_', '.', gsub('.cortex', '', gsub('_volume', '', gsub('.TIV', '', gsub('Left', 'L', gsub('Right', 'R', var))))))),
do.call('cbind', lapply(effets, function(e){
### This here is to define the full model and reduced model, here, really just consider the interaction's effect when creating the reduced model
if(any(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1)) & regexpr(':', e) == (-1))
model_complet <- lm(as.formula(paste(var, paste(setdiff(effets, effets[which(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1))]), collapse = ' + '), sep = ' ~ ')), data = data)
model_low <- lm(as.formula(paste(var, paste(setdiff(effets, effets[which(regexpr(e, effets, fixed = T) != (-1))]), collapse = ' + '), sep = ' ~ ')), data = data)
if(regexpr(':', e) != (-1)){ coef <- format_arrondi(coef(model_complet)[which(regexpr(e, names(coef(model_complet))) != (-1))], arrondi)}else{coef <- format_arrondi(coef(model_complet)[which(regexpr(e, names(coef(model_complet))) != (-1) & regexpr(':', names(coef(model_complet))) == (-1))], arrondi)}
res <- data.frame(data.frame(t(coef)),
f2cohen = format_arrondi(cohenD_fixed_model(model_complet, model_low), 2),
pvalue = anova(model_complet, model_low, test='LRT')[["Pr(>Chi)"]][2],
# pvalueWALDconsist = waldtest(model_low, model_complet, vcov = vcov_consist)$"Pr(>F)"[2],
pvalue_mult = NA)
colnames(res) <- paste(e, colnames(res), sep='_')
return(res)
})))
}))
}else{
##### mixed Model
results_tableau <- do.call('rbind', lapply(ROIs, function(var){
model_complet <- lmer(as.formula(paste(var, ' ~ ', paste(effets, collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
cbind(data.frame(var = gsub('_', '.', gsub('.cortex', '', gsub('_volume', '', gsub('.TIV', '', gsub('Left', 'L', gsub('Right', 'R', var))))))),
do.call('cbind', lapply(effets, function(e){
### This here is to define the full model and reduced model, here, really just consider the interaction's effect when creating the reduced model
if(any(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1)) & regexpr(':', e) == (-1)) model_complet <- lmer(as.formula(paste(var, ' ~ ', paste(setdiff(effets, effets[which(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1))]), collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
model_low <- lmer(as.formula(paste(var, ' ~ ', paste(setdiff(effets, effets[which(regexpr(e, effets, fixed = T) != (-1))]), collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
if(regexpr(':', e) != (-1)){ coef <- format_arrondi(fixef(model_complet)[which(regexpr(e, names(fixef(model_complet))) != (-1))], arrondi)}else{coef <- format_arrondi(fixef(model_complet)[which(regexpr(e, names(fixef(model_complet))) != (-1) & regexpr(':', names(fixef(model_complet))) == (-1))], arrondi)}
res <- data.frame(data.frame(t(coef)),
pvalue = anova(model_complet, model_low, test='LRT')[["Pr(>Chisq)"]][2],
pvalue_mult = NA)
colnames(res) <- paste(e, colnames(res), sep='_')
return(res)
})))
}))
}
#### multiple correction for the pvalue
for(col in colnames(results_tableau)[which(regexpr('_mult', colnames(results_tableau)) != (-1))]){
results_tableau[, col] <- p.adjust(results_tableau[, gsub('_mult', '', col), ], 'BH', nrow(results_tableau))
}
############ format the result tabel
results_tableau[, which(regexpr('pvalue', colnames(results_tableau)) != (-1))] <- apply(results_tableau[, which(regexpr('pvalue', colnames(results_tableau)) != (-1))] , 2, function(x) format_pvalue(x))
return(results_tableau)
}
##########################################################################
############## General linear model without interaction between age and group
##########################################################################
general_linear_model_without_interaction <- function(data, effets, ROIs, model = 'fixe', arrondi = 10, correction = 'BH'){
if(model == 'fixe'){
results_tableau <- do.call('rbind', lapply(ROIs, function(var){
model_complet <- lm(as.formula(paste(var, paste(effets, collapse = ' + '), sep = ' ~ ')), data = data)
results <- cbind(data.frame(var = gsub('_', '.', gsub('.cortex', '', gsub('_volume', '', gsub('.TIV', '', gsub('Left', 'L', gsub('Right', 'R', var))))))),
do.call('cbind', lapply(effets, function(e){
### This here is to define the full model and reduced model, here, really just consider the interaction's effect when creating the reduced model
model_low <- lm(as.formula(paste(var, paste(setdiff(effets, effets[which(regexpr(e, effets, fixed = T) != (-1))]), collapse = ' + '), sep = ' ~ ')), data = data)
coef <- format_arrondi(coef(model_complet)[which(regexpr(e, names(coef(model_complet))) != (-1))], arrondi)
res <- data.frame(data.frame(t(coef)),
f2cohen = format_arrondi(cohenD_fixed_model(model_complet, model_low), 2),
pvalue = anova(model_complet, model_low, test='LRT')[["Pr(>Chi)"]][2],
# pvalueWALDconsist = waldtest(model_low, model_complet, vcov = vcov_consist)$"Pr(>F)"[2],
pvalue_mult = NA)
colnames(res) <- paste(e, colnames(res), sep='_')
return(res)
})))
}))
}else{
##### mixed Model
results_tableau <- do.call('rbind', lapply(ROIs, function(var){
model_complet <- lmer(as.formula(paste(var, ' ~ ', paste(effets, collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
cbind(data.frame(var = gsub('_', '.', gsub('.cortex', '', gsub('_volume', '', gsub('.TIV', '', gsub('Left', 'L', gsub('Right', 'R', var))))))),
do.call('cbind', lapply(effets, function(e){
### This here is to define the full model and reduced model, here, really just consider the interaction's effect when creating the reduced model
### This is actually for model construction with interaction.
## this sentense is to detect the interaction, if e is not interaction, use the complete model in the beginning
if(any(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1)) & regexpr(':', e) == (-1))
model_complet <- lmer(as.formula(paste(var, ' ~ ', paste(setdiff(effets, effets[which(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1))]), collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
## This is the reduced_model without the var
model_low <- lmer(as.formula(paste(var, ' ~ ', paste(setdiff(effets, effets[which(regexpr(e, effets, fixed = T) != (-1))]), collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
### This is for interaction
if(regexpr(':', e) != (-1)){ coef <- format_arrondi(fixef(model_complet)[which(regexpr(e, names(fixef(model_complet))) != (-1))], arrondi)}
## This is for without interaction
else{coef <- format_arrondi(fixef(model_complet)[which(regexpr(e, names(fixef(model_complet))) != (-1) & regexpr(':', names(fixef(model_complet))) == (-1))], arrondi)}
res <- data.frame(data.frame(t(coef)),
f2cohen = format_arrondi(cohenD_mixed_model(model_complet, model_low), 2), ### cohen's f2
cohenD = as.numeric(coef) / sum(resid(model_complet)^2),
pvalue = anova(model_complet, model_low, test='LRT')[["Pr(>Chisq)"]][2], # Performs a Likelihood Ratio Test between two nested IRT models.
pvalue_mult = NA)
colnames(res) <- paste(e, colnames(res), sep='_')
return(res)
})))
}))
}
#### multiple correction for the pvalue
for(col in colnames(results_tableau)[which(regexpr('_mult', colnames(results_tableau)) != (-1))]){
results_tableau[, col] <- p.adjust(results_tableau[, gsub('_mult', '', col), ], 'BH', nrow(results_tableau))
}
############ format the result tabel
results_tableau[, which(regexpr('pvalue', colnames(results_tableau)) != (-1))] <- apply(results_tableau[, which(regexpr('pvalue', colnames(results_tableau)) != (-1))] , 2, function(x) format_pvalue(x))
return(results_tableau)
}
######################
########### Post Hoc analysis function with interaction
########################
postHOC_GR_ageGR <- function(data, ROIs, model = 'fixe', pvalue_GR, pvalue_ageGR, correction = 'BH', arrondi = 4){
results <- do.call('rbind', lapply(ROIs, function(var){
if(model == 'fixe'){
if(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1)){
model_complet <- lm(as.formula(paste(var, ' - 1 + group + age_bl + sex + group:age_bl', sep = ' ~ ')), data = data)
}else{
model_complet <- lm(as.formula(paste(var, ' - 1 + group + age_bl + sex', sep = ' ~ ')), data = data)
}
}else{
#### fixed effect
if(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1)){
model_complet <- lmer(as.formula(paste(var, ' - 1 + group + age_bl + sex + group:age_bl + (1|prevdemals_family_code)', sep = ' ~ ')), data = data)
}else{
model_complet <- lmer(as.formula(paste(var, ' - 1 + group + age_bl + sex + (1|prevdemals_family_code)', sep = ' ~ ')), data = data)
}
}
# if(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1) | regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1)){
return(data.frame(var = var,
GR_PS_CN_coeff = ifelse(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1),
ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(-1, 1, 0, 0, 0, 0, 0), 1)))$test$coeff[1], arrondi),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(-1, 1, 0, 0, 0), 1)))$test$coeff[1], arrondi)),
NA),
GR_PS_CN_pvalue = ifelse(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1),
ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(-1, 1, 0, 0, 0, 0, 0), 1)))$test$pvalues[1], arrondi),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(-1, 1, 0, 0, 0), 1)))$test$pvalues[1], arrondi)),
NA),
GR_PS_CN_pvalue_corr = NA,
# GR_CN_PT = ifelse(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1),
# summary(glht(model_complet, linfct = matrix(c(-1, 0, 1, 0, 0, 0, 0), 1)))$test$pvalues[1],
# NA),
GR_PT_PS_coeff = ifelse(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1),
ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, -1, 1, 0, 0, 0, 0), 1)))$test$coeff[1], arrondi),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, -1, 1, 0, 0), 1)))$test$coeff[1], arrondi)),
NA),
GR_PT_PS_pvalue = ifelse(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1),
ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, -1, 1, 0, 0, 0, 0), 1)))$test$pvalues[1], arrondi),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, -1, 1, 0, 0), 1)))$test$pvalues[1], arrondi)),
NA),
GR_PT_PS_pvalue_corr = NA,
ageGR_PS_CN_coeff = ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
summary(glht(model_complet, linfct = matrix(c(0, 0, 0, -1, 0, 1, 0), 1)))$test$coeff[1],
NA),
ageGR_PS_CN_pvalue = ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, 0, 0, -1, 0, 1, 0), 1)))$test$pvalues[1], arrondi),
NA),
ageGR_PS_CN_pvalue_corr = NA,
# ageGR_CN_PT = ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
# summary(glht(model_complet, linfct = matrix(c(0, 0, 0, -1, 0, 0, 1), 1)))$test$pvalues[1],
# NA),
ageGR_PT_PS_coef = ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, 0, 0, 0, 0, -1, 1), 1)))$test$coeff[1], arrondi),
NA),
ageGR_PT_PS_pvalue = ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
summary(glht(model_complet, linfct = matrix(c(0, 0, 0, 0, 0, -1, 1), 1)))$test$pvalues[1],
NA),
ageGR_PT_PS_pvalue_corr = NA))
# }
}))
results[, c('GR_PS_CN_pvalue', 'GR_PT_PS_pvalue')] <- t(apply(results[, c('GR_PS_CN_pvalue', 'GR_PT_PS_pvalue')], 1, function(x) p.adjust(x, 'holm', 2)))
results[, c('ageGR_PS_CN_coeff', 'ageGR_PT_PS_pvalue')] <- t(apply(results[, c('ageGR_PS_CN_coeff', 'ageGR_PT_PS_pvalue')], 1, function(x) p.adjust(x, 'holm', 2)))
results[, c('GR_PS_CN_pvalue_corr', 'GR_PT_PS_pvalue_corr', 'ageGR_PS_CN_pvalue_corr', 'ageGR_PT_PS_pvalue_corr')] <- apply(results[, c('GR_PS_CN_pvalue', 'GR_PT_PS_pvalue', 'ageGR_PS_CN_pvalue', 'ageGR_PT_PS_pvalue')], 2, function(x) p.adjust(as.numeric(x), correction, sum(!is.na(x))))
results[, which(regexpr('pvalue', colnames(results)) != (-1))] <- apply( results[, which(regexpr('pvalue', colnames(results)) != (-1))], 2, function(x) format_pvalue(as.numeric(x)))
return(results)
}
###############################
########### model validation
###################################
data_validation <- function(data, ROIs, model = 'fixe'){
if(model == 'fixe'){
return(do.call('rbind', mclapply(ROIs, function(var){
model <- lm(as.formula(paste(var, ' ~ age_bl + sex + group + age_bl*group', sep = '')), data = data)
return(data.frame(var = var,
id = data$participant_id,
y = data[,var],
y_hat = fitted(model),
res = residuals(model),
res_studentise = studres(model),
levier = hatvalues(model)) ## In statistics and in particular in regression analysis, leverage is a measure of how far away the independent variable values of an observation are from those of the other observations.
)
}, mc.cores = 3)))
}else{
return(do.call('rbind', mclapply(ROIs, function(var){
model <- lmer(as.formula(paste(var, ' ~ age_bl + sex + group + age_bl*group + (1|prevdemals_family_code)', sep = '')), data = data)
return(data.frame(var = var,
id = data$participant_id,
y = data[,var],
y_hat = fitted(model),
res = residuals(model),
levier = hatvalues(model))
)
}, mc.cores = 3)))
}
}
################
###### Data validation table creation
###############
random_effect_inspect <- function(data, ROIs, arrondi = 3){
return(do.call('rbind', mclapply(ROIs, function(var){
model <- lmer(as.formula(paste(var, ' ~ age_bl + sex + group + age_bl*group + (1|prevdemals_family_code)', sep = '')), data = data)
return(cbind(data.frame(var = var),
setNames(data.frame(t(data.frame(res = format_arrondi(ranef(model)$prevdemals_family_code[, 1], arrondi)))), rownames(ranef(model)$prevdemals_family_code)),
data.frame(var_alea = format_arrondi(attr(VarCorr(model)$prevdemals_family_code, "stddev")[1], arrondi), ### Get the stand deviatin of the random effect
var_res = format_arrondi(summary(model)$sigma, arrondi),
prop_var_tot = format_arrondi(attr(VarCorr(model)$prevdemals_family_code, "stddev")[1]/(summary(model)$sigma+attr(VarCorr(model)$prevdemals_family_code, "stddev")[1]), arrondi))))
}, mc.cores = 3)))
}
#################
############ heterogeneity check function for fixed effect linear model
#############################
inspect_heterogeneite_fixed <- function(data, ROIs){
return(do.call('rbind', mclapply(ROIs, function(var){
model <- lm(as.formula(paste(var, ' ~ age_bl + sex + group + age_bl*group', sep = '')), data = data)
return(data.frame(var = gsub('.TIV', '', gsub('Right', 'R', gsub('Left', 'L', var))),
bp = format_pvalue(bptest(model)$p.value),
white = format_pvalue(bptest(model, ~ age_bl*sex + I(age_bl^2) + age_bl*group*sex, data = data_analyse)$p.value))
)
}, mc.cores = 3)))
}
###########################
##### Group comparison
############################
multiple_compare_groupe <- function(data, var_group = 'group', vars = setdiff(colnames(data_analyse), c('group', 'participant_id')), factorVars = c('sex'), test = 'wilcox', arrondi = 2, indicateur = 'median', format_pvalue = T, correct = 'bonferroni'){
# ##### When you wanna use this to compare two groups, you can just use this
# do.call constructs and exectues a function call.
#mclapply is a parallelized version of lapply mclapply(x, FUN...)
# if the var in vars is in factorVars, and if it is sex, gointo first if, else gointo else, and if not in factorVars, gointo second else
# for sex, is to find the correlation between the var and the groups, if they are independent
###### Choose between non-parametric(wilcox) and parametric(t-test):
###Note: One principled response to this is "safety first": with no way to reliably verify the normality assumption in a small sample, stick to non-parametric methods
######
groupe <- sort(unique(as.character(data[, var_group])))
table <- do.call('rbind', mclapply(vars, function(var){
if(var %in% factorVars){
#### This is to consider just sex
if(length(unique(data[, var])) == 2){
### This is for sex
res <- do.call('cbind', lapply(groupe, function(gr){
data.frame(groupe = paste(sum(subset(data, get(var_group) == gr)[,var] == sort(unique(data[, var]))[1]), ' (',
format_arrondi(100*sum(subset(data, get(var_group) == gr)[,var] == sort(unique(data[, var]))[1])/
sum(!is.na(subset(data, get(var_group) == gr)[,var]))), '%)', sep= ''))
}))
colnames(res) <- groupe
return(cbind(data.frame(var = paste(var, sort(unique(data[, var]))[1], sep=' = ')),
res,
data.frame(pvalue = chisq.test(data[, var_group], data[, var])$p.value)))
}else{
### This is to do the real multiple comparison for the diff ROIs
res <- do.call('rbind', lapply(sort(unique(data[, var])), function(mod){
cbind(data.frame(var = paste(var, mod, sep=' = ')),
do.call('cbind', lapply(groupe, function(gr){
data.frame(groupe = paste(sum(subset(data, get(var_group) == gr)[,var] == mod), ' (',
format_arrondi(100*sum(subset(data, get(var_group) == gr)[,var] == mod)/
sum(!is.na(subset(data, get(var_group) == gr)[,var]))), '%)', sep= ''))
})))
}))
colnames(res) <- c('var', groupe)
return(cbind(res,
data.frame(pvalue = c(chisq.test(data[, var_group], data[, var])$p.value, rep(NA, length(unique(data[, var]))-1)))))
}
}else{
res <- do.call('cbind', lapply(groupe, function(gr){
data.frame(groupe = ifelse(indicateur == 'median',
paste(format_arrondi(median(subset(data, get(var_group) == gr)[,var])),
' [', format_arrondi(quantile(subset(data, get(var_group) == gr)[,var], 0.25)), ',',
format_arrondi(quantile(subset(data, get(var_group) == gr)[,var], 0.75)), ']', sep=''),
paste(format_arrondi(mean(subset(data, get(var_group) == gr)[,var])),
' (', format_arrondi(sd(subset(data, get(var_group) == gr)[,var])), ')', sep='')))
}))
colnames(res) <- groupe
return(cbind(data.frame(var = var),
res,
pvalue = ifelse(length(groupe) == 2,
ifelse(test == 'wilcox',
wilcox.test(data[,var] ~ data[,var_group])$p.value,
t.test(data[,var] ~ data[,var_group], var.equal = ifelse(var.test(data[,var] ~ data[,var_group])$p.value<0.05, F, T))$p.value),
ifelse(test == 'wilcox',
kruskal.test(data[,var] ~ factor(data[,var_group]))$p.value,
anova(lm(data[,var] ~ factor(data[,var_group])))[["Pr(>F)"]][1]))))
}
}, mc.cores = 3))
########## To get the group info
res <- do.call('cbind', lapply(groupe, function(gr){
data.frame(groupe = paste(nrow(subset(data, get(var_group) == gr)), ' (',
format_arrondi(100*nrow(subset(data, get(var_group) == gr))/sum(!is.na(data[, var_group]))), '%)', sep= ''))
}))
colnames(res) <- groupe
############
########## concatenate the res and table
table <- rbind(cbind(data.frame(var = 'N'),
res,
data.frame(pvalue = NA)),
table)
table$pvalue_multiple <- p.adjust(as.numeric(as.character(table$pvalue)), correct, sum(!is.na(table$pvalue)))
if(format_pvalue) table[, c('pvalue', 'pvalue_multiple')] <- apply(table[, c('pvalue', 'pvalue_multiple')] , 2, function(x) format_pvalue(pvalue = as.numeric(as.character(x))))
return(table)
}
#######################################################################
####### This is to plot the regression plot of ROI
#######################################################################
plot_regreesion_scatter <- function(data, ROI, subtitle, xlabel="", ylabel="", legend_postion="right", y_axis_limits=NULL, y_label_format=NULL){
plot_data <- data[, c('participant_id', 'group', 'sex', "age_bl", ROI)]
nom_ROIs_volume_TIV_sig_newname = c('participant_id', 'group', 'sex', "age_bl", subtitle)
colnames(plot_data) <- nom_ROIs_volume_TIV_sig_newname
ggplot_img <- ggplot(melt(plot_data, id.vars = c('participant_id', 'group', 'sex', "age_bl")),
aes(x = age_bl, y = value, color=group)) + geom_point(size=3) +
geom_smooth(data = melt(plot_data, id.vars = c('participant_id', 'group', 'sex', "age_bl")),
aes(x = age_bl, y = value, color=group), method = "lm", se = FALSE) + facet_wrap(~variable, scales="free") + scale_colour_manual(labels = c("C9-", "C9+"), values=c("blue", "deeppink4")) + theme_bw() +
theme(axis.line.x = element_line(colour = "black", size=1.5),
axis.line.y = element_line(color="black", size=1.5),
axis.text=element_text(size=20, colour="black", face="bold"),
axis.title=element_text(size=20, colour="black", face="bold"),
axis.ticks = element_line(colour = "black", size=1.5, linetype = 'longdash'),
axis.ticks.length=unit(0.3,"cm"),
axis.ticks.x = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
strip.background = element_blank(),
legend.position = legend_postion,
legend.title = element_text(size=20, colour="black", face="bold"),
legend.text = element_text(size=20, colour="black", face="bold"),
strip.text = element_text(size=24, colour="black", face="bold", vjust=3)) + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(labels = y_label_format, expand = c(0, 0)) + labs(x = xlabel, y = ylabel, color = "Group", face="bold") + coord_cartesian(xlim=c(20, 81), ylim=y_axis_limits)
return(ggplot_img)
}
# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot_ggplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.