data_prep <- function(input_tsv, roi_start, roi_end, sep='\t', decimal='.')
{
#' A function to process the tsv data from T1 modality
#' @param input_tsv the tsv file containing all the ROI measurement. The file should strictly contian these columns: "participant_id session_id group age_bl prevdemals_family_code sex", then following each column for each ROI
#' @param roi_start the first ROI name
#' @param roi_end the last ROI name
#' @param sep the separator for the tsv file, by default is '\t'
#' @param decimal how to define the decimal in the tsv file, by default is "."
#'
#' @return a list of the preprocessed data from the tsv files
#' WARNING: the first 7 columns should be 'participant_id session_id group age_bl prevdemals_family_code sex EstimatedTotalIntraCranialVol'
input_tsv <- read.table(input_tsv, sep = sep, header = T, dec = decimal)
info_data <- input_tsv[, c("participant_id", "session_id", "group", "age_bl", "prevdemals_family_code", "sex", "EstimatedTotalIntraCranialVol")]
#### Check out the two files contens are consitent
ses_info <- table(info_data$session_id)# 75 M0 OK
cat("The session info comparison: ", ses_info, '\n')
print(ses_info)
cat('\n')
group_info <- table(info_data$group)
cat("The group info comparison: ")
print(group_info)
cat('\n')
age_info <- summary(as.numeric(as.character(info_data$age_bl)))# 0 OK
cat("The age_bl info : ")
print(age_info)
cat('\n')
table(info_data$sex)#43 F, 32 M OK
### checkout dimension
input_tsv_dim <- dim(input_tsv)
cat("The source data csv dimension is: ")
print(input_tsv_dim)
cat('\n')
## simplifier les noms de participant_id
input_tsv$participant_id <- gsub('sub-PREVDEMALS', '', input_tsv$participant_id)
length(unique(input_tsv$participant_id))# 75
length(unique(substr(input_tsv$participant_id, 6, 9)))# ok 75
input_tsv$participant_id <- substr(input_tsv$participant_id, 6, 9)
### make sure the numerical input_tsv to be numerical
input_tsv[, setdiff(colnames(input_tsv), c("participant_id", "session_id", "group", "prevdemals_family_code", "sex"))] <- apply(input_tsv[, setdiff(colnames(input_tsv), c("participant_id", "session_id", "group", "prevdemals_family_code", "sex"))], 2, function(x)as.numeric(as.character(x))) # 2 means columns to run
## Calculate the mean TIV of the CN + PS
data_analyse_CN_PS <- input_tsv[which(input_tsv$group %in% c('CN', 'PS')), ]
mean_TIV <- mean(data_analyse_CN_PS$EstimatedTotalIntraCranialVol)
## Suppression des colonnes non utiles
data_analyse <- input_tsv[, c(which(colnames(input_tsv) %in% c('participant_id', 'group', 'age_bl', "prevdemals_family_code", 'sex', 'EstimatedTotalIntraCranialVol')), match(roi_start, colnames(input_tsv)):match(roi_end, colnames(input_tsv)))]
## Make sure all the categorical factor to be character
data_analyse[, c("participant_id", "prevdemals_family_code", "group", "sex")] <- apply(data_analyse[, c("participant_id", "prevdemals_family_code", "group", "sex")], 2, as.character)
### Make sure there is no NA in the data
apply(data_analyse, 2, function(x) sum(is.na(x)))# 0 OK
### Calculate the TIV for each roi
nom_ROIs_volume <- setdiff(colnames(data_analyse), c("participant_id", "group", "age_bl", "prevdemals_family_code", "sex", "EstimatedTotalIntraCranialVol"))
nom_ROIs_volume_TIV <- paste(nom_ROIs_volume, 'TIV', sep = '.')
### Append the TIV with the model variables togethe, here, the normalized TIV * mean_TIV of all the subjects
data_TIV <- apply(data_analyse[, nom_ROIs_volume], 2, function(x) mean_TIV*x/data_analyse$EstimatedTotalIntraCranialVol)
nom_ROIs_volume_TIV <- gsub('_', '.', nom_ROIs_volume)
colnames(data_TIV) <- nom_ROIs_volume_TIV# add the columns 2 the dataframe
data_analyse <- cbind(data_analyse[1:6], data_TIV)# combine these two dataframes
###Compute the mean and std of the CN, PS, PT.
data_analyse_CN <- data_analyse[which(data_analyse$group %in% c('CN')), ]
data_analyse_PS <- data_analyse[which(data_analyse$group %in% c('PS')), ]
data_analyse_PT <- data_analyse[which(data_analyse$group %in% c('PT')), ]
data_analyse_CN_age_mean <- mean(data_analyse_CN[, c("age_bl")])
data_analyse_CN_age_std <- sd(data_analyse_CN[, c("age_bl")])
cat("The mean and stand deviation of CN: ")
print(data_analyse_CN_age_mean)
print(data_analyse_CN_age_std)
cat('\n')
sex_info_CN <- table(data_analyse_CN$sex)
cat("The sex info for CN: ")
print(sex_info_CN)
cat('\n')
data_analyse_PS_age_mean <- mean(data_analyse_PS[, c("age_bl")])
data_analyse_PS_age_std <- sd(data_analyse_PS[, c("age_bl")])
cat("The mean and stand deviation of PS: ")
print(data_analyse_PS_age_mean)
print(data_analyse_PS_age_std)
cat('\n')
sex_info_PS <- table(data_analyse_PS$sex)
cat("The sex info for PS: ")
print(sex_info_PS)
cat('\n')
data_analyse_PT_age_mean <- mean(data_analyse_PT[, c("age_bl")])
data_analyse_PT_age_std <- sd(data_analyse_PT[, c("age_bl")])
cat("The mean and stand deviation of PT: ")
print(data_analyse_PT_age_mean)
print(data_analyse_PT_age_std)
cat('\n')
sex_info_PT <- table(data_analyse_PT$sex)
cat("The sex info for PT: ")
print(sex_info_PT)
cat('\n')
# chi2 test for sex
cat("The chi square test for sex: ")
mydata = cbind(table(data_analyse_CN[, c("sex")]),table(data_analyse_PS[, c("sex")]))
print(chisq.test(mydata)$p.value)
pvalue_sex = chisq.test(mydata)$p.value
cat('\n')
# Mann-whitney test for age
cat("The Mann-Whitney test for age: ")
print(wilcox.test(data_analyse_CN[, c("age_bl")], data_analyse_PS[, c("age_bl")], exact=FALSE)$p.value)
pvalue_age = wilcox.test(data_analyse_CN[, c("age_bl")], data_analyse_PS[, c("age_bl")], exact=FALSE)$p.value
cat('\n')
return(list(data_analyse, nom_ROIs_volume, nom_ROIs_volume_TIV, pvalue_sex, pvalue_age, data_analyse_PS_age_mean, data_analyse_PS_age_std, data_analyse_CN_age_mean, data_analyse_CN_age_std))
}
qqplot_data <- function (vec) # argument: vector of numbers
{
# following four lines from base R's qqline()
y <- quantile(vec[!is.na(vec)], c(0.25, 0.75))
x <- qnorm(c(0.25, 0.75))
slope <- diff(y)/diff(x)
int <- y[1L] - slope * x[1L]
return(data.frame(resids = vec,
slope = slope,
int = int))
}
qqplot_func <- function(data_validation, ROIs){
#' A function for QQ plot
#'
#' This function allows you to express your love of cats.
#' input_tsv: str, path to tsv
#' roi_start: str, firt ROI
#' roi_end: str, last ROI
#' sep: the delimiter of the tsv
#' decimal: the decimal in the tsv, . or ,
return(do.call('rbind', lapply(ROIs, function(var)
cbind(data.frame(var = gsub('.TIV', '', var)),
qqplot_data(data_validation$res[which(data_validation$var == var)])))))
}
##### FUNC to do group comparison
# Create a function to round the numerical data
format_arrondi <- function(x, arrondi = 2)format(round(x, arrondi), nsmall=arrondi)
# Create a function to format the pvalue for the following result
format_pvalue <- function(pvalue, n.arrondi = 3) gsub(' ', '', ifelse(pvalue < 0.05, paste(format(ifelse(round(pvalue, n.arrondi) == 0, '<0.001', round(pvalue, n.arrondi)), nsmall=n.arrondi), '*', sep=''), format(round(pvalue, n.arrondi), nsmall=n.arrondi)))
## studentized residuals for lmer
StudentResid = function(fit){
#### This is a function to calculate the studentized residuals for mixed-effect linear model
res = residuals(fit)
H = hatvalues(fit)
sigma = summary(fit)$sigm
sres = sapply(1:length(res), function(i) res[[i]]/(sigma*sqrt(1-H[[i]])))
return(sres)
}
cohenD_fixed_model <- function(model_complet, model_emboite){
return( (summary(model_complet)$r.squared-summary(model_emboite)$r.squared)/(1-summary(model_complet)$r.squared))
}
analysisPCA <- function(groupPalette, data_analyse, nom_ROIs_volume)
{
#### This is a function to run PCA for group factor
data_PCA <- data_analyse[, setdiff(colnames(data_analyse), c('age_bl', 'sex'))]
data_PCA$group <- factor(data_PCA$group) # encode a vector as a factor
res.pca_vol_normalise <- PCA(data_PCA[, c('group', nom_ROIs_volume)], scale.unit = TRUE, ncp = length(nom_ROIs_volume), quali.sup = 1, graph = F)
# this is the individual factor map for all the participants
# explanation for Confidence ellipses: The variance of the underlying population relates to the confidence ellipse. High variance will mean that the data are all over the place, so the mean is not well estimated, so the confidence ellipse will be larger than if the variance were smaller.
# plot.PCA(res.pca_vol_normalise,
# habillage = 1,
# ellipse = coord.ellipse(cbind.data.frame(data_analyse$group,res.pca_vol_normalise$ind$coord), bary=T),
# cex = 2, label = 'none', col.hab = groupPalette)
# The squared cosine shows the importance of a component for a given observation.
fviz_pca_ind(res.pca_vol_normalise, col.ind="cos2") +
scale_color_gradient2(low="white", mid="blue",
high="red", midpoint=0.50) + theme_minimal()
#plot.PCA(res.pca_vol_normalise, axes=c(1, 2), choix="var", cex = 0.7)
# this is the variable factor maps for cos2, higher value, give red color which means high correlation with the first two PCs
# The squared cosine shows the importance of a component for a given observation.
fviz_pca_var(res.pca_vol_normalise, col.var="cos2") +
scale_color_gradient2(low="white", mid="blue",
high="red", midpoint=0.5) + theme_minimal()
# this is to plot the scree plot of the data
fviz_screeplot(res.pca_vol_normalise, ncp=10)
return(res.pca_vol_normalise)
}
##################
###################
# Linear model function
#####################
######################
general_linear_model_with_interaction <- function(data, effets, ROIs, model = 'fixe', arrondi = 4, correction = 'BH'){
if(model == 'fixe'){
results_tableau <- do.call('rbind', lapply(ROIs, function(var){
model_complet <- lm(as.formula(paste(var, paste(effets, collapse = ' + '), sep = ' ~ ')), data = data)
results <- cbind(data.frame(var = gsub('_', '.', gsub('.cortex', '', gsub('_volume', '', gsub('.TIV', '', gsub('Left', 'L', gsub('Right', 'R', var))))))),
do.call('cbind', lapply(effets, function(e){
### This here is to define the full model and reduced model, here, really just consider the interaction's effect when creating the reduced model
if(any(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1)) & regexpr(':', e) == (-1))
model_complet <- lm(as.formula(paste(var, paste(setdiff(effets, effets[which(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1))]), collapse = ' + '), sep = ' ~ ')), data = data)
model_low <- lm(as.formula(paste(var, paste(setdiff(effets, effets[which(regexpr(e, effets, fixed = T) != (-1))]), collapse = ' + '), sep = ' ~ ')), data = data)
if(regexpr(':', e) != (-1)){ coef <- format_arrondi(coef(model_complet)[which(regexpr(e, names(coef(model_complet))) != (-1))], arrondi)}else{coef <- format_arrondi(coef(model_complet)[which(regexpr(e, names(coef(model_complet))) != (-1) & regexpr(':', names(coef(model_complet))) == (-1))], arrondi)}
res <- data.frame(data.frame(t(coef)),
f2cohen = format_arrondi(cohenD_fixed_model(model_complet, model_low), 2),
pvalue = anova(model_complet, model_low, test='LRT')[["Pr(>Chi)"]][2],
# pvalueWALDconsist = waldtest(model_low, model_complet, vcov = vcov_consist)$"Pr(>F)"[2],
pvalue_mult = NA)
colnames(res) <- paste(e, colnames(res), sep='_')
return(res)
})))
}))
}else{
##### mixed Model
results_tableau <- do.call('rbind', lapply(ROIs, function(var){
model_complet <- lmer(as.formula(paste(var, ' ~ ', paste(effets, collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
cbind(data.frame(var = gsub('_', '.', gsub('.cortex', '', gsub('_volume', '', gsub('.TIV', '', gsub('Left', 'L', gsub('Right', 'R', var))))))),
do.call('cbind', lapply(effets, function(e){
### This here is to define the full model and reduced model, here, really just consider the interaction's effect when creating the reduced model
if(any(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1)) & regexpr(':', e) == (-1)) model_complet <- lmer(as.formula(paste(var, ' ~ ', paste(setdiff(effets, effets[which(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1))]), collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
model_low <- lmer(as.formula(paste(var, ' ~ ', paste(setdiff(effets, effets[which(regexpr(e, effets, fixed = T) != (-1))]), collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
if(regexpr(':', e) != (-1)){ coef <- format_arrondi(fixef(model_complet)[which(regexpr(e, names(fixef(model_complet))) != (-1))], 4)}else{coef <- format_arrondi(fixef(model_complet)[which(regexpr(e, names(fixef(model_complet))) != (-1) & regexpr(':', names(fixef(model_complet))) == (-1))], 4)}
res <- data.frame(data.frame(t(coef)),
pvalue = anova(model_complet, model_low, test='LRT')[["Pr(>Chisq)"]][2],
pvalue_mult = NA)
colnames(res) <- paste(e, colnames(res), sep='_')
return(res)
})))
}))
}
#### multiple correction for the pvalue
for(col in colnames(results_tableau)[which(regexpr('_mult', colnames(results_tableau)) != (-1))]){
results_tableau[, col] <- p.adjust(results_tableau[, gsub('_mult', '', col), ], 'BH', nrow(results_tableau))
}
############ format the result tabel
results_tableau[, which(regexpr('pvalue', colnames(results_tableau)) != (-1))] <- apply(results_tableau[, which(regexpr('pvalue', colnames(results_tableau)) != (-1))] , 2, function(x) format_pvalue(x))
return(results_tableau)
}
##########################################################################
############## General linear model without interaction between age and group
##########################################################################
general_linear_model_without_interaction <- function(data, effets, ROIs, model = 'fixe', arrondi = 6, correction = 'BH'){
if(model == 'fixe'){
results_tableau <- do.call('rbind', lapply(ROIs, function(var){
model_complet <- lm(as.formula(paste(var, paste(effets, collapse = ' + '), sep = ' ~ ')), data = data)
results <- cbind(data.frame(var = gsub('_', '.', gsub('.cortex', '', gsub('_volume', '', gsub('.TIV', '', gsub('Left', 'L', gsub('Right', 'R', var))))))),
do.call('cbind', lapply(effets, function(e){
### This here is to define the full model and reduced model, here, really just consider the interaction's effect when creating the reduced model
model_low <- lm(as.formula(paste(var, paste(setdiff(effets, effets[which(regexpr(e, effets, fixed = T) != (-1))]), collapse = ' + '), sep = ' ~ ')), data = data)
coef <- format_arrondi(coef(model_complet)[which(regexpr(e, names(coef(model_complet))) != (-1))], arrondi)
res <- data.frame(data.frame(t(coef)),
f2cohen = format_arrondi(cohenD_fixed_model(model_complet, model_low), 2),
pvalue = anova(model_complet, model_low, test='LRT')[["Pr(>Chi)"]][2],
# pvalueWALDconsist = waldtest(model_low, model_complet, vcov = vcov_consist)$"Pr(>F)"[2],
pvalue_mult = NA)
colnames(res) <- paste(e, colnames(res), sep='_')
return(res)
})))
}))
}else{
##### mixed Model
results_tableau <- do.call('rbind', lapply(ROIs, function(var){
model_complet <- lmer(as.formula(paste(var, ' ~ ', paste(effets, collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
cbind(data.frame(var = gsub('_', '.', gsub('.cortex', '', gsub('_volume', '', gsub('.TIV', '', gsub('Left', 'L', gsub('Right', 'R', var))))))),
do.call('cbind', lapply(effets, function(e){
### This here is to define the full model and reduced model, here, really just consider the interaction's effect when creating the reduced model
if(any(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1)) & regexpr(':', e) == (-1)) model_complet <- lmer(as.formula(paste(var, ' ~ ', paste(setdiff(effets, effets[which(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1))]), collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
model_low <- lmer(as.formula(paste(var, ' ~ ', paste(setdiff(effets, effets[which(regexpr(e, effets, fixed = T) != (-1))]), collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
if(regexpr(':', e) != (-1)){ coef <- format_arrondi(fixef(model_complet)[which(regexpr(e, names(fixef(model_complet))) != (-1))], 4)}else{coef <- format_arrondi(fixef(model_complet)[which(regexpr(e, names(fixef(model_complet))) != (-1) & regexpr(':', names(fixef(model_complet))) == (-1))], 4)}
res <- data.frame(data.frame(t(coef)),
pvalue = anova(model_complet, model_low, test='LRT')[["Pr(>Chisq)"]][2],
pvalue_mult = NA)
colnames(res) <- paste(e, colnames(res), sep='_')
return(res)
})))
}))
}
#### multiple correction for the pvalue
for(col in colnames(results_tableau)[which(regexpr('_mult', colnames(results_tableau)) != (-1))]){
results_tableau[, col] <- p.adjust(results_tableau[, gsub('_mult', '', col), ], 'BH', nrow(results_tableau))
}
############ format the result tabel
results_tableau[, which(regexpr('pvalue', colnames(results_tableau)) != (-1))] <- apply(results_tableau[, which(regexpr('pvalue', colnames(results_tableau)) != (-1))] , 2, function(x) format_pvalue(x))
return(results_tableau)
}
######################
########### Post Hoc analysis function with interaction
########################
postHOC_GR_ageGR <- function(data, ROIs, model = 'fixe', pvalue_GR, pvalue_ageGR, correction = 'BH', arrondi = 4){
results <- do.call('rbind', lapply(ROIs, function(var){
if(model == 'fixe'){
if(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1)){
model_complet <- lm(as.formula(paste(var, ' - 1 + group + age_bl + sex + group:age_bl', sep = ' ~ ')), data = data)
}else{
model_complet <- lm(as.formula(paste(var, ' - 1 + group + age_bl + sex', sep = ' ~ ')), data = data)
}
}else{
#### fixed effect
if(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1)){
model_complet <- lmer(as.formula(paste(var, ' - 1 + group + age_bl + sex + group:age_bl + (1|prevdemals_family_code)', sep = ' ~ ')), data = data)
}else{
model_complet <- lmer(as.formula(paste(var, ' - 1 + group + age_bl + sex + (1|prevdemals_family_code)', sep = ' ~ ')), data = data)
}
}
# if(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1) | regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1)){
return(data.frame(var = var,
GR_PS_CN_coeff = ifelse(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1),
ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(-1, 1, 0, 0, 0, 0, 0), 1)))$test$coeff[1], arrondi),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(-1, 1, 0, 0, 0), 1)))$test$coeff[1], arrondi)),
NA),
GR_PS_CN_pvalue = ifelse(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1),
ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(-1, 1, 0, 0, 0, 0, 0), 1)))$test$pvalues[1], arrondi),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(-1, 1, 0, 0, 0), 1)))$test$pvalues[1], arrondi)),
NA),
GR_PS_CN_pvalue_corr = NA,
# GR_CN_PT = ifelse(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1),
# summary(glht(model_complet, linfct = matrix(c(-1, 0, 1, 0, 0, 0, 0), 1)))$test$pvalues[1],
# NA),
GR_PT_PS_coeff = ifelse(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1),
ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, -1, 1, 0, 0, 0, 0), 1)))$test$coeff[1], arrondi),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, -1, 1, 0, 0), 1)))$test$coeff[1], arrondi)),
NA),
GR_PT_PS_pvalue = ifelse(regexpr('\\*', pvalue_GR[which(ROIs == var)]) != (-1),
ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, -1, 1, 0, 0, 0, 0), 1)))$test$pvalues[1], arrondi),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, -1, 1, 0, 0), 1)))$test$pvalues[1], arrondi)),
NA),
GR_PT_PS_pvalue_corr = NA,
ageGR_PS_CN_coeff = ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
summary(glht(model_complet, linfct = matrix(c(0, 0, 0, -1, 0, 1, 0), 1)))$test$coeff[1],
NA),
ageGR_PS_CN_pvalue = ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, 0, 0, -1, 0, 1, 0), 1)))$test$pvalues[1], arrondi),
NA),
ageGR_PS_CN_pvalue_corr = NA,
# ageGR_CN_PT = ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
# summary(glht(model_complet, linfct = matrix(c(0, 0, 0, -1, 0, 0, 1), 1)))$test$pvalues[1],
# NA),
ageGR_PT_PS_coef = ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
format_arrondi(summary(glht(model_complet, linfct = matrix(c(0, 0, 0, 0, 0, -1, 1), 1)))$test$coeff[1], arrondi),
NA),
ageGR_PT_PS_pvalue = ifelse(regexpr('\\*', pvalue_ageGR[which(ROIs == var)]) != (-1),
summary(glht(model_complet, linfct = matrix(c(0, 0, 0, 0, 0, -1, 1), 1)))$test$pvalues[1],
NA),
ageGR_PT_PS_pvalue_corr = NA))
# }
}))
results[, c('GR_PS_CN_pvalue', 'GR_PT_PS_pvalue')] <- t(apply(results[, c('GR_PS_CN_pvalue', 'GR_PT_PS_pvalue')], 1, function(x) p.adjust(x, 'holm', 2)))
results[, c('ageGR_PS_CN_coeff', 'ageGR_PT_PS_pvalue')] <- t(apply(results[, c('ageGR_PS_CN_coeff', 'ageGR_PT_PS_pvalue')], 1, function(x) p.adjust(x, 'holm', 2)))
results[, c('GR_PS_CN_pvalue_corr', 'GR_PT_PS_pvalue_corr', 'ageGR_PS_CN_pvalue_corr', 'ageGR_PT_PS_pvalue_corr')] <- apply(results[, c('GR_PS_CN_pvalue', 'GR_PT_PS_pvalue', 'ageGR_PS_CN_pvalue', 'ageGR_PT_PS_pvalue')], 2, function(x) p.adjust(as.numeric(x), correction, sum(!is.na(x))))
results[, which(regexpr('pvalue', colnames(results)) != (-1))] <- apply( results[, which(regexpr('pvalue', colnames(results)) != (-1))], 2, function(x) format_pvalue(as.numeric(x)))
return(results)
}
###############################
########### model validation
###################################
data_validation <- function(data, ROIs, model = 'fixe'){
if(model == 'fixe'){
return(do.call('rbind', mclapply(ROIs, function(var){
model <- lm(as.formula(paste(var, ' ~ age_bl + sex + group + age_bl*group', sep = '')), data = data)
return(data.frame(var = var,
id = data$participant_id,
y = data[,var],
y_hat = fitted(model),
res = residuals(model),
res_studentise = studres(model),
levier = hatvalues(model)) ## In statistics and in particular in regression analysis, leverage is a measure of how far away the independent variable values of an observation are from those of the other observations.
)
}, mc.cores = 3)))
}else{
return(do.call('rbind', mclapply(ROIs, function(var){
model <- lmer(as.formula(paste(var, ' ~ age_bl + sex + group + age_bl*group + (1|prevdemals_family_code)', sep = '')), data = data)
return(data.frame(var = var,
id = data$participant_id,
y = data[,var],
y_hat = fitted(model),
res = residuals(model),
levier = hatvalues(model))
)
}, mc.cores = 3)))
}
}
################
###### Data validation table creation
###############
random_effect_inspect <- function(data, ROIs, arrondi = 3){
return(do.call('rbind', mclapply(ROIs, function(var){
model <- lmer(as.formula(paste(var, ' ~ age_bl + sex + group + age_bl*group + (1|prevdemals_family_code)', sep = '')), data = data)
return(cbind(data.frame(var = var),
setNames(data.frame(t(data.frame(res = format_arrondi(ranef(model)$prevdemals_family_code[, 1], arrondi)))), rownames(ranef(model)$prevdemals_family_code)),
data.frame(var_alea = format_arrondi(attr(VarCorr(model)$prevdemals_family_code, "stddev")[1], arrondi), ### Get the stand deviatin of the random effect
var_res = format_arrondi(summary(model)$sigma, arrondi),
prop_var_tot = format_arrondi(attr(VarCorr(model)$prevdemals_family_code, "stddev")[1]/(summary(model)$sigma+attr(VarCorr(model)$prevdemals_family_code, "stddev")[1]), arrondi))))
}, mc.cores = 3)))
}
#################
############ heterogeneity check function for fixed effect linear model
#############################
inspect_heterogeneite_fixed <- function(data, ROIs){
return(do.call('rbind', mclapply(ROIs, function(var){
model <- lm(as.formula(paste(var, ' ~ age_bl + sex + group + age_bl*group', sep = '')), data = data)
return(data.frame(var = gsub('.TIV', '', gsub('Right', 'R', gsub('Left', 'L', var))),
bp = format_pvalue(bptest(model)$p.value),
white = format_pvalue(bptest(model, ~ age_bl*sex + I(age_bl^2) + age_bl*group*sex, data = data_analyse)$p.value))
)
}, mc.cores = 3)))
}
###########################
##### Group comparison
############################
multiple_compare_groupe <- function(data, var_group = 'group', vars = setdiff(colnames(data_analyse), c('group', 'participant_id')), factorVars = c('sex'), test = 'wilcox', arrondi = 2, indicateur = 'median', format_pvalue = T, correct = 'bonferroni'){
# ##### When you wanna use this to compare two groups, you can just use this
# do.call constructs and exectues a function call.
#mclapply is a parallelized version of lapply mclapply(x, FUN...)
# if the var in vars is in factorVars, and if it is sex, gointo first if, else gointo else, and if not in factorVars, gointo second else
# for sex and prevdemals_family_code, is to find the correlation between the var and the groups, if they are independent
###### Choose between non-parametric(wilcox(permutation test)) and parametric(t-test):
###Note: One principled response to this is "safety first": with no way to reliably verify the normality assumption in a small sample, stick to non-parametric methods
######
groupe <- sort(unique(as.character(data[, var_group])))
table <- do.call('rbind', mclapply(vars, function(var){
if(var %in% factorVars){
if(length(unique(data[, var])) == 2){
res <- do.call('cbind', lapply(groupe, function(gr){
data.frame(groupe = paste(sum(subset(data, get(var_group) == gr)[,var] == sort(unique(data[, var]))[1]), ' (',
format_arrondi(100*sum(subset(data, get(var_group) == gr)[,var] == sort(unique(data[, var]))[1])/
sum(!is.na(subset(data, get(var_group) == gr)[,var]))), '%)', sep= ''))
}))
colnames(res) <- groupe
return(cbind(data.frame(var = paste(var, sort(unique(data[, var]))[1], sep=' = ')),
res,
data.frame(pvalue = chisq.test(data[, var_group], data[, var])$p.value)))
}else{
res <- do.call('rbind', lapply(sort(unique(data[, var])), function(mod){
cbind(data.frame(var = paste(var, mod, sep=' = ')),
do.call('cbind', lapply(groupe, function(gr){
data.frame(groupe = paste(sum(subset(data, get(var_group) == gr)[,var] == mod), ' (',
format_arrondi(100*sum(subset(data, get(var_group) == gr)[,var] == mod)/
sum(!is.na(subset(data, get(var_group) == gr)[,var]))), '%)', sep= ''))
})))
}))
colnames(res) <- c('var', groupe)
return(cbind(res,
data.frame(pvalue = c(chisq.test(data[, var_group], data[, var])$p.value, rep(NA, length(unique(data[, var]))-1)))))
}
}else{
res <- do.call('cbind', lapply(groupe, function(gr){
data.frame(groupe = ifelse(indicateur == 'median',
paste(format_arrondi(median(subset(data, get(var_group) == gr)[,var])),
' [', format_arrondi(quantile(subset(data, get(var_group) == gr)[,var], 0.25)), ',',
format_arrondi(quantile(subset(data, get(var_group) == gr)[,var], 0.75)), ']', sep=''),
paste(format_arrondi(mean(subset(data, get(var_group) == gr)[,var])),
' (', format_arrondi(sd(subset(data, get(var_group) == gr)[,var])), ')', sep='')))
}))
colnames(res) <- groupe
return(cbind(data.frame(var = var),
res,
pvalue = ifelse(length(groupe) == 2,
ifelse(test == 'wilcox',
wilcox.test(data[,var] ~ data[,var_group])$p.value,
t.test(data[,var] ~ data[,var_group], var.equal = ifelse(var.test(data[,var] ~ data[,var_group])$p.value<0.05, F, T))$p.value),
ifelse(test == 'wilcox',
kruskal.test(data[,var] ~ factor(data[,var_group]))$p.value,
anova(lm(data[,var] ~ factor(data[,var_group])))[["Pr(>F)"]][1]))))
}
}, mc.cores = 3))
res <- do.call('cbind', lapply(groupe, function(gr){
data.frame(groupe = paste(nrow(subset(data, get(var_group) == gr)), ' (',
format_arrondi(100*nrow(subset(data, get(var_group) == gr))/sum(!is.na(data[, var_group]))), '%)', sep= ''))
}))
colnames(res) <- groupe
table <- rbind(cbind(data.frame(var = 'N'),
res,
data.frame(pvalue = NA)),
table)
table$pvalue_multiple <- p.adjust(as.numeric(as.character(table$pvalue)), correct, sum(!is.na(table$pvalue)))
if(format_pvalue) table[, c('pvalue', 'pvalue_multiple')] <- apply(table[, c('pvalue', 'pvalue_multiple')] , 2, function(x) format_pvalue(pvalue = as.numeric(as.character(x))))
return(table)
}
#######################################################################
####### This is to plot the regression plot of ROI
#######################################################################
plot_regreesion_scatter <- function(data, ROI, subtitle, xlabel="", ylabel="", legend_postion="right", y_axis_limits=NULL, y_label_format=NULL){
plot_data <- data[, c('participant_id', 'group', 'sex', "age_bl", ROI)]
nom_ROIs_volume_TIV_sig_newname = c('participant_id', 'group', 'sex', "age_bl", subtitle)
colnames(plot_data) <- nom_ROIs_volume_TIV_sig_newname
ggplot_img <- ggplot(melt(plot_data, id.vars = c('participant_id', 'group', 'sex', "age_bl")),
aes(x = age_bl, y = value, color=group)) + geom_point(size=3) +
geom_smooth(data = melt(plot_data, id.vars = c('participant_id', 'group', 'sex', "age_bl")),
aes(x = age_bl, y = value, color=group), method = "lm", se = FALSE) + facet_wrap(~variable, scales="free") + scale_colour_manual(labels = c("C9-", "C9+"), values=c("blue", "deeppink4")) + theme_bw() +
theme(axis.line.x = element_line(colour = "black", size=1.5),
axis.line.y = element_line(color="black", size=1.5),
axis.text=element_text(size=20, colour="black", face="bold"),
axis.title=element_text(size=20, colour="black", face="bold"),
axis.ticks = element_line(colour = "black", size=1.5, linetype = 'longdash'),
axis.ticks.length=unit(0.3,"cm"),
axis.ticks.x = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
strip.background = element_blank(),
legend.position = legend_postion,
legend.title = element_text(size=20, colour="black", face="bold"),
legend.text = element_text(size=20, colour="black", face="bold"),
strip.text = element_text(size=24, colour="black", face="bold", vjust=3)) + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(labels = y_label_format, expand = c(0, 0)) + labs(x = xlabel, y = ylabel, color = "Group", face="bold") + coord_cartesian(xlim=c(20, 81), ylim=y_axis_limits)
return(ggplot_img)
}
# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot_ggplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.