#' This is a pipeline to run GLM for group comparison based on the ROI measures for T1-weighted MRI
#' @param input_tsv the tsv file containing all the ROI measurement. The file should strictly contian these columns: "participant_id session_id group age_bl prevdemals_family_code sex", then following each column for each ROI
#' @param source_dir the github repo
#' @param output_dir the folder containing all the outputs
#' @param roi_start the first ROI name
#' @param roi_end the last ROI name
#' @param sep the separator for the tsv file, by default is '\t'
#'
#' @return the table containing the results and also the png image for the meta-analysis of the data.
#' @export
#' @examples
#' t1_stats_pipeline(input_tsv, output_dir, roi_start, roi_end)
#'
t1_stats_pipeline <- function(source_dir, input_tsv, output_dir, roi_start, roi_end, sep='\t', groupPalette=c("blue","darkorange", "red"), groupPalette_CN_PS=c("blue", "darkorange")){
### Import des packages
library(ggplot2)
library(tidyr)
library(car)
library(MASS)
library(reshape2)
library(tableone)
library(xtable)
library(FactoMineR)
library(parallel)
library(xlsx)
library(gridExtra)
library(gtable)
library(grid)
library(lmtest)
library(sandwich)
library(dplyr)
library(multcomp)
library(lme4)
library(scales)
library(factoextra)
### Checkout the result folder, if not exist, creat it
dir.create(output_dir, showWarnings = FALSE)
setwd(source_dir)
# Thème GGplot
theme_set(theme_grey())
options(xtable.comment = FALSE)
### Source the file in order to call functions from other r scripts
source("T1_utilities_function.R")
### preprocessing with the source tsv file, data_prepres will output 9 elements containing the results
data_prepres <- data_prep(input_tsv, roi_start, roi_end, sep=sep)
data_analyse <- data_prepres[[1]]
nom_ROIs_volume <- data_prepres[[2]]
nom_ROIs_volume_TIV <- data_prepres[[3]]
data_analyse_PS_CN <- data_analyse[which(data_analyse$group %in% c('CN', 'PS')), ]
### Write the TIV data into a csv file
write.table(data_analyse, paste(output_dir, 'preprocessed_data.tsv', sep='/'), sep = '\t', row.names = F)
## Extract the groups of interest for your analysis
data_analyses_CN_PS <- data_analyse[which(data_analyse$group %in% c('CN', 'PS')), ]
## PCA analysis
res.pca_vol_normalise <- analysisPCAres <- analysisPCA(groupPalette, data_analyse, nom_ROIs_volume_TIV)
# #### Individual geom plot, you can visualize it in Rstudio and saved it then.
# ggplot(melt(cbind(data_analyse[, c('participant_id','group', 'sex',
# 'age_bl')], apply(data_analyse[, nom_ROIs_volume_TIV], 2, scale)), id.vars =
# c('participant_id', 'group', 'sex', 'age_bl')), aes(x = variable, y = value,
# group = participant_id, col = ifelse(participant_id %in%
# c(data_analyse$participant_id[which(res.pca_vol_normalise$ind$coord[, 2] >
# (4))], data_analyse$participant_id[which(res.pca_vol_normalise$ind$coord[,
# 1] > (0) & data_analyse$group == 'PT' & res.pca_vol_normalise$ind$coord[, 2]
# < (4))], data_analyse$participant_id[which(res.pca_vol_normalise$ind$coord[,
# 1] < (-3) & data_analyse$group == 'CN')],
# data_analyse$participant_id[which(res.pca_vol_normalise$ind$coord[, 1] <
# (-4) & data_analyse$group == 'PS')]), data_analyse$participant_id,
# 'autres'))) + geom_line() + xlab('') + theme(axis.text.x =
# element_text(angle = 90, hjust = 1)) + ylab('') + ggtitle('reduced centered
# value for each subject') + theme(text = element_text(size=10),
# legend.title=element_blank())
#
# ggsave(paste(output_dir, 'pca.png', sep='/'), plot = last_plot())
#
# #boxplot_age_TIV_VS_gr_sex
# ## grid.arrange is to set up a gtable layout to
# #place multiple grobs on a page.
# grid.arrange( ggplot(melt(data_analyse[,
# c('age_bl', 'EstimatedTotalIntraCranialVol', 'participant_id', 'group',
# 'sex')], id.vars = c('participant_id', 'group', 'sex')), aes(x = group, y =
# value, fill = group)) + geom_boxplot() + facet_wrap(~variable, scales =
# 'free', ncol = 2) + ylab('') + scale_fill_manual(values=groupPalette),
# ggplot(melt(data_analyse[, c('age_bl', 'EstimatedTotalIntraCranialVol',
# 'participant_id', 'group', 'sex')], id.vars = c('participant_id', 'group',
# 'sex')), aes(x = sex, y = value, fill = sex)) + geom_boxplot() +
# facet_wrap(~variable, scales = 'free', ncol = 2) + ylab(''), nrow = 2)
#
# ggsave(paste(output_dir, 'boxplot1.png', sep='/'), plot = last_plot())
#
# # Histogram of data
# ggplot(melt(data_analyse[, c('participant_id', 'group',
# 'sex', nom_ROIs_volume_TIV)], id.vars = c('participant_id', 'group',
# 'sex')), aes(x = value)) + geom_histogram(aes(y=..density..),
# colour="black", fill="white") + geom_density(alpha=.2, fill="#FF6666") +
# facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes
# normalisés')
# ggsave(paste(output_dir, 'histogram_plot.png', sep='/'), plot = last_plot())
#
# ## Boxplot for group
# ggplot(melt(data_analyse[, c('participant_id', 'group',
# 'sex', nom_ROIs_volume_TIV)], id.vars = c('participant_id', 'group',
# 'sex')), aes(x = group, y = value, fill = group)) + geom_boxplot() +
# facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes
# normalisés') + scale_fill_manual(values=groupPalette)
# ggsave(paste(output_dir, 'boxplot2.png', sep='/'), plot = last_plot())
#
# ### Boxplot for just PS and CN
# ggplot(melt(data_analyses_CN_PS[,
# c('participant_id', 'group', 'sex', nom_ROIs_volume_TIV)], id.vars =
# c('participant_id', 'group', 'sex')), aes(x = group, y = value, fill =
# group)) + geom_boxplot() + facet_wrap(~variable, scales = 'free') +
# xlab('') + ggtitle('ROIs volumes normalisés') +
# scale_fill_manual(values=groupPalette_CN_PS)
# ggsave(paste(output_dir, 'boxplot3.png', sep='/'), plot = last_plot())
#
# ## Boxplot for sex
# ggplot(melt(data_analyse[, c('participant_id', 'group',
# 'sex', nom_ROIs_volume_TIV)], id.vars = c('participant_id', 'group',
# 'sex')), aes(x = sex, y = value, fill = sex)) + geom_boxplot() +
# facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes
# normalisés') # the sex diff between groups
# ggsave(paste(output_dir, 'boxplot4.png', sep='/'), plot = last_plot())
#
#
# ggplot(melt(data_analyse[,
# c('participant_id', 'group', 'sex', nom_ROIs_volume_TIV)], id.vars =
# c('participant_id', 'group', 'sex')), aes(x = group, y = value, fill = sex)) + geom_boxplot() + facet_wrap(~variable, scales = 'free') + xlab('') +
# ggtitle('ROIs volumes normalisés')
# ggsave(paste(output_dir, 'boxplot5.png', sep='/'), plot = last_plot())
#
# ## Scatter plot with linear
# ggplot(melt(data_analyse[, c('participant_id',
# 'group', 'sex', "age_bl", nom_ROIs_volume_TIV)], id.vars =
# c('participant_id', 'group', 'sex', "age_bl")), aes(x = age_bl, y = value,
# color=group)) + geom_point(size=1) + geom_smooth(data = melt(data_analyse[,
# c('participant_id', 'group', 'sex', "age_bl", nom_ROIs_volume_TIV)], id.vars
# = c('participant_id', 'group', 'sex', "age_bl")), aes(x = age_bl, y = value,
# color=group), method = "lm", se = FALSE) + facet_wrap(~variable, scales =
# 'free') + ylab('') + ggtitle('ROIs volumes normalisés') +
# scale_colour_manual(values=groupPalette)
# ggsave(paste(output_dir, 'scatter_plot.png', sep='/'), plot = last_plot())
#
# ######################################################################
# ######################################################################
#
# ## Multiple comparison among CN, PS and PT
#
# ### Run the func for comparison
# compare_group_PS_CTRL_normalise <- multiple_compare_groupe(data = data_analyse[which(data_analyse$group %in% c('CN', 'PS')), ],
# vars = c("age_bl", "sex", "EstimatedTotalIntraCranialVol", nom_ROIs_volume_TIV))
# compare_group_PS_CTRL_normalise <- compare_group_PS_CTRL_normalise[-c(1, 2, 3),] ### delete the first three rows for age, tiv and sex
# compare_group_PS_PT_normalise <- multiple_compare_groupe(data = data_analyse[which(data_analyse$group %in% c('PT', 'PS')), ],
# vars = c("age_bl", "sex", "EstimatedTotalIntraCranialVol", nom_ROIs_volume_TIV))
#
# # NA pour var sex age and TIV
# compare_group_PS_CTRL_normalise[which(compare_group_PS_CTRL_normalise$var %in%
# c('age_bl', 'sex = F', 'EstimatedTotalIntraCranialVol')),
# c('pvalue', 'pvalue_multiple')] <- compare_group_PS_PT_normalise[which(compare_group_PS_PT_normalise$var %in%
# c('age_bl', 'sex = F', 'EstimatedTotalIntraCranialVol')), c('pvalue', 'pvalue_multiple')] <- NA
#
# # change colname pvalue pour cbind
# colnames(compare_group_PS_CTRL_normalise)[which(colnames(compare_group_PS_CTRL_normalise) %in% c('pvalue', 'pvalue_multiple'))] <- c('pvalue_PS_CTRL', 'pvalue_multiple_PS_CTRL')
# colnames(compare_group_PS_PT_normalise)[which(colnames(compare_group_PS_PT_normalise) %in% c('pvalue', 'pvalue_multiple'))] <- c('pvalue_PS_PT', 'pvalue_multiple_PS_PT')
#
# #### save the CN PS group comparison result as a table
# compare_group_PS_CTRL_normalise$var <- gsub('Right', 'R', gsub('Left', 'L', gsub('EstimatedTotalIntraCranialVol', 'TIV', gsub('.TIV', '', compare_group_PS_CTRL_normalise$var))))
# colnames(compare_group_PS_CTRL_normalise) <- gsub('_', ' ', colnames(compare_group_PS_CTRL_normalise))
# write.table(compare_group_PS_CTRL_normalise, paste(output_dir, 'group_comparison_CN_PS.csv', sep='/'), sep = ';', row.names = F)
# print(xtable(compare_group_PS_CTRL_normalise, caption = 'Comparaison entre les CN PS groupes pour les volumes normalisés'),
# table.placement="H", size = "tiny", include.rownames = F, add.to.row=list(pos = list(0),
# command = c("\\\\var & CN & PS & pvalue & pvaluecor \\\\")), include.colnames=F, scalebox='0.75')
#
# #######################
# ########### linear model for PS and CN
# #######################
#
# ##############
# ######### Without random
# ##############
# model_pvalue_CNvsPS <- general_linear_model_with_interaction(data = data_analyses_CN_PS, effets = c('age_bl', 'sex', 'group', 'age_bl:group'), ROIs = nom_ROIs_volume_TIV, model = 'fixe', correction = 'BH')
# write.table(model_pvalue_CNvsPS,paste( output_dir, 'fixed_model_result_PS_CN.csv', sep='/'), sep = ';', row.names = F)
# print(xtable(model_pvalue_CNvsPS, caption = 'Pvalue des effets sur les modèles linéaires pour les groupes CN vs PS pour les volumes normalisés'), table.placement="H", size = "tiny", include.rownames = F,
# add.to.row=list(pos = list(0), command = c(' & \\multicolumn{4}{c}{age} & \\multicolumn{4}{c}{sexe} & \\multicolumn{4}{c}{groupe} & \\multicolumn{4}{c}{age:groupe} \\\\ & coef & f2
# & pvalue & pvalue & coef & f2 & pvalue & pvalue & coef & f2 & pvalue & pvalue & coef & f2 & pvalue & pvalue \\\\ & & & & corr & & & & corr
# & & & & corr & & & & corr \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')
#
# model_pvalue_CNvsPS_without_interaction <- general_linear_model_without_interaction(data = data_analyse_PS_CN, effets = c('age_bl', 'sex', 'group'), ROIs = nom_ROIs_volume, model = 'fixe', correction = 'BH')
# write.table(model_pvalue_CNvsPS_without_interaction, paste( output_dir, 'PS_CN_fixed_without_interaction.csv', sep='/'), sep = ';', row.names = F)
# print(xtable(model_pvalue_CNvsPS_without_interaction, caption = 'Pvalue des effets sur les modèles linéaires pour les groupes CN vs PS pour les volumes normalisés without interaction'), table.placement="H", size = "tiny", include.rownames = F,
# add.to.row=list(pos = list(0), command = c(' & \\multicolumn{4}{c}{age} & \\multicolumn{4}{c}{sexe} & \\multicolumn{4}{c}{groupe} \\\\ & coef & f2
# & pvalue & pvalue & coef & f2 & pvalue & pvalue & coef & f2 & pvalue & pvalue \\\\ & & & & corr & & & & corr
# & & & & corr \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')
model_pvalue_CNvsPS_without_interaction <- general_linear_model_without_interaction(data = data_analyse_PS_CN, effets = c('age_bl', 'sex', 'group', 'prevdemals_family_code'), ROIs = nom_ROIs_volume_TIV, model = 'fixe', correction = 'BH')
write.table(model_pvalue_CNvsPS_without_interaction, paste( output_dir, 'PS_CN_fixed_without_interaction.csv', sep='/'), sep = ';', row.names = F)
print(xtable(model_pvalue_CNvsPS_without_interaction, caption = 'Pvalue des effets sur les modèles linéaires pour les groupes CN vs PS pour les volumes normalisés without interaction'), table.placement="H", size = "tiny", include.rownames = F,
add.to.row=list(pos = list(0), command = c(' & \\multicolumn{4}{c}{age} & \\multicolumn{4}{c}{sexe} & \\multicolumn{4}{c}{groupe} \\\\ & coef & f2
& pvalue & pvalue & coef & f2 & pvalue & pvalue & coef & f2 & pvalue & pvalue \\\\ & & & & corr & & & & corr
& & & & corr \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')
##############
######### With random effect center
##############
model_mixed_pvalue_CNvsPS <- general_linear_model_with_interaction(data = data_analyses_CN_PS, effets = c('age_bl', 'sex', 'group', 'age_bl:group'), ROIs = nom_ROIs_volume_TIV, model = 'mixed', correction = 'BH')
write.table(model_mixed_pvalue_CNvsPS, paste( output_dir, 'mixed_modeles_CNvsPS.csv', sep='/'), sep = ';', row.names = F)
print(xtable(model_mixed_pvalue_CNvsPS, caption = 'Pvalue des effets sur les modèles linéaires mixtes pour les groupes CN vs PS pour les volumes normalisés'), table.placement="H", size = "tiny", include.rownames = F,
add.to.row=list(pos = list(0), command = c(' & \\multicolumn{3}{c}{age} & \\multicolumn{3}{c}{sexe} & \\multicolumn{3}{c}{groupe} & \\multicolumn{3}{c}{age:groupe} \\\\ & coef & pvalue & pvalue & coef & pvalue & pvalue & coef & pvalue & pvalue & coef & pvalue & pvalue \\\\ & & & corr & & & corr & & & corr & & & corr \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')
model_mixed_pvalue_CNvsPS_without_interaction <- general_linear_model_without_interaction(data = data_analyse_PS_CN, effets = c('age_bl', 'sex', 'group'), ROIs = nom_ROIs_volume, model = 'mixed', correction = 'BH')
write.table(model_mixed_pvalue_CNvsPS_without_interaction, paste( output_dir, 'PS_CN_mixed_without_interaction.csv', sep='/'), sep = ';', row.names = F)
print(xtable(model_mixed_pvalue_CNvsPS_without_interaction, caption = 'Pvalue des effets sur les modèles linéaires mixtes pour les groupes CN vs PS pour les volumes normalisés without interaction'), table.placement="H", size = "tiny", include.rownames = F,
add.to.row=list(pos = list(0), command = c(' & \\multicolumn{3}{c}{age} & \\multicolumn{3}{c}{sexe} & \\multicolumn{3}{c}{groupe} \\\\ & coef & pvalue & pvalue & coef & pvalue & pvalue & coef & pvalue & pvalue \\\\ & & & corr & & & corr & & & corr \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')
}
input_tsv = "/Users/junhao.wen 1/Dropbox/2019-YueLin-SMCH_SCD/tsv/SMHC_sCN_vs_pCN_14_09_2019_demography.tsv"
output_dir = "/Users/junhao.wen 1/Dropbox/2019-YueLin-SMCH_SCD/results"
roi_start = "lh_lateraloccipital_volume"
roi_end = "Right.UnsegmentedWhiteMatter"
source_dir = "/Users/junhao.wen 1/Hao/Project/NeuroStatisticR/R"
t1_stats_pipeline(source_dir, input_tsv, output_dir, roi_start, roi_end)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.