R/T1_pipeline.R

Defines functions t1_stats_pipeline

Documented in t1_stats_pipeline

#' This is a pipeline to run GLM for group comparison based on the ROI measures for T1-weighted MRI
#' @param input_tsv the tsv file containing all the ROI measurement. The file should strictly contian these columns: "participant_id	session_id	group	age_bl	prevdemals_family_code	sex", then following each column for each ROI
#' @param source_dir the github repo
#' @param output_dir the folder containing all the outputs
#' @param roi_start the first ROI name
#' @param roi_end the last ROI name
#' @param sep the separator for the tsv file, by default is '\t'
#' 
#' @return the table containing the results and also the png image for the meta-analysis of the data.
#' @export
#' @examples
#' t1_stats_pipeline(input_tsv, output_dir, roi_start, roi_end)
#' 
t1_stats_pipeline <- function(source_dir, input_tsv, output_dir, roi_start, roi_end, sep='\t', groupPalette=c("blue","darkorange", "red"), groupPalette_CN_PS=c("blue", "darkorange")){
  
  ### Import des packages
  library(ggplot2)
  library(tidyr)
  library(car)
  library(MASS)
  library(reshape2)
  library(tableone)
  library(xtable)
  library(FactoMineR)
  library(parallel)
  library(xlsx)
  library(gridExtra)
  library(gtable)
  library(grid)
  library(lmtest)
  library(sandwich)
  library(dplyr)
  library(multcomp)
  library(lme4)
  library(scales)
  library(factoextra)
  
  ### Checkout the result folder, if not exist, creat it
  dir.create(output_dir, showWarnings = FALSE)
  setwd(source_dir)
  
  # Thème GGplot
  theme_set(theme_grey())
  
  options(xtable.comment = FALSE)
  
  ### Source the file in order to call functions from other r scripts
  source("T1_utilities_function.R")
  
  ### preprocessing with the source tsv file, data_prepres will output 9 elements containing the results
  data_prepres <- data_prep(input_tsv, roi_start, roi_end, sep=sep)
  data_analyse <- data_prepres[[1]] 
  nom_ROIs_volume <- data_prepres[[2]] 
  nom_ROIs_volume_TIV <- data_prepres[[3]]
  
  data_analyse_PS_CN <- data_analyse[which(data_analyse$group %in% c('CN', 'PS')), ]
  
  ### Write the TIV data into a csv file
  write.table(data_analyse, paste(output_dir, 'preprocessed_data.tsv', sep='/'), sep = '\t', row.names = F)
  
  ## Extract the groups of interest for your analysis
  data_analyses_CN_PS <- data_analyse[which(data_analyse$group %in% c('CN', 'PS')), ]
  
  ## PCA analysis
  res.pca_vol_normalise <- analysisPCAres <- analysisPCA(groupPalette, data_analyse, nom_ROIs_volume_TIV)
  
  # #### Individual geom plot, you can visualize it in Rstudio and saved it then.
  # ggplot(melt(cbind(data_analyse[, c('participant_id','group', 'sex',
  #                                    'age_bl')], apply(data_analyse[, nom_ROIs_volume_TIV], 2, scale)), id.vars =
  #               c('participant_id', 'group', 'sex', 'age_bl')), aes(x = variable, y = value,
  #                                                                   group = participant_id, col = ifelse(participant_id %in%
  #                                                                                                          c(data_analyse$participant_id[which(res.pca_vol_normalise$ind$coord[, 2] >
  #                                                                                                                                                (4))], data_analyse$participant_id[which(res.pca_vol_normalise$ind$coord[,
  #                                                                                                                                                                                                                         1] > (0) & data_analyse$group == 'PT' & res.pca_vol_normalise$ind$coord[, 2]
  #                                                                                                                                                                                         < (4))], data_analyse$participant_id[which(res.pca_vol_normalise$ind$coord[,
  #                                                                                                                                                                                                                                                                    1] < (-3) & data_analyse$group == 'CN')],
  #                                                                                                            data_analyse$participant_id[which(res.pca_vol_normalise$ind$coord[, 1] <
  #                                                                                                                                                (-4) & data_analyse$group == 'PS')]), data_analyse$participant_id,
  #                                                                                                        'autres'))) + geom_line() + xlab('') + theme(axis.text.x =
  #                                                                                                                                                       element_text(angle = 90, hjust = 1)) + ylab('') + ggtitle('reduced centered
  #                                                                                                                                                                                                                 value for each subject') + theme(text = element_text(size=10),
  #                                                                                                                                                                                                                                                  legend.title=element_blank())
  # 
  # ggsave(paste(output_dir, 'pca.png', sep='/'), plot = last_plot())
  # 
  # #boxplot_age_TIV_VS_gr_sex 
  # ## grid.arrange is to set up a gtable layout to
  # #place multiple grobs on a page. 
  # grid.arrange( ggplot(melt(data_analyse[,
  #                                        c('age_bl', 'EstimatedTotalIntraCranialVol', 'participant_id', 'group',
  #                                          'sex')], id.vars = c('participant_id', 'group', 'sex')), aes(x = group, y =
  #                                                                                                         value, fill = group)) + geom_boxplot() + facet_wrap(~variable, scales =
  #                                                                                                                                                               'free', ncol = 2) + ylab('') + scale_fill_manual(values=groupPalette),
  #               ggplot(melt(data_analyse[, c('age_bl', 'EstimatedTotalIntraCranialVol',
  #                                            'participant_id', 'group', 'sex')], id.vars = c('participant_id', 'group',
  #                                                                                            'sex')), aes(x = sex, y = value, fill = sex)) + geom_boxplot() +
  #                 facet_wrap(~variable, scales = 'free', ncol = 2) + ylab(''), nrow = 2)
  # 
  # ggsave(paste(output_dir, 'boxplot1.png', sep='/'), plot = last_plot())
  # 
  # # Histogram of data 
  # ggplot(melt(data_analyse[, c('participant_id', 'group',
  #                              'sex', nom_ROIs_volume_TIV)], id.vars = c('participant_id', 'group',
  #                                                                        'sex')), aes(x = value)) + geom_histogram(aes(y=..density..),
  #                                                                                                                  colour="black", fill="white") + geom_density(alpha=.2, fill="#FF6666") +
  #   facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes
  #                                                               normalisés')
  # ggsave(paste(output_dir, 'histogram_plot.png', sep='/'), plot = last_plot())
  # 
  # ## Boxplot for group 
  # ggplot(melt(data_analyse[, c('participant_id', 'group',
  #                              'sex', nom_ROIs_volume_TIV)], id.vars = c('participant_id', 'group',
  #                                                                        'sex')), aes(x = group, y = value, fill = group)) + geom_boxplot()  +
  #   facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes
  #                                                               normalisés') + scale_fill_manual(values=groupPalette)
  # ggsave(paste(output_dir, 'boxplot2.png', sep='/'), plot = last_plot())
  # 
  # ### Boxplot for just PS and CN 
  # ggplot(melt(data_analyses_CN_PS[,
  #                                 c('participant_id', 'group', 'sex', nom_ROIs_volume_TIV)], id.vars =
  #               c('participant_id', 'group', 'sex')), aes(x = group, y = value, fill =
  #                                                           group)) + geom_boxplot()  + facet_wrap(~variable, scales = 'free') +
  #   xlab('') + ggtitle('ROIs volumes normalisés') +
  #   scale_fill_manual(values=groupPalette_CN_PS)
  # ggsave(paste(output_dir, 'boxplot3.png', sep='/'), plot = last_plot())
  # 
  # ## Boxplot for sex 
  # ggplot(melt(data_analyse[, c('participant_id', 'group',
  #                              'sex', nom_ROIs_volume_TIV)], id.vars = c('participant_id', 'group',
  #                                                                        'sex')), aes(x = sex, y = value, fill = sex)) + geom_boxplot()  +
  #   facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes
  #                                                               normalisés') # the sex diff between groups 
  # ggsave(paste(output_dir, 'boxplot4.png', sep='/'), plot = last_plot())
  # 
  # 
  # ggplot(melt(data_analyse[,
  #                          c('participant_id', 'group', 'sex', nom_ROIs_volume_TIV)], id.vars =
  #               c('participant_id', 'group', 'sex')), aes(x = group, y = value, fill = sex)) + geom_boxplot()  + facet_wrap(~variable, scales = 'free') + xlab('') +
  #   ggtitle('ROIs volumes normalisés')
  # ggsave(paste(output_dir, 'boxplot5.png', sep='/'), plot = last_plot())
  # 
  # ## Scatter plot with linear 
  # ggplot(melt(data_analyse[, c('participant_id',
  #                              'group', 'sex', "age_bl", nom_ROIs_volume_TIV)], id.vars =
  #               c('participant_id', 'group', 'sex', "age_bl")), aes(x = age_bl, y = value,
  #                                                                   color=group)) + geom_point(size=1) + geom_smooth(data = melt(data_analyse[,
  #                                                                                                                                             c('participant_id', 'group', 'sex', "age_bl", nom_ROIs_volume_TIV)], id.vars
  #                                                                                                                                = c('participant_id', 'group', 'sex', "age_bl")), aes(x = age_bl, y = value,
  #                                                                                                                                                                                      color=group), method = "lm", se = FALSE) + facet_wrap(~variable, scales =
  #                                                                                                                                                                                                                                              'free') + ylab('') + ggtitle('ROIs volumes normalisés') +
  #   scale_colour_manual(values=groupPalette)
  # ggsave(paste(output_dir, 'scatter_plot.png', sep='/'), plot = last_plot())
  # 
  # ######################################################################
  # ######################################################################
  # 
  # ## Multiple comparison among CN, PS and PT
  # 
  # ### Run the func for comparison
  # compare_group_PS_CTRL_normalise <- multiple_compare_groupe(data = data_analyse[which(data_analyse$group %in% c('CN', 'PS')), ],
  #                                                            vars = c("age_bl", "sex", "EstimatedTotalIntraCranialVol", nom_ROIs_volume_TIV))
  # compare_group_PS_CTRL_normalise <- compare_group_PS_CTRL_normalise[-c(1, 2, 3),] ### delete the first three rows for age, tiv and sex
  # compare_group_PS_PT_normalise <-  multiple_compare_groupe(data = data_analyse[which(data_analyse$group %in% c('PT', 'PS')), ],
  #                                                           vars = c("age_bl", "sex", "EstimatedTotalIntraCranialVol", nom_ROIs_volume_TIV))
  # 
  # # NA pour var sex age and TIV 
  # compare_group_PS_CTRL_normalise[which(compare_group_PS_CTRL_normalise$var %in% 
  #                                         c('age_bl', 'sex = F', 'EstimatedTotalIntraCranialVol')), 
  #                                 c('pvalue', 'pvalue_multiple')] <- compare_group_PS_PT_normalise[which(compare_group_PS_PT_normalise$var %in% 
  #                                                                                                          c('age_bl', 'sex = F', 'EstimatedTotalIntraCranialVol')), c('pvalue', 'pvalue_multiple')] <- NA
  # 
  # # change colname pvalue pour cbind
  # colnames(compare_group_PS_CTRL_normalise)[which(colnames(compare_group_PS_CTRL_normalise) %in% c('pvalue', 'pvalue_multiple'))] <- c('pvalue_PS_CTRL', 'pvalue_multiple_PS_CTRL')
  # colnames(compare_group_PS_PT_normalise)[which(colnames(compare_group_PS_PT_normalise) %in% c('pvalue', 'pvalue_multiple'))] <- c('pvalue_PS_PT', 'pvalue_multiple_PS_PT')
  # 
  # #### save the CN PS group comparison result as a table
  # compare_group_PS_CTRL_normalise$var <- gsub('Right', 'R', gsub('Left', 'L', gsub('EstimatedTotalIntraCranialVol', 'TIV', gsub('.TIV', '', compare_group_PS_CTRL_normalise$var))))
  # colnames(compare_group_PS_CTRL_normalise) <- gsub('_', ' ', colnames(compare_group_PS_CTRL_normalise))
  # write.table(compare_group_PS_CTRL_normalise, paste(output_dir, 'group_comparison_CN_PS.csv', sep='/'), sep = ';', row.names = F)
  # print(xtable(compare_group_PS_CTRL_normalise, caption = 'Comparaison entre les CN PS groupes pour les volumes normalisés'),  
  #       table.placement="H", size = "tiny", include.rownames = F, add.to.row=list(pos = list(0), 
  #                                                                                 command =  c("\\\\var & CN & PS & pvalue & pvaluecor \\\\")), include.colnames=F, scalebox='0.75')
  # 
  # #######################
  # ########### linear model for PS and CN
  # #######################
  # 
  # ##############
  # #########  Without random
  # ##############
  # model_pvalue_CNvsPS <- general_linear_model_with_interaction(data = data_analyses_CN_PS, effets = c('age_bl', 'sex', 'group', 'age_bl:group'), ROIs = nom_ROIs_volume_TIV, model = 'fixe', correction = 'BH')
  # write.table(model_pvalue_CNvsPS,paste( output_dir, 'fixed_model_result_PS_CN.csv', sep='/'), sep = ';', row.names = F)
  # print(xtable(model_pvalue_CNvsPS, caption = 'Pvalue des effets sur les modèles linéaires pour les groupes CN vs PS pour les volumes normalisés'),  table.placement="H", size = "tiny", include.rownames = F,
  #       add.to.row=list(pos = list(0), command =  c(' & \\multicolumn{4}{c}{age} & \\multicolumn{4}{c}{sexe} & \\multicolumn{4}{c}{groupe}  & \\multicolumn{4}{c}{age:groupe} \\\\ & coef & f2
  #                                                   &  pvalue & pvalue & coef & f2 &  pvalue & pvalue & coef & f2 &  pvalue & pvalue & coef & f2 &  pvalue & pvalue  \\\\  &  &  &  &  corr &  &  &  &  corr
  #                                                   &  &  &  & corr &  &  &  &  corr \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')
  # 
  # model_pvalue_CNvsPS_without_interaction <- general_linear_model_without_interaction(data = data_analyse_PS_CN, effets =  c('age_bl', 'sex', 'group'), ROIs = nom_ROIs_volume, model = 'fixe', correction = 'BH')
  # write.table(model_pvalue_CNvsPS_without_interaction, paste( output_dir, 'PS_CN_fixed_without_interaction.csv', sep='/'), sep = ';', row.names = F)
  # print(xtable(model_pvalue_CNvsPS_without_interaction, caption = 'Pvalue des effets sur les modèles linéaires pour les groupes CN vs PS pour les volumes normalisés without interaction'),  table.placement="H", size = "tiny", include.rownames = F, 
  #       add.to.row=list(pos = list(0), command =  c(' & \\multicolumn{4}{c}{age} & \\multicolumn{4}{c}{sexe} & \\multicolumn{4}{c}{groupe} \\\\ & coef & f2
  #                                                   &  pvalue & pvalue & coef & f2 &  pvalue & pvalue & coef & f2 &  pvalue & pvalue  \\\\  &  &  &  &  corr &  &  &  &  corr 
  #                                                   &  &  &  & corr  \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')
  
  model_pvalue_CNvsPS_without_interaction <- general_linear_model_without_interaction(data = data_analyse_PS_CN, effets =  c('age_bl', 'sex', 'group', 'prevdemals_family_code'), ROIs = nom_ROIs_volume_TIV, model = 'fixe', correction = 'BH')
  write.table(model_pvalue_CNvsPS_without_interaction, paste( output_dir, 'PS_CN_fixed_without_interaction.csv', sep='/'), sep = ';', row.names = F)
  print(xtable(model_pvalue_CNvsPS_without_interaction, caption = 'Pvalue des effets sur les modèles linéaires pour les groupes CN vs PS pour les volumes normalisés without interaction'),  table.placement="H", size = "tiny", include.rownames = F, 
        add.to.row=list(pos = list(0), command =  c(' & \\multicolumn{4}{c}{age} & \\multicolumn{4}{c}{sexe} & \\multicolumn{4}{c}{groupe} \\\\ & coef & f2
                                                    &  pvalue & pvalue & coef & f2 &  pvalue & pvalue & coef & f2 &  pvalue & pvalue  \\\\  &  &  &  &  corr &  &  &  &  corr 
                                                    &  &  &  & corr  \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')
  
  ##############
  #########  With random effect center
  ##############
  model_mixed_pvalue_CNvsPS <- general_linear_model_with_interaction(data = data_analyses_CN_PS, effets = c('age_bl', 'sex', 'group', 'age_bl:group'), ROIs = nom_ROIs_volume_TIV, model = 'mixed', correction = 'BH')
  write.table(model_mixed_pvalue_CNvsPS, paste( output_dir, 'mixed_modeles_CNvsPS.csv', sep='/'), sep = ';', row.names = F)
  print(xtable(model_mixed_pvalue_CNvsPS, caption = 'Pvalue des effets sur les modèles linéaires mixtes pour les groupes CN vs PS pour les volumes normalisés'),  table.placement="H", size = "tiny", include.rownames = F,
        add.to.row=list(pos = list(0), command =  c(' & \\multicolumn{3}{c}{age} & \\multicolumn{3}{c}{sexe} & \\multicolumn{3}{c}{groupe}  & \\multicolumn{3}{c}{age:groupe} \\\\ & coef &  pvalue & pvalue & coef & pvalue & pvalue & coef &  pvalue & pvalue & coef &  pvalue & pvalue  \\\\  &  &  &  corr &  &  &  corr &  &  & corr &  &  &  corr \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')
  
  model_mixed_pvalue_CNvsPS_without_interaction <- general_linear_model_without_interaction(data = data_analyse_PS_CN, effets =  c('age_bl', 'sex', 'group'), ROIs = nom_ROIs_volume, model = 'mixed', correction = 'BH')
  write.table(model_mixed_pvalue_CNvsPS_without_interaction, paste( output_dir, 'PS_CN_mixed_without_interaction.csv', sep='/'), sep = ';', row.names = F)
  print(xtable(model_mixed_pvalue_CNvsPS_without_interaction, caption = 'Pvalue des effets sur les modèles linéaires mixtes pour les groupes CN vs PS pour les volumes normalisés without interaction'),  table.placement="H", size = "tiny", include.rownames = F,
        add.to.row=list(pos = list(0), command =  c(' & \\multicolumn{3}{c}{age} & \\multicolumn{3}{c}{sexe} & \\multicolumn{3}{c}{groupe} \\\\ & coef &  pvalue & pvalue & coef & pvalue & pvalue & coef &  pvalue & pvalue  \\\\  &  &  &  corr &  &  &  corr &  &  & corr \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')
  
}

input_tsv = "/Users/junhao.wen 1/Dropbox/2019-YueLin-SMCH_SCD/tsv/SMHC_sCN_vs_pCN_14_09_2019_demography.tsv"
output_dir = "/Users/junhao.wen 1/Dropbox/2019-YueLin-SMCH_SCD/results"
roi_start = "lh_lateraloccipital_volume"
roi_end = "Right.UnsegmentedWhiteMatter"
source_dir = "/Users/junhao.wen 1/Hao/Project/NeuroStatisticR/R"
t1_stats_pipeline(source_dir, input_tsv, output_dir, roi_start, roi_end)
anbai106/NeuroStatisticR documentation built on May 27, 2020, 2:20 a.m.