NeuroStatisticR:

#' This is a pipeline to run GLM for group comparison based on the ROI measures for diffusion MRI
#' @param input_tsv the tsv file containing all the ROI measurement. The file should strictly contian these columns: "participant_id	session_id	group	age_bl	prevdemals_family_code	sex", then following each column for each ROI
#' @param output_dir the folder containing all the outputs
#' @param roi_start the first ROI name
#' @param roi_end the last ROI name
#' @param sep the separator for the tsv file, by default is '\t'
#' 
#' @return the table containing the results and also the png image for the meta-analysis of the data.
#' @export
#' @examples
#' t1_stats_pipeline(input_tsv, output_dir, roi_start, roi_end)
#' 


dti_stats_pipeline <- function(input_tsv, output_dir, roi_start, roi_end, sep='\t', groupPalette=c("blue","darkorange", "red"), groupPalette_CN_PS=c("blue", "darkorange")){
  
  ### Import des packages
  library(ggplot2)
  library(tidyr)
  library(car)
  library(MASS)
  library(reshape2)
  library(tableone)
  library(xtable)
  library(FactoMineR)
  library(parallel)
  library(xlsx)
  library(gridExtra)
  library(gtable)
  library(grid)
  library(lmtest)
  library(sandwich)
  library(dplyr)
  library(multcomp)
  library(lme4)
  library("factoextra")
  library(MuMIn)
  library(lsr)
  library(compute.es)
  library(factoextra)
  
  ### Checkout the result folder, if not exist, creat it
  dir.create(output_dir, showWarnings = FALSE)
  
  # Thème GGplot
  theme_set(theme_grey())
  
  options(xtable.comment = FALSE)
  
  ### Source the file in order to call functions from other r scripts
  source("/Users/junhao.wen\ 1/Hao/Project/Prevdemals_stat_visualize/statistics/statistics_with_R/DWI/DTI_utilities_function.R")
  
  ### Importation
  dataPrepres <- data_prep(input_tsv, roi_start, roi_end, sep)
  data_analyse <- dataPrepres[[1]] 
  nom_ROIs_volume <- dataPrepres[[2]]
  
  data_analyse_PS_CN <- data_analyse[which(data_analyse$group %in% c('CN', 'PS')), ]
  write.table(data_analyse_PS_CN, paste(output_dir, 'preprocessed_data.tsv', sep='/'), sep = '\t', row.names = F)
  
  ##  boxplot_age_VS_gr_sex
  ## grid.arrange is to set up a gtable layout to place multiple grobs on a page.
  grid.arrange(
    ggplot(melt(data_analyse[, c('age_bl', 'participant_id', 'group', 'sex')], id.vars = c('participant_id', 'group', 'sex')),
           aes(x = group, y = value, fill = group)) + geom_boxplot() + facet_wrap(~variable, scales = 'free', ncol = 2) + ylab('') +
      scale_fill_manual(values=groupPalette),
    ggplot(melt(data_analyse[, c('age_bl', 'participant_id', 'group', 'sex')], id.vars = c('participant_id', 'group', 'sex')),
           aes(x = sex, y = value, fill = sex)) + geom_boxplot() + facet_wrap(~variable, scales = 'free', ncol = 2) + ylab(''),
    nrow = 2)
  ggsave(paste(output_dir,"boxplot_age_group_sex.png", sep='/'), width = 4, height = 4)

  # Histogram of data
  ggplot(melt(data_analyse[, c('participant_id', 'group', 'sex', nom_ROIs_volume)], id.vars = c('participant_id', 'group', 'sex')),
         aes(x = value)) + geom_histogram(aes(y=..density..), colour="black", fill="white") + geom_density(alpha=.2, fill="#FF6666") +
    facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes histgram')
  ggsave(paste(output_dir,"histogram.png", sep='/'), width = 4, height = 4)

  ## Histogram of data just for CN and PS 
  ggplot(melt((data_analyse[, c('participant_id', 'group', 'sex', nom_ROIs_volume)])[which(data_analyse$group %in% c('CN', 'PS')), ], id.vars = c('participant_id', 'group', 'sex')),
         aes(x = value)) + geom_histogram(aes(y=..density..), colour="black", fill="white") + geom_density(alpha=.2, fill="#FF6666") +
    facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes histgram')
  ggsave(paste(output_dir,"histogram_data_CN_PS.png", sep='/'), width = 4, height = 4)


  # ## Boxplot for group
  ggplot(melt(data_analyse[, c('participant_id', 'group', 'sex', nom_ROIs_volume)], id.vars = c('participant_id', 'group', 'sex')), aes(x = group, y = value, fill = group)) + geom_boxplot()  + facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes normalisés') + scale_fill_manual(values=groupPalette)
  ggsave(paste(output_dir,"boxplot_group.png", sep='/'), width = 4, height = 4)

  ## Boxplot for CN and PS
  ggplot(melt(data_analyse_PS_CN[, c('participant_id', 'group', 'sex', nom_ROIs_volume)], id.vars = c('participant_id', 'group', 'sex')), aes(x = group, y = value, fill = group)) + geom_boxplot()  + facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes normalisés') + scale_fill_manual(values=groupPalette_CN_PS)
  ggsave(paste(output_dir,"boxplot_group_cn_ps.png", sep='/'), width = 4, height = 4)

  # ## Boxplot for sex
  ggplot(melt(data_analyse[, c('participant_id', 'group', 'sex', nom_ROIs_volume)], id.vars = c('participant_id', 'group', 'sex')), aes(x = sex, y = value, fill = sex)) + geom_boxplot()  + facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes normalisés')
  ggsave(paste(output_dir,"boxplot_sex.png", sep='/'), width = 4, height = 4)

  # the sex diff beween groups
  ggplot(melt(data_analyse[, c('participant_id', 'group', 'sex', nom_ROIs_volume)], id.vars = c('participant_id', 'group', 'sex')), aes(x = group, y = value, fill = sex)) + geom_boxplot()  + facet_wrap(~variable, scales = 'free') + xlab('') + ggtitle('ROIs volumes normalisés')
  ggsave(paste(output_dir,"sex_diff_bt_groups.png", sep='/'), width = 4, height = 4)


  ##### Also just plot the CN and PS 
  ggplot(melt(data_analyse_PS_CN[, c('participant_id', 'group', 'sex', "age_bl", nom_ROIs_volume)], id.vars = c('participant_id', 'group', 'sex', "age_bl")),
         aes(x = age_bl, y = value, color=group)) + geom_point(size=1) +
    geom_smooth(data = melt(data_analyse_PS_CN[, c('participant_id', 'group', 'sex', "age_bl", nom_ROIs_volume)], id.vars = c('participant_id', 'group', 'sex', "age_bl")),
                aes(x = age_bl, y = value, color=group), method = "lm", se = FALSE) +
    facet_wrap(~variable, scales = 'free') + ylab('') + ggtitle('ROIs volumes normalisés') + scale_colour_manual(values=c("blue", "darkorange"))
  ggsave(paste(output_dir,"scatterplot_group_CN_PS.png", sep='/'), width = 4, height = 4)

  # ###################################################################
  # ####### Plot the iamges seperately 
  # ##################################################################
  #### 1 Anterior_thalamic_radiation_R_Mean
  Anterior_thalamic_radiation_R_Mean <- plot_regreesion_scatter(data_analyse_PS_CN, 'Anterior_thalamic_radiation_R_Mean', "right anterior thalamic radiation tract", xlab="age", ylab="FA", legend_postion="none")

  # ###################################################################
  # ####### Group comparison 
  # ##################################################################
  # 
  # ### Run the func for comparison
  compare_group_PS_CTRL_normalise <- multiple_compare_groupe(data = data_analyse[which(data_analyse$group %in% c('CN', 'PS')), ],
  vars = c("age_bl", "sex", nom_ROIs_volume))
  

  # # NA pour var sex age and TIV
  compare_group_PS_CTRL_normalise[which(compare_group_PS_CTRL_normalise$var %in% c('age_bl', 'sex = F')), c('pvalue', 'pvalue_multiple')] <- NA
  
  # # change colname pvalue pour cbind
  colnames(compare_group_PS_CTRL_normalise)[which(colnames(compare_group_PS_CTRL_normalise) %in% c('pvalue', 'pvalue_multiple'))] <-
    c('pvalue_PS_CTRL', 'pvalue_multiple_PS_CTRL')

  write.table(compare_group_PS_CTRL_normalise, paste(output_dir, 'group_comparison_CN_PS.csv', sep='/'), sep = ';', row.names = F)
  
  #############################################
  ########### General linear model
  #############################################
  
  ##########
  #### Mixed-effect linear model: Volume(j) =μ+—◊sexei +⁄◊agei +÷◊groupei +”◊agei :groupei +Zi +‘(j)
  ##########
  
  #######################
  ########### linear model for PS and CN
  #######################
  
  ##############
  #########  Without random 
  ##############
  model_pvalue_CNvsPS <- general_linear_model_with_interaction(data = data_analyse_PS_CN, effets = c('age_bl', 'sex', 'group', 'age_bl:group'), ROIs = nom_ROIs_volume, model = 'fixe', correction = 'BH')
  write.table(model_pvalue_CNvsPS, paste(output_dir, 'PS_CN_fixed.csv', sep='/'), sep = ';', row.names = F)
  print(xtable(model_pvalue_CNvsPS, caption = 'Pvalue des effets sur les modèles linéaires pour les groupes CN vs PS pour les volumes normalisés'),  table.placement="H", size = "tiny", include.rownames = F,
        add.to.row=list(pos = list(0), command =  c(' & \\multicolumn{4}{c}{age} & \\multicolumn{4}{c}{sexe} & \\multicolumn{4}{c}{groupe}  & \\multicolumn{4}{c}{age:groupe} \\\\ & coef & f2
                                                    &  pvalue & pvalue & coef & f2 &  pvalue & pvalue & coef & f2 &  pvalue & pvalue & coef & f2 &  pvalue & pvalue  \\\\  &  &  &  &  corr &  &  &  &  corr
                                                    &  &  &  & corr &  &  &  &  corr \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')

  model_pvalue_CNvsPS_without_interaction <- general_linear_model_without_interaction(data = data_analyse_PS_CN, effets =  c('age_bl', 'sex', 'group'), ROIs = nom_ROIs_volume, model = 'fixe', correction = 'BH')
  write.table(model_pvalue_CNvsPS_without_interaction, paste(output_dir, 'PS_CN_fixed_without_interaction.csv', sep='/'), sep = ';', row.names = F)
  print(xtable(model_pvalue_CNvsPS_without_interaction, caption = 'Pvalue des effets sur les modèles linéaires pour les groupes CN vs PS pour les volumes normalisés without interaction'),  table.placement="H", size = "tiny", include.rownames = F,
        add.to.row=list(pos = list(0), command =  c(' & \\multicolumn{4}{c}{age} & \\multicolumn{4}{c}{sexe} & \\multicolumn{4}{c}{groupe} \\\\ & coef & f2
                                                    &  pvalue & pvalue & coef & f2 &  pvalue & pvalue & coef & f2 &  pvalue & pvalue  \\\\  &  &  &  &  corr &  &  &  &  corr
                                                    &  &  &  & corr  \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')


  
  ##############
  #########  With random effect 
  ##############
  model_mixed_pvalue_CNvsPS <- general_linear_model_with_interaction(data = data_analyse_PS_CN, effets = c('age_bl', 'sex', 'group', 'age_bl:group'), ROIs = nom_ROIs_volume, model = 'mixed', correction = 'BH')
  write.table(model_mixed_pvalue_CNvsPS, paste(output_dir, 'PS_CN_mixed.csv', sep='/'), sep = ';', row.names = F)
  print(xtable(model_mixed_pvalue_CNvsPS, caption = 'Pvalue des effets sur les modèles linéaires mixtes pour les groupes CN vs PS pour les volumes normalisés'),  table.placement="H", size = "tiny", include.rownames = F,
        add.to.row=list(pos = list(0), command =  c(' & \\multicolumn{3}{c}{age} & \\multicolumn{3}{c}{sexe} & \\multicolumn{3}{c}{groupe}  & \\multicolumn{3}{c}{age:groupe} \\\\ & coef &  pvalue & pvalue & coef & pvalue & pvalue & coef &  pvalue & pvalue & coef &  pvalue & pvalue  \\\\  &  &  &  corr &  &  &  corr &  &  & corr &  &  &  corr \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')

  model_mixed_pvalue_CNvsPS_without_interaction <- general_linear_model_without_interaction(data = data_analyse_PS_CN, effets =  c('age_bl', 'sex', 'group'), ROIs = nom_ROIs_volume, model = 'mixed', correction = 'BH')
  write.table(model_mixed_pvalue_CNvsPS_without_interaction, paste(output_dir, 'PS_CN_mixed_without_interaction.csv', sep='/'), sep = ';', row.names = F)
  print(xtable(model_mixed_pvalue_CNvsPS_without_interaction, caption = 'Mixed effect generalized linear model without interaction'),  table.placement="H", size = "tiny", include.rownames = F,
        add.to.row=list(pos = list(0), command =  c(' & \\multicolumn{4}{c}{age} & \\multicolumn{4}{c}{sexe} & \\multicolumn{4}{c}{groupe} \\\\ & coef & f2
                                                    &  pvalue & pvalue & coef & f2 &  pvalue & pvalue & coef & f2 &  pvalue & pvalue  \\\\  &  &  &  &  corr &  &  &  &  corr
                                                    &  &  &  & corr  \\\\')), include.colnames=F, sanitize.text.function = force, scalebox='0.9')
}