R/Permutation_effect_size.R

Defines functions permutation_effect_size_pipeline

#' This is a pipeline to significantly compare the difference of effect size using cohen's f2. The permutation tests with 10 000 iterations was used for significant ROIs.
#' @param input_tsv1 the tsv file containing all the ROI measurement. The file should strictly contian these columns: "participant_id	session_id	group	age_bl	prevdemals_family_code	sex", then following each column for each ROI
#' @param input_tsv2 the tsv file containing all the ROI measurement. The file should strictly contian these columns: "participant_id	session_id	group	age_bl	prevdemals_family_code	sex", then following each column for each ROI
#' @param output_dir the folder containing all the outputs
#' @param roi_start the first ROI name
#' @param roi_end the last ROI name
#' @param sep the separator for the tsv file, by default is '\t'
#' @param iteration the number of iterations for the permutation test.
#' 
#' @return the table containing the results and also the png image for the meta-analysis of the data.
#' @export
#' @examples
#' permutation_effect_size_pipeline(input_tsv1, input_tsv2, output_dir, roi_start, roi_end, sep='\t', colour_groupe=c("blue", "darkorange"), iteration=10000)

permutation_effect_size_pipeline <- function(input_tsv1, input_tsv2, output_dir, roi_start, roi_end, sep='\t', colour_groupe=c("blue", "darkorange"), iteration=10000){
  
  library(ggplot2)
  library(grid)
  library(gridExtra)
  library(reshape2)
  library(xtable)
  library(parallel)
  library(lme4)
  library(MuMIn)
  library(plyr)
  
  ### Checkout the result folder, if not exist, creat it
  dir.create(output_dir, showWarnings = FALSE)
  
  # Thème GGplot
  theme_set(theme_grey())
  
  options(xtable.comment = FALSE)
  
  ### Source the file in order to call functions from other r scripts
  source("/Users/junhao.wen\ 1/Hao/Project/Prevdemals_stat_visualize/statistics/statistics_with_R/NODDI/Compare_DTI_NODDI/ICM_population/permutation_Bootstrap_ES/function.R")
  
  ###### FWF
  data_prepres1 <- data_prep(input_tsv1, roi_start = roi_start, roi_end = roi_end, sep = sep)
  data_analyse1 <- data_prepres1[[1]]  
  nom_ROIs_volume <- data_prepres1[[2]] 
  data_analyse_PS_CN_1 <- data_analyse1[which(data_analyse1$group %in% c('CN', 'PS')), ]

    ####### Volumetry
  data_prepres2 <- data_prep(input_tsv2, roi_start = roi_start, roi_end = roi_end, sep = sep)
  data_prepres2 <- data_prepres2[[1]]  
  data_analyse_PS_CN_2 <- data_prepres2[which(data_prepres2$group %in% c('CN', 'PS')), ]

  nom_ROIs_volume <- nom_ROIs_volume[which(regexpr('_Mean', nom_ROIs_volume) != (-1))]
  rm(list = c('data_prepres1', 'data_prepres2'))
  
  ##################
  # Permutation test
  ##################
  
  system.time(results_permutation <- do.call('rbind', mclapply(1:iteration, function(b){
    do.call('cbind', lapply(nom_ROIs_volume, function(roi){
       model_FWF <- lmer(as.formula(paste(roi, ' ~ age_bl + sex + group_perm + (1|prevdemals_family_code)', sep = '')), data = transform(data_analyse_PS_CN_1, group_perm = data_analyse_PS_CN_1$group[sample(1:length(data_analyse_PS_CN_1$group), replace = F)]))
       model_Volumetry <- lmer(as.formula(paste(roi, ' ~ age_bl + sex + group_perm + (1|prevdemals_family_code)', sep = '')), data = transform(data_analyse_PS_CN_2, group_perm = data_analyse_PS_CN_2$group[sample(1:length(data_analyse_PS_CN_2$group), replace = F)]))

       return(setNames(data.frame(FWF = (r.squaredGLMM(model_FWF)[1]-r.squaredGLMM(update(model_FWF, .~. - group))[1])/(1- r.squaredGLMM(model_FWF)[1]),
                                  Volumetry = (r.squaredGLMM(model_Volumetry)[1]-r.squaredGLMM(update(model_Volumetry, .~. - group))[1])/(1- r.squaredGLMM(model_Volumetry)[1])),

                       c(paste('FWF', roi, sep = '_'), paste('Volumetry', roi, sep = '_'))))
    }))
  }, mc.cores = 4)))# 88.013 sec

  write.table(results_permutation, paste(output_dir, 'results_permutation_sample.csv', sep = '/'), sep = ';', row.names = F)

  result_ES_real <- do.call('cbind', mclapply(nom_ROIs_volume, function(roi){
    model_FWF <- lmer(as.formula(paste(roi, ' ~ age_bl + sex + group + (1|prevdemals_family_code)', sep = '')), data = data_analyse_PS_CN_1)
    model_Volumetry <- lmer(as.formula(paste(roi, ' ~ age_bl + sex + group + (1|prevdemals_family_code)', sep = '')), data = data_analyse_PS_CN_2)

    return(setNames(data.frame(FWF = (r.squaredGLMM(model_FWF)[1]-r.squaredGLMM(update(model_FWF, .~. - group))[1])/(1- r.squaredGLMM(model_FWF)[1]),
                               Volumetry = (r.squaredGLMM(model_Volumetry)[1]-r.squaredGLMM(update(model_Volumetry, .~. - group))[1])/(1- r.squaredGLMM(model_Volumetry)[1])),

                    c(paste('FWF', roi, sep = '_'), paste('Volumetry', roi, sep = '_'))))
  
    }, mc.cores = 4))
  
  write.table(result_ES_real, paste(output_dir, 'results_real_sample.csv', sep = '/'), sep = ';', row.names = F)
  
  ####Compute the p-values between the cohen's f2 for two measurements across ROI
  nom_ROIs_volume <- c("lh.caudalmiddlefrontal_Mean",  "lh.cuneus_Mean",
    "lh.fusiform_Mean","lh.inferiorparietal_Mean", "lh.inferiortemporal_Mean", "lh.insula_Mean",
      "lh.lateraloccipital_Mean", "lh.lingual_Mean", "lh.parsopercularis_Mean", "lh.pericalcarine_Mean",
      "lh.postcentral_Mean" , "lh.precuneus_Mean", "lh.rostralmiddlefrontal_Mean", "lh.superiorparietal_Mean",
       "lh.supramarginal_Mean", "lh.temporalpole_Mean", "rh.fusiform_Mean", "rh.inferiortemporal_Mean",
        "rh.medialorbitofrontal_Mean", "rh.precuneus_Mean", "rh.superiorparietal_Mean")

  ## get the diff for each ROI
  results_permutation1 <- results_permutation[c(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41)]
  results_permutation2 <- results_permutation[c(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42)]
  results_permutation_diff <- results_permutation1 - results_permutation2

  result_ES_real1 <- result_ES_real[c(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41)]
  result_ES_real2 <- result_ES_real[c(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42)]
  result_ES_real_diff <- result_ES_real1 - result_ES_real2

  results_p_value <- do.call('rbind', mclapply(nom_ROIs_volume, function(roi){
    new_roi <- paste('FWF_', roi, sep = '')
    return(data.frame(roi = roi,
                      pvalue = sum(abs(results_permutation_diff[, new_roi]) > abs(result_ES_real_diff[, new_roi]))/(10000+1)))
  }, mc.cores = 4))
  
  write.table(results_p_value, paste(output_dir, 'results_pvalue.csv', sep = '/'), sep = ';', row.names = F)
  
  }
anbai106/NeuroStatisticR documentation built on May 27, 2020, 2:20 a.m.