R/ACSN_Enrichment.R

#!/usr/bin/env/R
####### ACSN enrichment analysis #######
####### Single sample analysis / single cohort analysis #######

#' Gene set enrichment analysis
#' 
#' Compute and represent gene set enrichment from your data based on pre-saved maps from ACSN or user imported maps. 
#' The gene set enrichment can be run with hypergeometric test or Fisher exact test, 
#' and can use multiple corrections. Visualization of data can be done either by barplots or heatmaps.
#' 
#' @param Genes Character vector of genes that should be tested for enrichment
#' @param maps list of maps generated by format_from_gmt. Names of element of list will be used to track modules.
#' Default: tests on the master map.
#' @param correction_multitest either FALSE, "bonferroni", "holm", "hochberg", "hommel", "BH", "fdr" (identical to BH), or "BY"
#' @param statistical_test one of "fisher", "hypergeom"
#' @param min_module_size will remove from the analysis all modules which are (strictly) smaller than threshold
#' @param universe Universe on which the statistical analysis should be performed. 
#' Can be either "HUGO","ACSN","map_defined", or a character vector of genes.
#' @param Remove_from_universe Default is NULL. A list of genes that should not be considered for enrichment 
#' (will be removed from input, maps, and universe). The size of universe and map will be updated after removal. 
#' @param threshold maximal p-value (corrected if correction is enabled) that will be displayed
#' @param alternative One of "greater", "less", "both" or "two.sided"
#' Greater will check for enrichment, less will check for depletion, and both will look for 
#' both and will keep track of the side,
#' while two-sided (only for fisher test) checks if there is a difference.
#' @return Output is a dataframe with the following columns:\describe{
#'  \item{module}{The name of the map or the module preceded by the map}
#'  \item{module_size}{The number of genes in the module after taking into account universe reduction}
#'  \item{nb_genes_in_module}{The number of genes from input list in the module}
#'  \item{genes_in_module}{Names of the genes from input list in the module, space separated}
#'  \item{universe_size}{size of the input universe}
#'  \item{nb_genes_in_universe}{number of genes from the input list that are found in the universe}
#'  \item{test}{the kind of test that was looked for. "greater" when enrichment is tested, "less" when depletion is tested, or "two.sided"}
#' }
#' @examples enrichment(genes_test,min_module_size = 10, 
#'    threshold = 0.05,
#'    maps = list(cellcycle = ACSNMineR::ACSN_maps$CellCycle),
#'    universe = "ACSN")
#' @export
#' @importFrom stats fisher.test p.adjust phyper
#' @importFrom utils read.csv
enrichment<-function(Genes=NULL,
                     maps = c("Apoptosis",
                              "CellCycle",
                              "DNA_repair",
                              "EMT_motility",
                              "Survival" ), 
                     correction_multitest = "BH",
                     statistical_test = "fisher",
                     min_module_size = 5,
                     universe = "map_defined",
                     Remove_from_universe = NULL,
                     threshold = 0.05,
                     alternative = "greater"){
  
  if(statistical_test=="hypergeom" & (alternative != "greater")){
    warning("Depletion with hypergeometric test can be unreliable. Prefer fisher test.")
  }
  ### Checking maps
  if(is.data.frame(maps) | is.matrix(maps)){
    maps<-list(maps)
  }
  else if(!is.list(maps)){
    if(is.character(maps)){
      map_names<-names(ACSNMineR::ACSN_maps)
      w<-numeric()
      for(map_index in maps){
        w<-c(w,which(grepl(pattern = map_index, x = map_names, ignore.case = TRUE)))
      }
      w<- unique(w)
	#print(w)
      if(length(w)>1){
        maps<-ACSNMineR::ACSN_maps[w]
	  mapnames<-names(ACSNMineR::ACSN_maps)[w]
	
	  names(maps)<-mapnames
      }
      else if(length(w)){
        maps<-list(ACSNMineR::ACSN_maps[[w]])
        names(maps)<-map_names[w]
      }
      else{
        stop("The name of the maps you entered are unknown")
        
      }
    }
    else{
        stop("maps should be a dataframe or a list of dataframes. Exiting")
    }
  }
  
  if(universe[1] == "ACSN"){
    universe_was_ACSN<-TRUE
  }
  else{
    universe_was_ACSN<-FALSE
  }
  

  ### Checking that gene list is unique
  Genes<-unique(Genes)
  if(!is.null(Remove_from_universe)){
    Remove_from_universe<-unique(Remove_from_universe)
    test<-Genes %in% Remove_from_universe
    if(sum(test)){
      warning(paste("The following genes were found in input and asked to be removed from universe:\n",
                    paste(Genes[test],collapse = " "))
      )
      Genes<-Genes[!test]
    }
    
  }
  Genes_size<-length(Genes)    
  if(!(alternative %in% c("greater","less","both","two.sided"))){
    stop('enrichment variable should be one of:"greater","less","both","two.sided" ')
  }
  ### Checking that parameters are correct
  if(sum(statistical_test %in% c("fisher", "hypergeom"))==0){
    stop("statistical_test should be one of 'fisher' or 'hypergeom'")
  }
  if(alternative == "two.sided" & statistical_test != "fisher"){
    warning("two-sided test only relevant for fisher test. \n Will perform both enrichment and under-representation tests.")
    alternative<-"both"
  }
  result<-data.frame()
  
  ### If universe is ACSN: extract genes from ACSN and define it as universe
  if(length(universe) == 1){
    if(universe == "ACSN"){
      if(is.null(Remove_from_universe)){
        genesACSN<-unique(unlist(lapply(X = ACSNMineR::ACSN_maps,FUN = function(z){
          return(as.character(unique(z[,-(1:2)])))
        })))
        universe<-genesACSN[genesACSN!=""]
        size<-length(genesACSN)
      }
      else{ # Filter out genes to be removed from analysis
        genesACSN<- genesACSN<-unique(unlist(lapply(X = ACSNMineR::ACSN_maps,FUN = function(z){
          return(as.character(unique(z[,-(1:2)])))
        })))
        universe<-genesACSN[!(genesACSN %in% c("",Remove_from_universe))]
        size<-length(genesACSN)
      }
    }
    else if(universe == "HUGO"){
      ###Total size of approved symbols, from http://www.genenames.org/cgi-bin/statistics, as of October 8th 2015
      if(is.null(Remove_from_universe)){
        size = 39480
      }
      else{ ## Filter out Remove_from_universe
        size = 39480 - length(Remove_from_universe)
        if(is.list(maps)){
          maps<-lapply(maps,FUN = function(df){
            df[df %in% Remove_from_universe]<-""
          })
        }
        else{
          maps[maps %in% Remove_from_universe]<-""
        }
      }
    }
    
    else if( universe == "map_defined"){
      genesmap<-character()
      iterator<-0
      if(is.list(maps)){
        genesmap<-unique(as.character(unlist(lapply(X = maps,FUN = function(z){
          return(as.character(z[,-(1:2)]))
        }))))
      }
      else{
        genesmaps<-unique(as.character(maps[,-(1:2)]))
      }
      genesmap<-as.character(genesmap[genesmap!=""])
      
      if(!is.null(Remove_from_universe)){
        genesmap<-genesmap[!(genesmap %in% Remove_from_universe)]
      }
      is_in_ACSN<-Genes %in% genesmap
      S<-sum(!is_in_ACSN)
      if(S > 0){ ### removing genes from list, that are not from ACSN
        warning(paste(S,"genes are not in ACSN modules and will be excluded from analysis"))
      }
      Genes<-Genes[is_in_ACSN]
      size <- length(genesmap)
      universe<-genesmap
      
    }
    else{
      stop("Invalid universe input: must be 'HUGO','ACSN','map_defined', or a list of genes as character vector")
    }
    
    ### Length of universe can be changed if "ACSN"
    
  }
  if(length(universe)> 1){
    ### Change maps so that they only have gene names from universe and remove modules which are too small
    i<-0
    genesACSN<-character()
    iterator<-0
    for(lt in maps){
      iterator<-iterator+1
      ### extract modules of size >= module_size and get size of ACSN reduced by universe and module size
      validmap<-data.frame()
      
      if(dim(lt)[1]>1){ ### map is a dataframe
        for(k in 1:dim(lt)[1]){
          spare_genes<-as.character(lt[k,-c(1,2)])
          spare_genes[!(spare_genes %in% universe)]<- ""
          test<-spare_genes != ""
          S<-sum(test)
          if( S>= min_module_size){
            genesACSN<-unique(c(genesACSN,unique(spare_genes[test])))
            validmap<-rbind(validmap,cbind(lt[k,1],S,t(spare_genes)))          
          }
        }
      }
      else{ #map is a single line
        if(length(lt)>2){
          spare_genes<-as.character(sapply(3:length(lt),FUN = function(z) as.character(lt[[z]])))
          test<-spare_genes != ""
          S<-sum(test)
          if( S>= min_module_size){
            genesACSN<-unique(c(genesACSN,unique(spare_genes[test])))
            validmap<-rbind(validmap,cbind(lt[1],S,t(spare_genes)))
            validmap[,2]<-as.integer(as.character(validmap[,2]))
          }
        }
        else{
          stop("Empty map, exiting")
        }
      }
      if(!universe_was_ACSN){
        size <- length(genesACSN)
      }
      maps[[iterator]]<-validmap
    }
    is_in_ACSN<-Genes %in% genesACSN
    S<-sum(!is_in_ACSN)
    if(S > 0){ ### removing genes from list, that are not from ACSN
      warning(paste(S,"genes are not in universe and will be excluded from analysis"))
    }
    Genes<-Genes[is_in_ACSN]
    
  }
  if(length(Genes)==0){
    stop("No genes in universe.")
    result<-data.frame()
  }
  ##################################
  #### Begin calculation of p-values
  ##################################
  
  
  ### get from list how many are in each sub-compartment
  result<-data.frame()
  
  ### what would be the expected?
  tracker<-0
  map_names<-names(maps)
  if(is.null(map_names)){
    if(length(maps)>1){
      message("Maps should be defined in this way: maps<-list(map1 = ... , map2 = ...)")
      message("In the absence of names, will use letters")
    }
    map_names<-paste("map",1:length(maps),sep = "_")
  }
  for(map in maps){
    tracker <- tracker + 1
    if(dim(map)[1]==0){
      message(paste("Map",map_names[tracker],"has no modules left from restriction to universe"))
    }
    else{
      
      modules<-map[,1:2]
      
      ### Calculate p-value for map as a whole if map is not ACSN_master
      ###########################
      ### SHOULD WE COMPUTE P-VAL FOR WHOLE MAP?
      ###########################
      if(universe_was_ACSN & map_names[tracker]!="ACSN_master"){
        compute_module_p.value<-TRUE
      }
      else if(!is.data.frame(maps) & length(maps)>1 & map_names[tracker]!="ACSN_master"){
        compute_module_p.value<-TRUE
      }
      else{
        compute_module_p.value<-FALSE
      }
      
      ########################
      # BEGIN MAP TESTING
      ########################
      if( compute_module_p.value ){ ###
        mapgenes<-unique(as.character(unlist(map[,-c(1:2)])))
        
        mapsize<-length(mapgenes)
        genes_in_mapgenes<-(Genes %in% mapgenes)      
        Ngenes_in_mapgenes<-sum(genes_in_mapgenes)
        
        ########################
        # BEGIN FISHER TESTING
        ########################
        if(statistical_test == "fisher"){
          if(alternative != "both"){        
            p.values<-c(p.val.calc(Ngenes_in_mapgenes,
                                   mapsize-Ngenes_in_mapgenes,
                                   length(Genes),
                                   size - length(Genes),
                                   "fisher",
                                   alternative
            ),
            alternative)
            
          }
          else{ ### if both computations
            p.values<-cbind(c(p.val.calc(Ngenes_in_mapgenes,
                                         mapsize-Ngenes_in_mapgenes,
                                         length(Genes),
                                         size - length(Genes),
                                         "fisher",
                                         "greater"
            ),
            "enrichment"),
            c(p.val.calc(Ngenes_in_mapgenes,
                         mapsize-Ngenes_in_mapgenes,
                         Genes_size,
                         size - length(Genes),
                         "fisher",
                         "less"
            ),
            "depletion")
            )
          }
        }
        ########################
        # BEGIN HYPERGEOM TESTING
        ########################
        else{ ### Test is != from fisher
          ### Ngcalc: Ngenes corrected or not for P[X<=k] (default for phyper, needs to be P[X<k] for p-value)
          if(alternative == "less"){
            Ngcalc<-Ngenes_in_mapgenes
          }
          else{
            if(Ngenes_in_mapgenes>0){
              Ngcalc<-Ngenes_in_mapgenes-1
            }
            else{
              Ngcalc<-0
            }
          }
          if(alternative == "greater" & Ngenes_in_mapgenes==0){ ### You cannot have enrichment if you have 0 genes in module
            p.values<-c(1,alternative) 
          }
          else if(alternative != "both"){ # alternative is (less) or (greater & Ngenes>0)        
            
            p.values<-c(p.val.calc( Ngcalc,
                                    length(Genes),
                                    size - length(Genes),
                                    mapsize,
                                    "hypergeom",
                                    alternative
            ),
            alternative
            ) 
          }
          else{ ### Testing both hypotheses
            if(Ngenes_in_mapgenes>0){
              p.values<-cbind(c(p.val.calc( Ngcalc,
                                            length(Genes),
                                            size - length(Genes),
                                            mapsize,
                                            "hypergeom",
                                            'greater'
              ),
              'enrichment'),
              c(p.val.calc( Ngenes_in_mapgenes, ### for not using Ngcalc see def of Ngcalc
                            length(Genes),
                            size - length(Genes),
                            mapsize,
                            "hypergeom",
                            'less'
              ),
              'depletion'
              )
              )
              print(p.values)
            }
            else{ ### Both alternatives and Ngenes is 0
              p.values<-cbind(c(1,'greater'),
                              c(p.val.calc( Ngenes_in_mapgenes, ### for not using Ngcalc see def of Ngcalc
                                            length(Genes),
                                            size - length(Genes),
                                            mapsize,
                                            "hypergeom",
                                            'less'
                              ),
                              'less'
                              )
              )
              print(p.values)
              
            }
          }
        }
        
        spare<-cbind(map_names[tracker],
                     mapsize,
                     paste(Genes[genes_in_mapgenes],collapse = " "),
                     Ngenes_in_mapgenes,
                     t(p.values))
        colnames(spare)<-c("module","module_size","genes_in_module",
                           "nb_genes_in_module","p.value","test")
        
        
        result<-rbind(result,spare)
        result$module_size<-as.integer(as.character(result$module_size))
      } #### End of module p-value
      
      if(length(maps)>1){
        modules[,1]<-paste(map_names[tracker],modules[,1],sep=":")
        map[,1]<-modules[,1]
      }
      ##########################
      #### BEGIN MODULE TESTING
      #########################
      ### BEGIN FISHER TEST###
      #########################
      if(statistical_test == "fisher"){
        p.values<-apply(map,MARGIN = 1, FUN = function(z){
          short_z<-z[z!=""][-c(1,2)] ### remove empty slots, module name and length
          Module_size<-cnum(z[2])
          test<-Genes %in% short_z
          Genes_in_module<-sum(test)
          Gene_set<-paste(Genes[test],collapse = " ")
          
          if(alternative != "both"){
            return(t(c(Gene_set,Genes_in_module,p.val.calc(x = Genes_in_module,
                                                           y = Module_size-Genes_in_module,
                                                           z = length(Genes),
                                                           a = size - length(Genes),
                                                           stat_test = "fisher",
                                                           alt = alternative),alternative))
            )
          }
          else{ ### test for both greater and less
            return(cbind(t(c(z[1],z[2],Gene_set,Genes_in_module,p.val.calc(x = Genes_in_module,
                                                                           y = Module_size-Genes_in_module,
                                                                           z = length(Genes),
                                                                           a = size - length(Genes),
                                                                           stat_test = "fisher",
                                                                           alt = "greater"),"enrichment")),
                         t(c(z[1],z[2],Gene_set,Genes_in_module,p.val.calc(Genes_in_module,
                                                                           Module_size-Genes_in_module,
                                                                           length(Genes),
                                                                           size - length(Genes),
                                                                           stat_test = "fisher",
                                                                           "less"),"depletion"))))
          }
          
        }
        )
        #########################
        ### BEGIN HYPERGEOM TEST#
        #########################
      }else if(statistical_test == "hypergeom"){
        p.values<-apply(map,MARGIN = 1, FUN = function(z){
          short_z<-z[z!=""][-c(1,2)] ### remove empty slots, module name and length
          Module_size<-cnum(z[2])
          test<-Genes %in% short_z
          Genes_in_module<-sum(test)
          Gene_set<-paste(Genes[test],collapse = " ")
          if(alternative=="greater"){
            if(Genes_in_module > 0){ ### Correction for depletion: phyper tests for P[X<=x]
              Genes_in_module_calc<-Genes_in_module-1 
            }
            else{ ### Return p-value of 1 (you cannot have enrichment with 0 genes!)
              return(c(Gene_set,Genes_in_module,1,alternative))
            }
          }
          else{
            Genes_in_module_calc<-Genes_in_module
          }
          if(alternative != "both"){ #### Either return less or greater&Ngenes>0
            return(c(Gene_set,Genes_in_module,p.val.calc(x = Genes_in_module_calc,
                                                         y = length(Genes),
                                                         z =size - length(Genes),
                                                         a = Module_size,
                                                         "hypergeom",
                                                         alternative),
                     alternative))
          }
          else{ #### p-value for both lower and higher tail
            if(Genes_in_module > 0){ ### Correction for depletion: phyper tests for P[X<=x]
              return(cbind(t(c(z[1],z[2],Gene_set,Genes_in_module,p.val.calc(x = Genes_in_module - 1,
                                                                             y = length(Genes),
                                                                             z = size - length(Genes),
                                                                             a = Module_size,
                                                                             "hypergeom",
                                                                             "greater"),
                               "enrichment")),
                           t(c(z[1],z[2],Gene_set,Genes_in_module,p.val.calc(x= Genes_in_module,
                                                                             y=length(Genes),
                                                                             z=size - length(Genes),
                                                                             a=Module_size,
                                                                             "hypergeom",
                                                                             "less"),
                               "depletion")))
              )
            }
            else{
              return(cbind(t(c(z[1],z[2],Gene_set,Genes_in_module,1,"enrichment")),
                           t(c(z[1],z[2],Gene_set,Genes_in_module,p.val.calc(x= Genes_in_module,
                                                                             y=length(Genes),
                                                                             z=size - length(Genes),
                                                                             a=Module_size,
                                                                             "hypergeom",
                                                                             "less"),
                               "depletion"))))
            }
          }
        }) ### end of phyper apply function
      } ### end of statistical test
    }
    ##################
    #END HYPERGEOM ###
    #BEGIN Data formating#
    ##################
    if(alternative!="both"){
      spare<-cbind(modules,t(p.values))
    }
    else{
      spare<-rbind(t(p.values)[,1:6],t(p.values)[,7:12])
    }
    colnames(spare)<-c("module","module_size","genes_in_module",
                       "nb_genes_in_module","p.value","test")
    result<-rbind(result,spare)
  } ### end of maps
  
  result$p.value<-cnum(result$p.value)
  result$universe_size<-size
  result$genes_in_universe<-Genes_size
  if(is.logical(correction_multitest)){
    result<-result[cnum(result$p.value) <= threshold,]
    if(correction_multitest){
      warning("If multiple correction is wanted, please use one of the listed parameters.")
    }
  }
  else{
    if(correction_multitest %in% c("bonferroni", "holm", "hochberg", "hommel", "BH", "fdr", "BY")){
      result$p.value.corrected<-p.adjust(cnum(result$p.value),method = correction_multitest)
      result<-result[result$p.value.corrected <= threshold,]
    }
    else{
      result<-result[result$p.value <= threshold,]
      warning('correction multitest should be one of the following: "bonferroni", "holm", "hochberg", "hommel", "BH", "fdr", "BY"')
    }
  }
  
  ###adding missing info
  if(dim(result)[1]==0){
    warning("No modules under threshold")
    return(NA)
  }
  result$universe_size<-size
  result$nb_genes_in_universe<-length(Genes)
  p.val.names<-colnames(result)[grepl(pattern = "p.value",x = colnames(result))]
  
  ### re-ordering
  result<-result[,c("module","module_size","nb_genes_in_module",
                    "genes_in_module","universe_size",
                    "nb_genes_in_universe",
                    p.val.names,
                    "test")]
  return(result)
}

####### Multiplesample analysis / multiple cohorts analysis #######

#' Automated gene set analysis for multiple sets
#' 
#' 
#' @param Genes_by_sample List of character vectors. Each list element name should be a sample name, 
#' and each character vector the set of genes to test for the sample.
#' @param maps list of maps generated by format_from_gmt. Default: tests on all acsn maps
#' @param correction_multitest either FALSE, "bonferroni", "holm", "hochberg", "hommel", "BH", "fdr" (identical to BH), or "BY"
#' @param statistical_test one of "fisher", "hypergeom"
#' @param min_module_size will remove from the analysis all modules which are (strictly) smaller than threshold
#' @param universe Universe on which the statistical analysis should be performed. Can be either "HUGO","ACSN" 
#' ,"map_defined", or a character vector of genes.
#' @param Remove_from_universe Default is NULL. A list of genes that should not be considered for enrichment 
#' (will be removed from input, maps, and universe). The size of universe and map will be updated after removal. 
#' @param threshold maximal p-value (corrected if correction is enabled) that will be displayed
#' @param cohort_threshold if TRUE modules will be kept in all samples if at least one sample 
#' has p-value lower than threshold, otherwise the threshold is applied for each sample independently.
#' @param alternative One of "greater", "less", "both", or "two.sided" (only for fisher test).
#' Greater will check for enrichment, less will check for depletion, and both will look for both.
#' @return Output is a list of dataframes with names the names given in `Genes_by_sample` with the following columns:\describe{
#'  \item{module}{The name of the map or the module preceded by the map}
#'  \item{module_size}{The number of genes in the module after taking into account universe reduction}
#'  \item{nb_genes_in_module}{The number of genes from input list in the module}
#'  \item{genes_in_module}{Names of the genes from input list in the module, space separated}
#'  \item{universe_size}{size of the input universe}
#'  \item{nb_genes_in_universe}{number of genes from the input list that are found in the universe}
#'  \item{test}{the kind of test that was looked for. "greater" when enrichment is tested, "less" when depletion is tested, or "two.sided"}
#' }
#' @examples multisample_enrichment(Genes_by_sample = list(set1 = genes_test,set2=c(genes_test,"PTPRD")),
#' maps = list(cellcycle = ACSNMineR::ACSN_maps$CellCycle),
#' min_module_size = 10,
#' universe = "ACSN",cohort_threshold = FALSE)
#' @export

multisample_enrichment<-function(Genes_by_sample=NULL,
                                 maps = c("Apoptosis",
                                          "CellCycle",
                                          "DNA_repair",
                                          "EMT_motility",
                                          "Survival" ), 
                                 correction_multitest = "BH",
                                 statistical_test = "fisher",
                                 min_module_size = 5,
                                 universe = "map_defined",
                                 Remove_from_universe = NULL,
                                 threshold = 0.05,
                                 cohort_threshold = TRUE,
                                 alternative = "greater"){
  ### TO-DO : restrict genes to universe to gain computation time
  
  if(cohort_threshold){
    result<-lapply(X = Genes_by_sample ,
                   FUN = function(z){
                     enrichment(Genes = z,
                                maps = maps,
                                correction_multitest = correction_multitest,
                                statistical_test = statistical_test,
                                min_module_size = min_module_size,
                                universe = universe,
                                Remove_from_universe =Remove_from_universe,
                                threshold =  1)
                   })
    kept_modules<-character() 
    for(sample in result){
      if(is.logical(correction_multitest)){ 
        kept_modules<-unique(c(kept_modules,sample[sample$p.value<=threshold,1]))
      }
      else{
        kept_modules<-unique(c(kept_modules,sample[sample$p.value.corrected<=threshold,1]))
      }
    }
    for(i in 1:length(result)){
      result[[i]]<-result[[i]][result[[i]][,1] %in% kept_modules,]
    }
  }
  else{
    result<-lapply(X = Genes_by_sample ,
                   FUN = function(z){
                     enrichment(Genes = z,
                                maps = maps,
                                correction_multitest = correction_multitest,
                                statistical_test = statistical_test,
                                min_module_size = min_module_size,
                                universe = universe,
                                Remove_from_universe = Remove_from_universe,
                                threshold =  threshold)
                   })
    names(result)<-names(Genes_by_sample)
  }
  return(result)
  
}
#' Calculate p-value
#' 
#' @param x : first value
#' @param y : second value
#' @param z : third value
#' @param a : fourth value
#' @param stat_test : statistical test to be used
#' @param alt : alternative: one of two-sided, greater, less or both
#' 
p.val.calc<-function(x,y,z,a,stat_test,alt){
  if(stat_test=="fisher"){
    M<-matrix(c(x,y,z,a),nrow = 2)
    FT<-fisher.test(M, alternative =alt)
    return(FT$p.value)
  }
  else{
    if(alt =="greater"){
      return(phyper(x, y,z,a,lower.tail = FALSE))
    }
    else{
      return(phyper(x, y,z,a,lower.tail = TRUE))
    }
    
  }
}


#' Convert to numeric
#' 
#' @param x A vector of numbers which is not in numeric format
cnum<-function(x){
  return(as.numeric(as.character(x)))
}

Try the ACSNMineR package in your browser

Any scripts or data that you put into this service are public.

ACSNMineR documentation built on May 1, 2019, 9:14 p.m.