R/merged_ewce.R

Defines functions merged_ewce

Documented in merged_ewce

#' Multiple EWCE results from multiple studies
#'
#' \code{merged_ewce} combines enrichment results from multiple studies targetting the same scientific problem
#'
#' @param results a list of EWCE results generated using \code{\link{add.res.to.merging.list}}
#' @param reps Number of random gene lists to generate (default=100 but should be over 10000 for publication quality results)
#' @return dataframe in which each row gives the statistics (p-value, fold change and number of standard deviations from the mean) associated with the enrichment of the stated cell type in the gene list
#' @examples
#' # Load the single cell data
#' data(celltype_data)
#'
#' # Set the parameters for the analysis
#' reps=100 # <- Use 100 bootstrap lists so it runs quickly, for publishable analysis use >10000
#' subCellStatus=0 # <- Use subcell level annotations (i.e. Interneuron type 3)
#'
#' # Load the gene list and get human orthologs
#' data("example_genelist")
#' data("mouse_to_human_homologs")
#' m2h = unique(mouse_to_human_homologs[,c("HGNC.symbol","MGI.symbol")])
#' mouse.hits = unique(m2h[m2h$HGNC.symbol %in% example_genelist,"MGI.symbol"])
#' mouse.bg  = unique(setdiff(m2h$MGI.symbol,mouse.hits))
#'
#' # Bootstrap significance testing, without controlling for transcript length and GC content
#' full_results = bootstrap.enrichment.test(sct_data=celltype_data,mouse.hits=mouse.hits,
#'      mouse.bg=mouse.bg,reps=reps,sub=subCellStatus)
#'
#' # Generate the plot
#' print(ewce.plot(full_results$results,mtc_method="BH"))
#' @export
# @import ggplot2
# @importFrom reshape2 melt
# @importFrom grid unit
# @import plyr
merged_ewce <- function(results,reps=100){
    # Check arguments are valid
    if(length(results)<=1){stop("ERROR: results list is not valid. Use 'add.res.to.merging.list' to generate valid list.")}

    # If results are directional then seperate directions and call function recursively
    if(!is.null(results[[1]]$Direction)){
        for(dir in c("Up","Down")){
            found=0
            for(i in 1:length(results)){
                if(results[[i]]$Direction==dir){
                    found=found+1
                    if(found==1){
                        dir_results = list(results[[i]])
                    }else{
                        dir_results[[length(dir_results)+1]] <- results[[i]]
                    }
                    dir_results[[length(dir_results)]]$Direction=NULL
                }
            }
            if(dir=="Up"){
                merged_res = cbind(merged_ewce(dir_results,reps=reps),Direction=dir)
            }else{
                tmp = cbind(merged_ewce(dir_results,reps=reps),Direction=dir)
                merged_res = rbind(merged_res,tmp)
            }
        }
        return(merged_res)
    }else{
    # If the results are NOT directional
        # First, sum the celltype enrichment values for hit genes in each study
        hit.cells = results[[1]]$hitCells
        for(i in 2:length(results)){
            hit.cells = hit.cells + results[[i]]$hitCells
        }
        # Then:
        # - results[[x]]$bootstrap_data has the enrichment values for the each of the 10000 original reps
        # - Add these to each other 'reps' times
        count=0
        for(i in 1:reps){
            randomSamples = sample(1:dim(results[[1]]$bootstrap_data)[1],10000,replace=TRUE)
            boot.cells = results[[1]]$bootstrap_data[randomSamples,]
            for(i in 2:length(results)){
                randomSamples = sample(1:dim(results[[1]]$bootstrap_data)[1],10000,replace=TRUE)
                boot.cells = boot.cells + results[[i]]$bootstrap_data[randomSamples,]
            }
            count=count+1
            if(count==1){
                all.boot.cells = boot.cells
            }else{
                all.boot.cells = rbind(all.boot.cells,boot.cells)
            }
        }
        boot.cells=all.boot.cells
        p = fc = sd_from_mean = rep(0,length(hit.cells))
        names(p) = names(fc) = names(results[[1]]$hitCells)
        for(i in 1:length(hit.cells)){
            p[i]  = sum(hit.cells[i]<boot.cells[,i])/length(boot.cells[,i])
            fc[i] = hit.cells[i]/mean(boot.cells[,i])
            sd_from_mean[i] = (hit.cells[i]-mean(boot.cells[,i]))/sd(boot.cells[,i])
        }
        return(data.frame(CellType=names(p),p=p,fc=fc,sd_from_mean=sd_from_mean))
    }
}

Try the EWCE package in your browser

Any scripts or data that you put into this service are public.

EWCE documentation built on May 31, 2017, 3:16 p.m.