R/presense_absense.R

Defines functions eset_presence_absence

Documented in eset_presence_absence

#' Presence/Absence Testing
#' 
#' A function for testing the significance in differences
#' of presence/absence patterns. 
#' 
#' The recommended way of using the test is two-group comparison. 
#' All except "binom" options will work for larger number of groups.
#' 
#' 
#' @param eset eset (or most likely eset subclass) object
#' @param grouping character defining the column in phenoData
#' @param test character: Fisher exact test, \eqn{\chi^{2}}{\chi^2}, G-test or binomial.
#' @param ... other aruments to pass to the test functions 
#'              (e.g. simulate.p.value = TRUE)
#' @return data.frame
#'      \describe{
#'          \item{\code{p.value}}{p-value}
#'          \item{\code{effect}}{difference between second and first group
#'                              (according to the factor levels).  
#'                              In case of larger number of levels, 
#'                              it is the absolute value of the max diffence}
#'          \item{\code{...}}{proportions of present for each group. 
#'                              Column names are level names.}
#'      }
#' @note see also \url{http://www.ncbi.nlm.nih.gov/pubmed/20831241}
#' @importFrom Biobase exprs pData featureNames
#' @export eset_presence_absence
#' 
#' @examples
#' library("MSnbase")
#' Nsam = 20
#' Npep = 5
#' M <- matrix(rbinom(Nsam*Npep, 1, 0.5), ncol=Nsam)
#' pd <- data.frame(group = gl(2, Nsam/2))
#' fd <- data.frame(otherfdata = letters[1:Npep])
#' x0 <- MSnSet(M, fd, pd)
#' eset_presence_absence(x0, 'group', test='fisher')
#' eset_presence_absence(x0, 'group', test='chisq', simulate.p.value=TRUE)
#' eset_presence_absence(x0, 'group', test='g')
#' eset_presence_absence(x0, 'group', test='binom')
#' 
eset_presence_absence <- function(eset, 
                                  grouping, 
                                  test=c('fisher','chisq','g','binom'), 
                                  ...){
    test <- match.arg(test)
    if(is.character(grouping) & length(grouping) == 1)
        grouping <- pData(eset)[[grouping]]
    stopifnot(is.factor(grouping))
    # presence count
    pc <- t(apply(exprs(eset), 1, tapply, INDEX=grouping, sum))
    total.pc <- colSums(pc)
    N <- table(grouping)
    if(test != 'binom'){
        TEST.FUN <- switch(test, 
                           fisher = fisher.test,
                           chisq = chisq.test,
                           g = Biostrings:::g.test)
        out <- apply(pc, 1, function(x){
            cm <- rbind(x, total.pc - x)
            res <- TEST.FUN(cm, ...)
            res$p.value})
    }else{
        if(nlevels(grouping) > 2)
            warning("binom option will test only the difference between first two factor levels")
        p <- pc[,1]/N[1]
        out <- sapply(seq_len(nrow(pc)), function(i){
            res <- binom.test(pc[i,2], N[2], p[i])
            res$p.value})
    }
    # now estimate the effect
    est <- sweep(pc, 2, N, '/')
    colnames(est) <- levels(grouping)
    if(nlevels(grouping) > 2)
        effect <- abs(Biobase::rowMax(est) - Biobase::rowMin(est))
    if(nlevels(grouping) == 2)
        effect <- est[,2] - est[,1]
    out <- data.frame(p.value=out, effect=effect, est)
    rownames(out) <- featureNames(eset)
    return(out)
}
vladpetyuk/vp.misc documentation built on June 25, 2021, 6:35 a.m.