R/MRtable.R

Defines functions MRtable

Documented in MRtable

#' Table of top microbial marker gene from linear model fit including sequence
#' information
#' 
#' Extract a table of the top-ranked features from a linear model fit. This
#' function will be updated soon to provide better flexibility similar to
#' limma's topTable. This function differs from \code{link{MRcoefs}} in that it
#' provides other information about the presence or absence of features to help
#' ensure significant features called are moderately present.
#' 
#' 
#' @param obj Output of fitFeatureModel or fitZig.
#' @param by Column number or column name specifying which coefficient or
#' contrast of the linear model is of interest.
#' @param coef Column number(s) or column name(s) specifying which coefficient
#' or contrast of the linear model to display.
#' @param number The number of bacterial features to pick out.
#' @param taxa Taxa list.
#' @param uniqueNames Number the various taxa.
#' @param adjustMethod Method to adjust p-values by. Default is "FDR". Options
#' include "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr",
#' "none". See \code{\link{p.adjust}} for more details.
#' @param group One of five choices, 0,1,2,3,4. 0: the sort is ordered by a
#' decreasing absolute value coefficient fit. 1: the sort is ordered by the raw
#' coefficient fit in decreasing order. 2: the sort is ordered by the raw
#' coefficient fit in increasing order. 3: the sort is ordered by the p-value
#' of the coefficient fit in increasing order. 4: no sorting.
#' @param eff Filter features to have at least a "eff" quantile or number of effective samples.
#' @param numberEff Boolean, whether eff should represent quantile (default/FALSE) or number.
#' @param ncounts Filter features to have at least 'counts' of counts.
#' @param file Name of file, including location, to save the table.
#' @return Table of the top-ranked features determined by the linear fit's
#' coefficient.
#' @seealso \code{\link{fitZig}} \code{\link{fitFeatureModel}} \code{\link{MRcoefs}} \code{\link{MRfulltable}}
#' @examples
#' 
#' data(lungData)
#' k = grep("Extraction.Control",pData(lungData)$SampleType)
#' lungTrim = lungData[,-k]
#' lungTrim=filterData(lungTrim,present=30)
#' lungTrim=cumNorm(lungTrim,p=0.5)
#' smokingStatus = pData(lungTrim)$SmokingStatus
#' mod = model.matrix(~smokingStatus)
#' fit = fitZig(obj = lungTrim,mod=mod)
#' head(MRtable(fit))
#' ####
#' fit = fitFeatureModel(obj = lungTrim,mod=mod)
#' head(MRtable(fit))
#'
MRtable<-function(obj,by=2,coef=NULL,number=10,taxa=obj@taxa,
    uniqueNames=FALSE,adjustMethod="fdr",group=0,eff=0,numberEff=FALSE,ncounts=0,file=NULL){

    if(length(grep("fitFeatureModel",obj@call))){
        groups = factor(obj@design[,by])
        by = "logFC"; coef = 1:2;
        tb = data.frame(logFC=obj@fitZeroLogNormal$logFC,se=obj@fitZeroLogNormal$se)
        p  = obj@pvalues
    } else {
        tb = obj@fit$coefficients
        if(is.null(coef)){
            coef = 1:ncol(tb)
        }
        p=obj@eb$p.value[,by]
        groups = factor(obj@fit$design[,by])
        if(eff>0){
            effectiveSamples = calculateEffectiveSamples(obj)
            if(numberEff == FALSE){
                valid = which(effectiveSamples>=quantile(effectiveSamples,p=eff,na.rm=TRUE))
            } else {
                valid = which(effectiveSamples>=eff)
            }
        }        
    }
    
    tx = as.character(taxa)
    if(uniqueNames==TRUE){
        for (nm in unique(tx)) {
            ii=which(tx==nm)
            tx[ii]=paste(tx[ii],seq_along(ii),sep=":")
        }
    }
    padj = p.adjust(p,method=adjustMethod)
    cnts = obj@counts
    posIndices = cnts>0
    
    np0 = rowSums(posIndices[,groups==0])
    np1 = rowSums(posIndices[,groups==1])

    nc0 = rowSums(cnts[,groups==0])
    nc1 = rowSums(cnts[,groups==1])

    if(group==0){
        srt = order(abs(tb[,by]),decreasing=TRUE)
    } else if(group==1){
        srt = order((tb[,by]),decreasing=TRUE)
    } else if(group==2){
        srt = order((tb[,by]),decreasing=FALSE)
    } else if(group==3){
        srt = order(p,decreasing=FALSE)
    } else {
        srt = 1:length(padj)
    }

    valid = 1:length(padj)
    if(ncounts>0){
        np=rowSums(cbind(np0,np1))
        valid = intersect(valid,which(np>=ncounts))
    }
    srt = srt[which(srt%in%valid)][1:min(number,nrow(tb))]

    mat = cbind(np0,np1)
    mat = cbind(mat,nc0)
    mat = cbind(mat,nc1)
    mat = cbind(mat,tb[,coef])
    mat = cbind(mat,p)
    mat = cbind(mat,padj)
    rownames(mat) = tx
    mat = mat[srt,]

    nm = c("+samples in group 0","+samples in group 1","counts in group 0",
        "counts in group 1",colnames(tb)[coef],"pvalues","adjPvalues")
    colnames(mat) = nm

    if(!is.null(file)){
        nm = c("Taxa",nm)
        mat2 = cbind(rownames(mat),mat)
        mat2 = rbind(nm,mat2)
        write(t(mat2),ncolumns=ncol(mat2),file=file,sep="\t")
    }
    return(as.data.frame(mat))
}

Try the metagenomeSeq package in your browser

Any scripts or data that you put into this service are public.

metagenomeSeq documentation built on Nov. 8, 2020, 5:34 p.m.