R/betr.R

`betr` <-
function (eset, cond=NULL, timepoint, replicate, twoColor=FALSE, twoCondition=NULL, alpha=0.05, verbose=FALSE) {
    if (twoColor==TRUE & is.null(twoCondition))
        stop("For two color data you have to specificy whether the data is from one or two conditions (set option 'twoCondition' to TRUE or FALSE)")
    if (length(unique(table(timepoint)))!=1) 
        stop("The number of replicates must be the same at every time point.")
    
    ProbeAnn <- NULL
    if (is(eset, "exprSet")) {
        dat <- eset@exprs
        if (!is.null(rownames(dat))) 
            ProbeAnn <- data.frame(ID = I(rownames(dat)))
    }
    if (is(eset, "ExpressionSet")) {
        dat <- exprs(eset)
        if (length(eset@featureData@data)) 
            ProbeAnn <- eset@featureData@data
        if (!is.null(rownames(dat))) {
            if (is.null(ProbeAnn)) 
                ProbeAnn <- data.frame(ID = I(rownames(dat)))
            else ProbeAnn$ID <- rownames(dat)
        }
    }
    if (is.numeric(eset)) 
        dat <- eset
    timepoint <- as.numeric(as.character(timepoint))    
    if (is.null(cond)) 
    	cond <- rep('cond1', length(timepoint))
    cond <-  as.factor(as.character(cond)) # Gets rid of any unused factor levels
    replicate <-  as.factor(paste(cond,replicate,sep='.')) 
    numLevelsCond <- length(levels(cond))
    if (twoColor==FALSE)
        if(numLevelsCond==1) {
            twoCondition<-FALSE
        } else {
            twoCondition<-TRUE
        }
    colnames(dat) <- paste(cond, timepoint, replicate, sep='.')
    if (verbose) {
        if (twoColor==FALSE) {
            if (numLevelsCond==1) {
                cat(' Analyzing one color data - single condition\n')
            } else {
        	    cat(' Analyzing one color data - Finding differentially expressed genes between ', levels(cond)[1], ' and ', levels(cond)[2], '\n')
            }
        } else { # Two color data
            if (numLevelsCond==2) {
           	   cat(' Analyzing two color data with common reference - Finding differentially expressed genes between', levels(cond)[1], 'and', levels(cond)[2], '\n')
            } else { # numLevelsCond==1 can mean either single condition (common reference) or paired samples at each timepoint   
               if (twoCondition==FALSE) { 
                 	cat(' Analyzing two color data - single condition with common reference \n')
               } else { 
                 	cat(' Analyzing two color data - two conditions (paired samples at each time point)\n')
               }
            }
        }
    }
    
    X <- lapply(1:nrow(dat), function (g)
        lapply(levels(cond), function(c) {
            cond.idx <- which(cond==c) 
            t(sapply(unique(replicate[cond.idx]), 
                function(r) {
                    idx <- which(replicate==r)
                    idx <- idx[order(timepoint[idx])]
                    dat[g, idx]
                })
            )        
        })
    )

    names(X) <- rownames(dat)
    n <- sapply(X[[1]], nrow)
    if (numLevelsCond==1 & twoCondition==FALSE) { #compare each sample to baseline (first array)
        X <- sapply(X, function(g) 
            t(apply(g[[1]], 1, function(x) x[-1:0] - x[1])), simplify=FALSE   
        )
		XBar <- lapply(X, function(g) apply(g, 2, mean))
    } else {
	    XBar <- lapply(X, 
	        function(g) {
	            lapply(g, function(c) apply(c, 2, mean))
	        }
	    )
	}
            
    if (numLevelsCond==1) {
        # Either paired two condition data (numLevelsCond==1 since the samples are paired), or
        # single conditions data where we compare each sample to baseline (first array)
        YBar <- XBar # No need to transform into log ratios
    } else { # Two conditions, either two color data with common reference or one color data
        YBar <- lapply(XBar, 
            function(g) {
                g[[2]] - g[[1]]
            }
        ) 
    }
   
    # Calculate Sm, the variance of the mean about 0
    Sm <- lapply(YBar, 
        function(g) {
            g %*% t(g)
        }
    ) 
    
    # Get gene-specific shrinkage estimates of compound symmetry covariance
    k <- length(YBar[[1]])
    if (numLevelsCond == 2)  {
        S <- sapply(X, function(g) 
            var(g[[1]]) * (n[1]-1) + var(g[[2]]) * (n[2]-1), 
            simplify=FALSE)
        dfVar <-  (n[1] + n[2] - 2) * k
        dfCov <- (n[1] + n[2]) * k*(k-1)/2
    } else {
        S <- sapply(X, function(g) var(g) * (n-1), simplify=FALSE)
        dfVar <-  (n - 1) * k    
        dfCov <- n * k*(k-1)/2     
    }
    sVar <- sapply(S, function(g) sum(diag(g)) / dfVar)
    stildeVar <- squeezeVar(sVar, dfVar)$var.post    
    sCov <- sapply(S, function(g) sum(upper.tri(g) * g) / dfCov) 
    stildeCov <- squeezeVar(sCov, dfCov)$var.post
    stildeCov <- pmax(0, stildeCov)     
    stildeCov <- pmin(stildeVar*0.99, stildeCov)     
    StildeE <- mapply(function(v,c) 
        matrix(c, nrow=k, ncol=k) - diag(c, k) + diag(v, k), 
        stildeVar, stildeCov, SIMPLIFY=FALSE)       
    Sigma0 <- lapply(StildeE, function(e) sum(1/n)*e)
       
    # Calculate density under the null
    f0 <- mapply(function(y, S) dmvnorm(y, sigma=S), YBar, Sigma0)
       
    # Get a rough initial ranking of genes
    F <- mapply(function(m, e) (sum(diag(m)) / k) / (sum(1/n)*e),
            Sm, stildeVar
    ) 
    I <- pf(F, k, dfVar)
    p <- max(0.0001, sum((1-I) < alpha)/length(I))



    ######################################
    ###### Fit the model iteratively #####
    ######################################

    df.m <- min(n)
    diffIdxHistory <- list()
    pHistory <- NULL
    while (!isRepetitive(diffIdxHistory) | !isRepetitive(pHistory)) {
    	diffIdx <- order(I, decreasing=TRUE)[1:(max(1, (p*length(I))))]
        LambdaM <- squeezeMVar(Sm[diffIdx], df.m)$varPrior
        nuM <- squeezeMVar(Sm[diffIdx], df.m)$dfPrior        
        StildeM <- squeezeMVar(Sm, df.m, Lambda=LambdaM, nu=nuM)$varPost
        Sigma1 <- mapply(function(m,e) m + sum(1/n)*e, StildeM, StildeE, SIMPLIFY=FALSE)
        f1 <- mapply(function(y, S) dmvnorm(y, sigma=S), YBar, Sigma1)
        I <- f1*p / (f0*(1-p) + f1*p)
        p <- max(0.0001, sum((1-I)<alpha) / length(I))
    	cat (".")
    	diffIdxHistory <- c(diffIdxHistory, list(sort(diffIdx)))
    	pHistory <- c(pHistory, p)
    }
    I
}

Try the betr package in your browser

Any scripts or data that you put into this service are public.

betr documentation built on April 14, 2017, 5:16 a.m.