R/squeezeVar.R

#	EMPIRICAL BAYES SQUEEZING OF VARIANCES

squeezeVar <- function(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0.05,0.1))
#	Empirical Bayes posterior variances
#	Gordon Smyth
#	2 March 2004.  Last modified 2 Dec 2013.
{
	n <- length(var)
	if(n == 0) stop("var is empty")
	if(n == 1) return(list(var.post=var,var.prior=var,df.prior=0))
	if(length(df)==1) { 
		df <- rep.int(df,n)
	} else {
		if(length(df) != n) stop("lengths differ")
	}

#	Estimate prior var and df
	if(robust)
		fit <- fitFDistRobustly(var, df1=df, covariate=covariate, winsor.tail.p=winsor.tail.p)
	else
		fit <- fitFDist(var, df1=df, covariate=covariate)

#	Prior var will be vector if robust=TRUE, otherwise scalar
 	var.prior <- fit$scale

#	Prior df will be vector if covariate is non-NULL, otherwise scalar
	df.prior <- fit$df2.shrunk
	if(is.null(df.prior)) df.prior <- fit$df2

#	Check estimated prior df
	if(is.null(df.prior) || any(is.na(df.prior))) stop("Could not estimate prior df")

#	Squeeze the posterior variances
	df.total <- df + df.prior
	var[df==0] <- 0 # guard against missing or infinite values
	Infdf <- df.prior==Inf
	if(any(Infdf)) {
		var.post <- rep(var.prior,length.out=n)
		i <- which(!Infdf)
		if(length(i)) {
			if(is.null(covariate))
				s02 <- var.prior
			else
				s02 <- var.prior[i]
			var.post[i] <- (df[i]*var[i] + df.prior[i]*s02) / df.total[i]
		}
	} else {
		var.post <- (df*var + df.prior*var.prior) / df.total
	}

	list(df.prior=df.prior,var.prior=var.prior,var.post=var.post)
}

fitFDist <- function(x,df1,covariate=NULL)
#	Moment estimation of the parameters of a scaled F-distribution
#	The first degrees of freedom are given
#	Gordon Smyth and Belinda Phipson
#	8 Sept 2002.  Last revised 27 Oct 2012.
{
#	Check covariate
	if(!is.null(covariate)) {
		if(length(covariate) != length(x)) stop("covariate and x must be of same length")
		if(any(is.na(covariate))) stop("NA covariate values not allowed")
		isfin <- is.finite(covariate)
		if(!all(isfin)) {
			if(!any(isfin))
				covariate <- sign(covariate)
			else {
				r <- range(covariate[isfin])
				covariate[covariate == -Inf] <- r[1]-1
				covariate[covariate == Inf] <- r[2]+1
			}
		}
		splinedf <- min(4,length(unique(covariate)))
		if(splinedf < 2) covariate <- NULL
	}
#	Remove missing or infinite values and zero degrees of freedom
	ok <- is.finite(x) & is.finite(df1) & (x > -1e-15) & (df1 > 1e-15)
	notallok <- !all(ok)
	if(notallok) {
		x <- x[ok]
		df1 <- df1[ok]
		if(!is.null(covariate)) {
			covariate2 <- covariate[!ok]
			covariate <- covariate[ok]
		}
	}
	n <- length(x)
	if(n==0) return(list(scale=NA,df2=NA))

#	Avoid exactly zero values
	x <- pmax(x,0)
	m <- median(x)
	if(m==0) {
		warning("More than half of residual variances are exactly zero: eBayes unreliable")
		m <- 1
	} else {
		if(any(x==0)) warning("Zero sample variances detected, have been offset",call.=FALSE)
	}
	x <- pmax(x, 1e-5 * m)

#	Better to work on with log(F)
	z <- log(x)
	e <- z-digamma(df1/2)+log(df1/2)

	if(is.null(covariate)) {
		emean <- mean(e)
		evar <- sum((e-emean)^2)/(n-1)
	} else {
		require(splines)
		design <- try(ns(covariate,df=splinedf,intercept=TRUE),silent=TRUE)
		if(is(design,"try-error")) stop("Problem with covariate")
		fit <- lm.fit(design,e)
		if(notallok) {
			design2 <- predict(design,newx=covariate2)
			emean <- rep.int(0,n+length(covariate2))
			emean[ok] <- fit$fitted
			emean[!ok] <- design2 %*% fit$coefficients
		} else {
			emean <- fit$fitted
		}
		evar <- mean(fit$residuals[-(1:fit$rank)]^2)
	}
	evar <- evar - mean(trigamma(df1/2))
	if(evar > 0) {
		df2 <- 2*trigammaInverse(evar)
		s20 <- exp(emean+digamma(df2/2)-log(df2/2))
	} else {
		df2 <- Inf
		s20 <- exp(emean)
	}
	list(scale=s20,df2=df2)
}

trigammaInverse <- function(x) {
#	Solve trigamma(y) = x for y
#	Gordon Smyth
#	8 Sept 2002.  Last revised 12 March 2004.

#	Non-numeric or zero length input
	if(!is.numeric(x)) stop("Non-numeric argument to mathematical function")
	if(length(x)==0) return(numeric(0))

#	Treat out-of-range values as special cases
	omit <- is.na(x)
	if(any(omit)) {
		y <- x
		if(any(!omit)) y[!omit] <- Recall(x[!omit])
		return(y)
	}
	omit <- (x < 0)
	if(any(omit)) {
		y <- x
		y[omit] <- NaN
		warning("NaNs produced")
		if(any(!omit)) y[!omit] <- Recall(x[!omit])
		return(y)
	}
	omit <- (x > 1e7)
	if(any(omit)) {
		y <- x
		y[omit] <- 1/sqrt(x[omit])
		if(any(!omit)) y[!omit] <- Recall(x[!omit])
		return(y)
	}
	omit <- (x < 1e-6)
	if(any(omit)) {
		y <- x
		y[omit] <- 1/x[omit]
		if(any(!omit)) y[!omit] <- Recall(x[!omit])
		return(y)
	}

#	Newton's method
#	1/trigamma(y) is convex, nearly linear and strictly > y-0.5,
#	so iteration to solve 1/x = 1/trigamma is monotonically convergent
	y <- 0.5+1/x
	iter <- 0
	repeat {
		iter <- iter+1
		tri <- trigamma(y)
		dif <- tri*(1-tri/x)/psigamma(y,deriv=2)
		y <- y+dif
		if(max(-dif/y) < 1e-8) break
		if(iter > 50) {
			warning("Iteration limit exceeded")
			break
		}
	}
	y
}
richierocks/limma2 documentation built on May 27, 2019, 8:47 a.m.