R/lrt-functions.R
In edge: Extraction of Differential Gene Expression

lrtStat <- function(resNull, resFull, post.var = NULL) {
  rss.full <-  rowSums(resFull ^ 2)
  rss.null <- rowSums(resNull ^ 2)

  # F-statistic
  if (is.null(post.var)) {
    stat <- (rss.null - rss.full) / (rss.full)
  } else {
    stat <- (rss.null - rss.full) / post.var
  }
  return(stat)
}

#	EMPIRICAL BAYES SQUEEZING OF VARIANCES

squeezeVar <- function(var, df, covariate=NULL, winsor.tail.p=c(0.05,0.1))
  #	Empirical Bayes posterior variances
  #	Gordon Smyth
  #	2 March 2004.  Last modified 2 Dec 2013.
{
  n <- length(var)
  if(n == 0) stop("var is empty")
  if(n == 1) return(list(var.post=var,var.prior=var,df.prior=0))
  if(length(df)==1) { 
    df <- rep.int(df,n)
  } else {
    if(length(df) != n) stop("lengths differ")
  }
  
  #	Estimate prior var and df
  fit <- fitFDist(var, df1=df, covariate=covariate)
  
  #	Prior var will be vector if robust=TRUE, otherwise scalar
  var.prior <- fit$scale
  
  #	Prior df will be vector if covariate is non-NULL, otherwise scalar
  df.prior <- fit$df2.shrunk
  if(is.null(df.prior)) df.prior <- fit$df2
  
  #	Check estimated prior df
  if(is.null(df.prior) || any(is.na(df.prior))) stop("Could not estimate prior df")
  
  #	Squeeze the posterior variances
  df.total <- df + df.prior
  var[df==0] <- 0 # guard against missing or infinite values
  Infdf <- df.prior==Inf
  if(any(Infdf)) {
    var.post <- rep(var.prior,length.out=n)
    i <- which(!Infdf)
    if(length(i)) {
      if(is.null(covariate))
        s02 <- var.prior
      else
        s02 <- var.prior[i]
      var.post[i] <- (df[i]*var[i] + df.prior[i]*s02) / df.total[i]
    }
  } else {
    var.post <- (df*var + df.prior*var.prior) / df.total
  }
  
  list(df.prior=df.prior,var.prior=var.prior,var.post=var.post)
}

fitFDist <- function(x,df1,covariate=NULL)
  #	Moment estimation of the parameters of a scaled F-distribution
  #	The first degrees of freedom are given
  #	Gordon Smyth and Belinda Phipson
  #	8 Sept 2002.  Last revised 27 Oct 2012.
{
  #	Check covariate
  if(!is.null(covariate)) {
    if(length(covariate) != length(x)) stop("covariate and x must be of same length")
    if(any(is.na(covariate))) stop("NA covariate values not allowed")
    isfin <- is.finite(covariate)
    if(!all(isfin)) {
      if(!any(isfin))
        covariate <- sign(covariate)
      else {
        r <- range(covariate[isfin])
        covariate[covariate == -Inf] <- r[1]-1
        covariate[covariate == Inf] <- r[2]+1
      }
    }
    splinedf <- min(4,length(unique(covariate)))
    if(splinedf < 2) covariate <- NULL
  }
  #	Remove missing or infinite values and zero degrees of freedom
  ok <- is.finite(x) & is.finite(df1) & (x > -1e-15) & (df1 > 1e-15)
  notallok <- !all(ok)
  if(notallok) {
    x <- x[ok]
    df1 <- df1[ok]
    if(!is.null(covariate)) {
      covariate2 <- covariate[!ok]
      covariate <- covariate[ok]
    }
  }
  n <- length(x)
  if(n==0) return(list(scale=NA,df2=NA))
  
  #	Avoid exactly zero values
  x <- pmax(x,0)
  m <- median(x)
  if(m==0) {
    warning("More than half of residual variances are exactly zero: eBayes unreliable")
    m <- 1
  } else {
    if(any(x==0)) warning("Zero sample variances detected, have been offset",call.=FALSE)
  }
  x <- pmax(x, 1e-5 * m)
  
  #	Better to work on with log(F)
  z <- log(x)
  e <- z-digamma(df1/2)+log(df1/2)
  
  if(is.null(covariate)) {
    emean <- mean(e)
    evar <- sum((e-emean)^2)/(n-1)
  } else {
    if(!requireNamespace("splines",quietly=TRUE)) stop("splines package required but is not available")
    design <- try(splines::ns(covariate,df=splinedf,intercept=TRUE),silent=TRUE)
    if(is(design,"try-error")) stop("Problem with covariate")
    fit <- lm.fit(design,e)
    if(notallok) {
      design2 <- predict(design,newx=covariate2)
      emean <- rep.int(0,n+length(covariate2))
      emean[ok] <- fit$fitted
      emean[!ok] <- design2 %*% fit$coefficients
    } else {
      emean <- fit$fitted
    }
    evar <- mean(fit$residuals[-(1:fit$rank)]^2)
  }
  evar <- evar - mean(trigamma(df1/2))
  if(evar > 0) {
    df2 <- 2*trigammaInverse(evar)
    s20 <- exp(emean+digamma(df2/2)-log(df2/2))
  } else {
    df2 <- Inf
    s20 <- exp(emean)
  }
  list(scale=s20,df2=df2)
}

trigammaInverse <- function(x) {
  #	Solve trigamma(y) = x for y
  #	Gordon Smyth
  #	8 Sept 2002.  Last revised 12 March 2004.
  
  #	Non-numeric or zero length input
  if(!is.numeric(x)) stop("Non-numeric argument to mathematical function")
  if(length(x)==0) return(numeric(0))
  
  #	Treat out-of-range values as special cases
  omit <- is.na(x)
  if(any(omit)) {
    y <- x
    if(any(!omit)) y[!omit] <- Recall(x[!omit])
    return(y)
  }
  omit <- (x < 0)
  if(any(omit)) {
    y <- x
    y[omit] <- NaN
    warning("NaNs produced")
    if(any(!omit)) y[!omit] <- Recall(x[!omit])
    return(y)
  }
  omit <- (x > 1e7)
  if(any(omit)) {
    y <- x
    y[omit] <- 1/sqrt(x[omit])
    if(any(!omit)) y[!omit] <- Recall(x[!omit])
    return(y)
  }
  omit <- (x < 1e-6)
  if(any(omit)) {
    y <- x
    y[omit] <- 1/x[omit]
    if(any(!omit)) y[!omit] <- Recall(x[!omit])
    return(y)
  }
  
  #	Newton's method
  #	1/trigamma(y) is convex, nearly linear and strictly > y-0.5,
  #	so iteration to solve 1/x = 1/trigamma is monotonically convergent
  y <- 0.5+1/x
  iter <- 0
  repeat {
    iter <- iter+1
    tri <- trigamma(y)
    dif <- tri*(1-tri/x)/psigamma(y,deriv=2)
    y <- y+dif
    if(max(-dif/y) < 1e-8) break
    if(iter > 50) {
      warning("Iteration limit exceeded")
      break
    }
  }
  y
}