R/qsmooth.r

#' Quantile shrinkage normalization
#' 
#' @param exprs for counts use log2(raw counts + 1)), for MA use log2(raw intensities)
#' @param groups groups to which samples belong (character vector)
#' @param norm.factors scaling normalization factors
#' @param plot plot weights? (default=FALSE)
#' @param window window size for running median (a fraction of the number of rows of exprs) 
#' @export 
qsmooth = function (exprs, groups, norm.factors=NULL, plot=FALSE, window=0.05) {

  # Stop if exprs contains any NA
  if (any(is.na(exprs))) stop("exprs contains NAs (K.Okrah)")
  
  # Scale normalization step
  if (is.null(norm.factors)) {
    dat = exprs
  }else{
    dat = t(t(exprs) - norm.factors)
  }
  
  # Compute quantile stats
  qs = qstats(dat, groups, window=window)
  Qref = qs$Qref 
  Qhat = qs$Qhat  
  w = qs$smoothWeights
  
  # Weighted quantiles
  normExprs = w * Qref + (1 - w) * Qhat
  
  # Re-order normExprs by rank of exprs (columnwise)
  for (i in 1:ncol(normExprs)) {
    # Grab ref. i
    ref = normExprs[,i]
    
    # Grab exprs column i
    x = exprs[,i]
    
    # Grab ranks of x (using min rank for ties)
    rmin = rank(x, ties.method="min")
    
    # If x has rank ties then average the values of ref at those ranks
    dups = duplicated(rmin)
    
    if (any(dups)) {
      # Grab ranks of x (using random ranks for ties) 
      # (needed to uniquely identify the indices of tied ranks)
      rrand = rank(x, ties.method="random")
      
      # Grab tied ranks
      tied.ranks = unique(rmin[dups])
      
      for (k in tied.ranks) {
        sel = rrand[rmin == k] # Select the indices of tied ranks 
        ref[sel] = ave(ref[sel])
      }
    }
    
    # Re-order ref and replace in normExprs
    normExprs[,i] = ref[rmin]
  }  
  
  # Plot weights
  if (plot) {
    
    oldpar = par(mar=c(4, 4, 1.5, 0.5))
    
    lq = length(Qref)
    u = (1:lq - 0.5) / lq
    
    if (length(u) > 10000) { # do not plot more than 10000 points
      
      sel = sample(1:lq, 10000)
      
      plot(u[sel], w[sel], pch=".", main="qsmooth weights",
           xlab=" quantiles", ylab="Weight", ylim=c(0, 1))
      
    }else{
      
      plot(u, w, pch=".", main="qsmooth weights",
           xlab="quantiles", ylab="Weight", ylim=c(0, 1))
      
    }
    
    abline(h=0.5, v=0.5, col="red", lty=2)
    
    par(oldpar)
    
  }
  
  rownames(normExprs) = rownames(exprs)
  colnames(normExprs) = colnames(exprs)
  normExprs
}
kokrah/qsmooth documentation built on May 20, 2019, 12:54 p.m.