# pepStat: Statistical analysis of peptide microarrays

#' Data smoothing for peptide microarray.
#'
#' This function applies a sliding mean window to intensities to reduce noise
#' generated by experimental variation, as well as take advantage of the overlapping
#' nature of array peptides to share signal.
#'
#' @param peptideSet A \code{peptideSet}. The expression data for the peptides as
#' well as annotations and ranges. The range information is required to run this function.
#' @param width A \code{numeric}. The width of the sliding window.
#' @param verbose A \code{logical}. If set to TRUE, progress information will be displayed.
#' @param split.by.clade A \code{logical}. If TRUE, the peptides will be smoothed by
#' clade (See details section below for more information).
#'
#' @details
#' Peptide membership in the sliding mean window is determined by its position and
#' the width argument. Two peptides are in the same window if the difference in their
#' positions is less than or equal to width/2. A peptide's position is taken to be
#' position(peptideSet).
#'
#' A peptide's intensity is replaced by the mean of all peptide intensities within
#' the peptide's sliding mean window.
#'
#' When split.by.clade = TRUE, peptides are smoothed within clades defined by the
#' clade column of the GRanges object occupying the featureRange slot of
#' peptideSet. If set to FALSE, a peptide at a given position will borrow
#' information from the neighboring peptides as well as the ones from other
#' clades around this position.
#'
#' @return A \code{peptideSet} object with smoothed intensities.
#'
#' @seealso \code{\link{summarizePeptides}}, \code{\link{normalizeArray}}
#'
#' @author Gregory Imholte
#'
#' @name slidingMean
#' @rdname slidingMean
#'
#' @importFrom GenomicRanges GRangesList
#' @importClassesFrom GenomicRanges GRangesList
#' @importMethodsFrom IRanges unlist
#' @export
#' @example examples/pipeline.R
slidingMean <- function(peptideSet, width = 9, verbose = FALSE,
                        split.by.clade = TRUE) {
  .check_peptideSet(peptideSet)

  # Refuse to smooth unless the recorded transformation is "log" or "vsn";
  # only warn when the recorded normalization is "none".
  prep <- preproc(peptideSet@experimentData)
  if (prep$transformation != "log" && prep$transformation != "vsn") {
    stop("The probe measurements need to be log/vsn transformed!")
  }
  if (prep$normalization == "none") {
    warning("You should probably normalize your data before using this function")
  }

  # Scalar `&&` short-circuits, so clade() is only consulted when smoothing
  # by clade was actually requested.
  if (split.by.clade && ncol(clade(peptideSet)) > 1) {
    pSet_list <- split(peptideSet, clade(peptideSet))
    clade_names <- colnames(clade(peptideSet))
    # Peptides need to be ordered the same in exprs and featureRange.
    # seq_along() is a no-op on an empty list, unlike 1:length().
    for (i in seq_along(pSet_list)) {
      cur_clade <- clade_names[i]
      ranges(pSet_list[[i]])$clade <- cur_clade
      exprs(pSet_list[[i]]) <- .applySlidingMean(
        exprs(pSet_list[[i]]), width, position(pSet_list[[i]])
      )
      # Update row names with clade-appended peptide strings so they stay
      # distinct once the per-clade pieces are stacked below.
      clade_rownames <- paste(peptide(pSet_list[[i]]), cur_clade, sep = "_")
      rownames(pSet_list[[i]]) <- clade_rownames
      names(ranges(pSet_list[[i]])) <- clade_rownames
    }
    # Stack the per-clade ranges and intensities into one object.
    clade_ranges <- unlist(GRangesList(lapply(pSet_list, ranges)))
    clade_exprs <- do.call("rbind", lapply(pSet_list, exprs))
    peptideSet_smoothed <- new("peptideSet",
      exprs = clade_exprs,
      featureRange = clade_ranges,
      experimentData = peptideSet@experimentData,
      phenoData = peptideSet@phenoData,
      protocolData = peptideSet@protocolData
    )
    preproc(peptideSet_smoothed)$split.by.clade <- TRUE
  } else {
    # Smooth all peptides together: sort by position, smooth, then restore
    # the original row order via the inverse permutation `ro`.
    pos <- position(peptideSet)
    o <- order(pos)
    ro <- order(o)

    # drop = FALSE keeps a matrix even when there is a single array (column);
    # without it the subset collapses to a plain vector.
    y <- exprs(peptideSet)[o, , drop = FALSE]
    ny <- .applySlidingMean(y, width, pos[o])
    exprs(peptideSet) <- ny[ro, , drop = FALSE]
    peptideSet_smoothed <- peptideSet
  }

  if (verbose) {
    cat("** Finished processing ", nrow(peptideSet_smoothed),
        " probes on ", ncol(peptideSet_smoothed), " arrays **\n")
  }
  peptideSet_smoothed
}


# Replace each row of y by the column-wise mean of all rows whose position
# lies within width/2 of that row's position.
#
# y        : matrix-like intensities, one row per entry of `position`.
# width    : window width; rows i and j share a window when
#            abs(position[i] - position[j]) <= width/2.
# position : numeric vector of positions, one per row of y.
#
# Returns a numeric matrix with the same dimensions and dimnames as y,
# with rows ordered like y.
.applySlidingMean <- function(y, width, position) {
  n_arrays <- ncol(y)
  window_means <- vapply(position, function(p) {
    in_window <- abs(position - p) <= width / 2
    colMeans(y[in_window, , drop = FALSE])
  }, numeric(n_arrays))
  # vapply() yields an n_arrays x length(position) array, or a plain vector
  # when n_arrays == 1. Either way the values are laid out position by
  # position, so refilling row-wise gives one row per position regardless of
  # the number of columns (sapply() + t() mis-shaped the one-column case).
  yn <- matrix(window_means, nrow = length(position), ncol = n_arrays,
               byrow = TRUE)
  dimnames(yn) <- dimnames(y)
  yn
}