R/slidingMean.R
In pepStat: Statistical analysis of peptide microarrays

Documented in slidingMean

#' Data smoothing for peptide microarray.
#'
#' This function applies a sliding mean window to intensities to reduce noise
#' generated by experimental variation, as well as take advantage of the overlapping
#' nature of array peptides to share signal.
#'
#' @param peptideSet A \code{peptideSet}. The expression data for the peptides as
#' well as annotations and ranges. The range information is required to run this function.
#' @param width A \code{numeric}. The width of the sliding window.
#' @param verbose A \code{logical}. If set to TRUE, progress information will be displayed.
#' @param split.by.clade A \code{logical}. If TRUE, the peptides will be smoothed by
#' clade (See details section below for more information).
#'
#' @details
#' Peptide membership in the sliding mean window is determined by its position and
#' the width argument. Two peptides are in the same window if the difference in their
#' positions is less than or equal to width/2. A peptide's position is taken to be
#' position(peptideSet).
#'
#' A peptide's intensity is replaced by the mean of all peptide intensities within
#' the peptide's sliding mean window.
#'
#' When split.by.clade = TRUE, peptides are smoothed within clades defined by the
#' clade column of the GRanges object occupying the featureRange slot of
#' peptideSet. If set to FALSE, a peptide at a given position will borrow
#' information from the neighboring peptides as well as the ones from other
#' clades around this position.
#'
#' @return A \code{peptideSet} object with smoothed intensities.
#'
#' @seealso \code{\link{summarizePeptides}}, \code{\link{normalizeArray}}
#'
#' @author Gregory Imholte
#'
#' @name slidingMean
#' @rdname slidingMean
#'
#' @importFrom GenomicRanges GRangesList
#' @importClassesFrom GenomicRanges GRangesList
#' @importMethodsFrom IRanges unlist
#' @export
#' @example examples/pipeline.R
slidingMean <-function(peptideSet, width=9, verbose=FALSE, split.by.clade=TRUE){
    .check_peptideSet(peptideSet)
    if (preproc(peptideSet@experimentData)$transformation!="log" &
            preproc(peptideSet@experimentData)$transformation!="vsn") {
        stop("The probe measurements need to be log/vsn transformed!")
    }
    if (preproc(peptideSet@experimentData)$normalization=="none"){
        warning("You should probably normalize your data before using this function")
    }
    
    if(split.by.clade & ncol(clade(peptideSet)) > 1){
        pSet_list <- split(peptideSet, clade(peptideSet))
        #peptides need to be ordered the same in exprs and featureRange
        for(i in 1:length(pSet_list)){
            cur_clade <- colnames(clade(peptideSet))[i]
            ranges(pSet_list[[i]])$clade <- cur_clade
            exprs(pSet_list[[i]]) <- .applySlidingMean(exprs(pSet_list[[i]]), width, 
                    position(pSet_list[[i]]))
            # update row names with clade-appended peptide strings
            clade_rownames <- paste(peptide(pSet_list[[i]]), cur_clade, sep="_")
            rownames(pSet_list[[i]]) <- clade_rownames
            names(ranges(pSet_list[[i]])) <- clade_rownames
        }
        #ranges <- do.call("rbind", lapply(pSet_list, ranges))
        clade_ranges <- unlist(GRangesList(lapply(pSet_list, ranges)))
        clade_exprs <- do.call("rbind", lapply(pSet_list, exprs))    
        peptideSet_smoothed <- new("peptideSet",
                exprs = clade_exprs,
                featureRange = clade_ranges,
                experimentData = peptideSet@experimentData,
                phenoData = peptideSet@phenoData,
                protocolData = peptideSet@protocolData)
        preproc(peptideSet_smoothed)$split.by.clade <- TRUE
    } else {
#     if (length(names(ranges(peptideSet))) > 1)
#       warning("smoothing multiple spaces together in peptideSet object")
#     # This could be made more efficient with multicore
        p <- position(peptideSet)
        o <- order(p)
        ro <- order(o)
        
        y <- exprs(peptideSet)[o,]
        p <- position(peptideSet)[o]
        ny <- .applySlidingMean(y, width, p)
        exprs(peptideSet) <- ny[ro,]
        peptideSet_smoothed <- peptideSet
    }
    
    if (verbose) {
        cat("** Finished processing ", nrow(peptideSet_smoothed),
                " probes on ", ncol(peptideSet_smoothed)," arrays **\n");
    }
    peptideSet_smoothed
}


#return A matrix of the intensities ordered like y.
.applySlidingMean <- function(y, width, position) {
    yn <- sapply(position, function(p) {
                p.window <- abs(position - p) <= width/2
                colMeans(y[p.window,,drop = FALSE])
            })
    yn <- t(yn)
    rownames(yn) <- rownames(y)
    return(yn)
}