R/chipType.R

#' @title Determine ChIP-Seq Type of Protein Complex Binding
#'
#' @description Estimate the types of protein complex binding. Protein complex
#' binding might act similarly to normal transcription factors, where the
#' changes are symmetrical between two biological conditions (unimodel on fold
#' changes); or the changes might be globally one-side accompanied with some
#' non-changing bindings (bimodel on fold changes). This function help to
#' determine the binding type of given ChIP-Seq samples from two biological
#' conditions using kernal density information of raw fold changes. As this
#' function was designed to work on the raw counts (no normalization needed),
#' only one replicate from each condition is allowed (input a two-column count
#' matrix); otherwise, coverage difference acorss replicates might bias
#' determination.
#'
#' @param count A two-column matrix of read counts or a SummarizedExperiment,
#' where columns are samples and rows are peaks or high coverage bins. This
#' object can be generated by function \code{regionReads}.
#' @param cutoff A numeric cut off on \code{count} matrix. If positive, only
#' peaks/bins with counts larger than \code{cutoff} in at least one sample are
#' used to estimate the binding type. We recommend a larger cutoff since
#' background signal can dramatically mask the right estimation of kernal
#' density, especially for deep sequenced ChIP-seq samples. (Default: 50)
#' @param fold A numeric threshold to help determine the binding type. In
#' detail, if top two estimated modes on smoothed kernal density have a height
#' differece less than the folds given by \code{fold}, binding type will be
#' determined as bimodel; otherwise, it is unimodel. This number should be
#' larger than 1. (Default: 10)
#' @param h Initial smoothing factor when estimating kernal density of raw fold
#' changes for bump hunting. (Default: 0.1)
#' @param plot A logical indicator that if M-A plot and smoothed kernal density
#' should be visualized. (Default: TRUE)
#'
#' @importFrom matrixStats rowMaxs
#' @importFrom matrixStats rowDiffs
#' @import SummarizedExperiment
#' @importFrom methods is
#' @importFrom graphics abline
#' @importFrom graphics layout
#' @importFrom stats dnorm 
#'
#' @return
#' A character with value either "bimodel" or "unimodel" to represent estimated
#' binding type.
#'
#' @export
#'
#' @examples
#' ## load sample data
#' data(complex)
#' names(complex)
#'
#' ## test sample data
#' chipType(count=complex$counts)

chipType <- function(count, cutoff=50L, fold=10, h=0.1, plot=TRUE){
    stopifnot((is.matrix(count) || is(count,"SummarizedExperiment")) &&
              ncol(count) == 2)
    stopifnot(is.numeric(cutoff) && length(cutoff) == 1 && cutoff >= 0)
    stopifnot(is.numeric(fold) && length(fold) == 1 && fold > 1)
    stopifnot(is.numeric(h) && length(h) == 1 && h > 0)
    stopifnot(is.logical(plot) && length(plot) == 1)
    ## raw M & A
    if(is(count,"SummarizedExperiment")) count <- assay(count,1)
    counttmp <- count[rowMaxs(count) >= cutoff,]
    logcount <- log2(counttmp + 0.5)
    M <- rowDiffs(logcount)
    A <- rowMeans(logcount)
    ## kernal density bumps
    bump <- c()
    ix <- seq(round(min(M), 1), max(M), 0.05)
    while(length(bump)<2){
        dm <- rowMeans(sapply(M, function(m) dnorm(ix, mean=m, sd=h)))
        bump <- which(diff(sign(diff(dm))) == -2) + 1
        bump2 <- bump[order(dm[bump],
                            decreasing=TRUE)[seq_len(min(2,length(bump)))]]
        mu <- ix[bump2]
        mudm <- dm[bump2]
        h <- h * 0.8
    }
    ## protein complex type
    enrich <- abs(log(mudm[1] / mudm[2]))
    if(enrich <= log(fold)){
        cmplxtype <- "bimodel"
    }else{
        cmplxtype <- "unimodel"
    }
    ## plots
    if(plot){
        layout(matrix(1:2,1,2))
        plot(A,M,pch=20,cex=0.5,main=cmplxtype)
        abline(h=0,lty=2,col='red',lwd=2)
        plot(density(M,na.rm = TRUE,adjust=1),xlab='M',main=cmplxtype,lwd=2)
        abline(v=mu,lty=2,col='blue',lwd=2)
    }
    cmplxtype
}
tengmx/ComplexDiff documentation built on May 31, 2019, 8:34 a.m.