#' @title Determine ChIP-Seq Type of Protein Complex Binding
#'
#' @description Estimate the types of protein complex binding. Protein complex
#' binding might act similarly to normal transcription factors, where the
#' changes are symmetrical between two biological conditions (unimodel on fold
#' changes); or the changes might be globally one-side accompanied with some
#' non-changing bindings (bimodel on fold changes). This function help to
#' determine the binding type of given ChIP-Seq samples from two biological
#' conditions using kernal density information of raw fold changes. As this
#' function was designed to work on the raw counts (no normalization needed),
#' only one replicate from each condition is allowed (input a two-column count
#' matrix); otherwise, coverage difference acorss replicates might bias
#' determination.
#'
#' @param count A two-column matrix of read counts or a SummarizedExperiment,
#' where columns are samples and rows are peaks or high coverage bins. This
#' object can be generated by function \code{regionReads}.
#' @param cutoff A numeric cut off on \code{count} matrix. If positive, only
#' peaks/bins with counts larger than \code{cutoff} in at least one sample are
#' used to estimate the binding type. We recommend a larger cutoff since
#' background signal can dramatically mask the right estimation of kernal
#' density, especially for deep sequenced ChIP-seq samples. (Default: 50)
#' @param fold A numeric threshold to help determine the binding type. In
#' detail, if top two estimated modes on smoothed kernal density have a height
#' differece less than the folds given by \code{fold}, binding type will be
#' determined as bimodel; otherwise, it is unimodel. This number should be
#' larger than 1. (Default: 10)
#' @param h Initial smoothing factor when estimating kernal density of raw fold
#' changes for bump hunting. (Default: 0.1)
#' @param plot A logical indicator that if M-A plot and smoothed kernal density
#' should be visualized. (Default: TRUE)
#'
#' @importFrom matrixStats rowMaxs
#' @importFrom matrixStats rowDiffs
#' @import SummarizedExperiment
#' @importFrom methods is
#' @importFrom graphics abline
#' @importFrom graphics layout
#' @importFrom stats dnorm
#'
#' @return
#' A character with value either "bimodel" or "unimodel" to represent estimated
#' binding type.
#'
#' @export
#'
#' @examples
#' ## load sample data
#' data(complex)
#' names(complex)
#'
#' ## test sample data
#' chipType(count=complex$counts)
chipType <- function(count, cutoff=50L, fold=10, h=0.1, plot=TRUE){
stopifnot((is.matrix(count) || is(count,"SummarizedExperiment")) &&
ncol(count) == 2)
stopifnot(is.numeric(cutoff) && length(cutoff) == 1 && cutoff >= 0)
stopifnot(is.numeric(fold) && length(fold) == 1 && fold > 1)
stopifnot(is.numeric(h) && length(h) == 1 && h > 0)
stopifnot(is.logical(plot) && length(plot) == 1)
## raw M & A
if(is(count,"SummarizedExperiment")) count <- assay(count,1)
counttmp <- count[rowMaxs(count) >= cutoff,]
logcount <- log2(counttmp + 0.5)
M <- rowDiffs(logcount)
A <- rowMeans(logcount)
## kernal density bumps
bump <- c()
ix <- seq(round(min(M), 1), max(M), 0.05)
while(length(bump)<2){
dm <- rowMeans(sapply(M, function(m) dnorm(ix, mean=m, sd=h)))
bump <- which(diff(sign(diff(dm))) == -2) + 1
bump2 <- bump[order(dm[bump],
decreasing=TRUE)[seq_len(min(2,length(bump)))]]
mu <- ix[bump2]
mudm <- dm[bump2]
h <- h * 0.8
}
## protein complex type
enrich <- abs(log(mudm[1] / mudm[2]))
if(enrich <= log(fold)){
cmplxtype <- "bimodel"
}else{
cmplxtype <- "unimodel"
}
## plots
if(plot){
layout(matrix(1:2,1,2))
plot(A,M,pch=20,cex=0.5,main=cmplxtype)
abline(h=0,lty=2,col='red',lwd=2)
plot(density(M,na.rm = TRUE,adjust=1),xlab='M',main=cmplxtype,lwd=2)
abline(v=mu,lty=2,col='blue',lwd=2)
}
cmplxtype
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.