#' @title Differential Binding Estimation with Permutation
#'
#' @description
#' This function does differential binding estimation and additional
#' permutation analysis. This function works only if multiple replicates are
#' provided for comparison; otherwise, it is the same as \code{diffRegions}
#' which is the main function for differential binding estimation. Permutation
#' analysis is obtained by shuffling samples between compared conditions.
#' Customized shuffling can also be specified by parameter \code{permute} for
#' complicated experiment designs.
#'
#' @param count A matrix of read counts or a RangedSummarizedExperiment, where
#' columns are samples and rows are genome-wide bins. This object can be
#' generated by function \code{regionReads}.
#' @param bins If \code{count} is a read count matrix, \code{bins} should be
#' provided as a GRanges object recording bins of corresponding rows in
#' \code{count}. If \code{count} is a RangedSummarizedExperiment, this
#' parameter will be ignored.
#' @param meta If \code{count} is a read count matrix, this should be a
#' DataFrame object recording sample annotations. Rows of \code{meta}
#' correspond to the columns of \code{count}. The \code{design} parameter
#' treats the column names of \code{meta} as variables. If \code{count} is a
#' RangedSummarizedExperiment, this parameter will be ignored.
#' @param design A formula object which expresses how read counts for each bin
#' depend on the variables in \code{meta}, e.g. '~ group + condition' etc. Or,
#' if \code{count} is a RangedSummarizedExperiment, the bins and meta objects
#' will be extracted by \code{rowRanges()} and \code{colData()} from
#' \code{count} object. By default, the last variable in \code{design} formula
#' will be used to build the differential binding contrast. At most two
#' variables are allowed if \code{diffmeth} is set to 'ttest'. (Details see
#' below)
#' @param sizefac A numeric vector indicating estimated size of samples for
#' normalization purpose. This vector can be generated by function
#' \code{sizeFac}.
#' @param rccut A numeric cutoff on normalized count matrix using
#' \code{sizefac}. If positive, only bins with normalized counts larger than
#' \code{rccut} in at least one sample are selected for fold change estimate.
#' Unlike other functions in this package, moderate cutoff would be better
#' as too large results more false negative and too small increases the time
#' cost. (Default: 15)
#' @param fccut A numeric cutoff on smoothed log2foldchanges of bins for
#' bump hunting of differtial binding regions. Neighbor bins with fold change
#' larger than this value will be merged together with allowed gaps.
#' (Default: 0.4)
#' @param gap A integer specifying the gaps allowed for bin merging, in the
#' unit of number of bins. (Default: 2)
#' @param permute A matrix where each row contains shuffled indices of all
#' samples. If \code{NULL}, permutation is automatically obtained by shuffling
#' samples between compared conditions. This object can be generated by using
#' R-package \code{permute} as well.
#' @param maxperm Maximum number of permutations to be finished. (Default:10)
#' @param diffmeth Method for statistical testing of differential binding.
#' (Default: 'DESeq2')
#'
#' @details
#' Three methods are provided for significance estimation of differential
#' binding. \code{DESeq2} allows pseudo-estimation for comparisons without
#' replicates; otherwise, all methods can be used for comparisons with at least
#' two replicates. The \code{design} formula can be specified as suggested by
#' \code{DESeq2} and \code{limma} if these two methods are selected. For
#' \code{ttest}, \code{design} can either contain one or two components,
#' referring to student's t-test or paired t-test based on logarithm scaled
#' data. For consistance with other packages, the last component in
#' \code{design} formula is the contrast on which the final differential
#' estimation are reported.
#'
#' The default permutation works on the main contrast only (the last component
#' in \code{design} formula). If customized permutation needed, e.g. invloving
#' several components in \code{design} formula, users should provided the
#' customized indecies of permutations through parameter \code{permute}.
#'
#' @import S4Vectors
#' @import GenomicRanges
#' @import SummarizedExperiment
#' @importFrom methods is
#' @importFrom utils combn
#' @importFrom stats ecdf
#' @importFrom stats model.frame
#'
#' @return
#' A GRanges object containing potential regions with differential binding, as
#' well as statistical significances as meta columns.
#'
#' @export
#'
#' @examples
#' ## load sample data
#' data(complex)
#' names(complex)
#'
#' ## test sample data
#' sizefac <- sizeFac(count=complex$counts,plot=TRUE)$sizefac
#' library(S4Vectors)
#' meta <- DataFrame(cond=c("ctr","tre"))
#' dr <- diffRegionsWithPerm(count=complex$counts,bins=complex$bins,
#' meta=meta,design=~cond,sizefac=sizefac)
diffRegionsWithPerm <- function(count, bins=NULL, meta=NULL, design, sizefac,
rccut=15, fccut=0.4, gap=2,
permute=NULL, maxperm=10,
diffmeth=c("DESeq2","limma","ttest")){
stopifnot((is.matrix(count) || is(count,"RangedSummarizedExperiment")) &&
ncol(count) >= 2)
stopifnot(is.null(bins) || (is(bins,"GRanges") &&
length(bins)==nrow(count)))
stopifnot(is.null(meta) || (is(meta,"DataFrame") && ncol(meta)>=1))
if(is.matrix(count) && (is.null(bins) || is.null(meta)))
stop("Bins or meta shouldn't be NULL when count is matrix\n")
stopifnot(class(design) == "formula")
stopifnot(is.numeric(sizefac) && length(sizefac) == ncol(count)
&& all(sizefac > 0))
stopifnot(is.numeric(rccut) && length(rccut) == 1 && rccut >= 0)
stopifnot(is.numeric(fccut) && length(fccut) == 1 && fccut > 0)
stopifnot(is.numeric(gap) && length(gap) == 1 && gap >= 0)
stopifnot(is.null(permute) ||
(is.matrix(permute) &&
all(apply(permute,1,function(x)
identical(seq_len(ncol(count)),sort(x))))))
stopifnot(is.numeric(maxperm) && length(maxperm) == 1 && maxperm >= 1)
if(is(count,"RangedSummarizedExperiment")){
bins <- rowRanges(count)
meta <- colData(count)
count <- assay(count,1)
}
effects <- model.frame(design, data = meta)
stopifnot(ncol(effects) >= 1 && length(unique(effects[,ncol(effects)]))==2)
diffmeth <- match.arg(diffmeth)
if(diffmeth=="ttest"){
paired <- FALSE
if(ncol(effects) > 2 || (ncol(effects) == 2 &&
any(sapply(split(effects[,2],effects[,1]),function(x)
length(x) !=2 || length(unique(x)) != 2)))){
cat("try methods other than 'ttest' for a complex design.\n
differential analysis abandoned.\n")
return(0)
}else if(ncol(effects) == 2){
paired <- TRUE
}
}
if(ncol(count)==2){
cat("No permutation performed due to not enough samples!\n")
## real test
observed <- diffRegions(count=count, bins=bins, meta=meta,
design=design, sizefac=sizefac, rccut=rccut,
fccut=fccut, gap=gap, diffmeth=diffmeth)
cat("Differential analysis of real data......done!\n")
}else{
observed <- diffRegions(count=count, bins=bins, meta=meta,
design=design, sizefac=sizefac, rccut=rccut,
fccut=fccut, gap=gap, diffmeth=diffmeth)
cat("Differential analysis of real data......done!\n")
## permute test
if(is.null(permute)){
level <- effects[,ncol(effects)]
subn <- min(floor(table(level)/2))
combs <- combn(min(table(level)),subn)
comb1 <- matrix(which(level==(unique(level)[1]))[combs],
nrow(combs))
comb2 <- matrix(which(level==(unique(level)[2]))[combs],
nrow(combs))
comb1f <- comb1[,rep(seq_len(ncol(comb1)),ncol(comb2))]
comb2f <- comb2[,rep(seq_len(ncol(comb2)),each=ncol(comb1))]
permute <- t(sapply(seq_len(ncol(comb1f)),function(i){
ranks <- seq_len(ncol(count))
tmpidx <- match(c(comb1f[,i],comb2f[,i]),ranks)
ranks[tmpidx] <- c(comb2f[,i],comb1f[,i])
ranks
}))
}
if(maxperm<nrow(permute))
permute <- permute[sample(seq_len(nrow(permute)),maxperm),]
n <- nrow(permute)
cat("Differential analysis of permute data...... Total",n,"times!\n")
perm <- list()
for(i in seq_len(n)){
counttmp <- count[,permute[i,]]
sizefactmp <- sizefac[permute[i,]]
perm[[i]] <- diffRegions(count=counttmp, bins=bins, meta=meta,
design=design, sizefac=sizefactmp,
rccut=rccut, fccut=fccut, gap=gap,
diffmeth=diffmeth)
cat(i,"permute done!\n")
}
## significance evaluation
permp <- ecdf(abs(unlist(sapply(perm,function(x) x$stat))))
observed$permPV <- 1-permp(abs(observed$stat))
}
observed
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.