R/get_tstats.R
In DAMEfinder: Finds DAMEs - Differential Allelicly MEthylated regions

Documented in get_tstats

#' Get t-Statistics
#'
#' This function calculates a moderated t-Statistic per site or tuple using
#' \code{limma}'s \code{\link{lmFit}} and \code{\link{eBayes}} functions. It
#' then smoothes the obtained t-Statistics using \code{bumphunter}'s
#' \code{\link{smoother}} function.
#'
#' The smoothing is done on genomic clusters consisting of CpGs that are close
#' to each other. In the case of tuples, the midpoint of the two genomic
#' positions in each tuple is used as the genomic position of that tuple, to
#' perform the smoothing.The function takes a \code{RangedSummarizedExperiment}
#' generated by \code{\link{calc_derivedasm}} or \code{\link{calc_asm}}
#' containing ASM across samples, and the index of control and treatment
#' samples.
#'
#' @param sa A SummarizedExperiment containing ASM values where each row and
#'   column correspond to a tuple/site and sample respectively.
#' @param design a design matrix created with \code{\link{model.matrix}}.
#' @param coef Column in model.matrix specifying the parameter to estimate.
#' Default = 2. If \code{contrast} specified, column with contrast of interest.
#' @param contrast a contrast matrix, generated with
#'   \code{\link{makeContrasts}}.
#' @param method The method to be used in limma's \code{\link{lmFit}}. The
#'   default is set to 'ls' but can also be set to 'robust', which is
#'   recommended on a real data set.
#' @param trend Passed to \code{\link{eBayes}}. Should an intensity-trend be
#' allowed for the prior variance? Default is that the prior variance is
#' constant, e.g. FALSE.
#' @param smooth Whether smoothing should be applied to the t-Statistics.
#'   Default = FALSE. If TRUE, wherever smoothing is not possible, the
#'   un-smoothed t-stat is used instead.
#' @param maxGap The maximum allowed gap between genomic positions for
#'   clustering of genomic regions to be used in smoothing. Default = 20.
#' @param verbose Set verbose. Default = TRUE.
#' @param filter Remove empty tstats. Default = TRUE.
#' @param ... Arguments passed to \code{\link{loessByCluster}}. Only used if
#'   \code{smooth} = TRUE.
#'
#' @return A vector of t-Statistics within the
#'   \code{RangedSummarizedExperiment}.
#' @importFrom BiocGenerics start
#' @importFrom SummarizedExperiment assays
#' 
#' @examples 
#' data(readtuples_output)
#' ASM <- calc_asm(readtuples_output)
#' grp <- factor(c(rep('CRC',3),rep('NORM',2)), levels = c('NORM', 'CRC'))
#' mod <- model.matrix(~grp)
#' tstats <- get_tstats(ASM, mod)
#'
#' @export

get_tstats <- function(sa, design, contrast = NULL, method = "ls", 
    trend = FALSE, smooth = FALSE, maxGap = 20, coef = 2, verbose = TRUE, 
    filter = TRUE, ...) {
    
    # choose SumExp type
    if (names(assays(sa))[1] == "asm") {
        asm <- assays(sa)[["asm"]]
    } else {
        asm <- assays(sa)[["der.ASM"]]
    }
    
    # moderated t-statistic using specified column in the design
    # matrix
    if (verbose) 
        message("Calculating moderated t-statistics", appendLF = TRUE)
    
    fit <- limma::lmFit(object = asm, design = design, method = method)
    if (is.matrix(contrast)) {
        fit.cont <- limma::contrasts.fit(fit, contrasts = contrast)
        fit2 <- limma::eBayes(fit.cont, trend = trend)
        
    } else {
        fit2 <- limma::eBayes(fit, trend = trend)
    }
    
    S4Vectors::mcols(sa)$tstat <- fit2$t[, coef]
    S4Vectors::mcols(sa)$p.value <- fit2$p.value[, coef]
    
    
    if (names(assays(sa))[1] == "asm") {
        midpt <- S4Vectors::mcols(sa)$midpt
    } else {
        midpt <- start(sa)
    }
    
    S4Vectors::mcols(sa)$cluster <- bumphunter::clusterMaker(
        chr = as.character(GenomeInfoDb::seqnames(sa)), 
        pos = midpt, maxGap = maxGap, assumeSorted = TRUE)
    
    # smooth moderated t-stats
    if (smooth) {
        
        if (verbose) 
            message("Smoothing moderated t-statistics", appendLF = TRUE)
        
        smooth <- bumphunter::loessByCluster(y = S4Vectors::mcols(sa)$tstat, 
            x = midpt, cluster = S4Vectors::mcols(sa)$cluster, 
            verbose = verbose, ...)
        
        S4Vectors::mcols(sa)$smooth_tstat <- smooth$fitted[, 
            1]
        
        # replace empty smoothed value for original
        S4Vectors::mcols(sa)$smooth_tstat <- ifelse(
            is.na(S4Vectors::mcols(sa)$smooth_tstat), 
            S4Vectors::mcols(sa)$tstat, S4Vectors::mcols(sa)$smooth_tstat)
    }
    
    # filter for empty tstats
    if (filter) 
        sa <- sa[!is.na(S4Vectors::mcols(sa)$tstat), ]
    
    return(sa)
}