R/differentialNbinomWaldTestUMI4C.R

Defines functions nbinomWaldTestUMI4C differentialNbinomWaldTestUMI4C

Documented in differentialNbinomWaldTestUMI4C nbinomWaldTestUMI4C

#' Differential UMI4C contacts using DESeq2 Wald Test
#' 
#' Using a \code{UMI4C} object, infers the differences between conditions 
#' specified in \code{design} of the smooth monotone fitted values using a 
#' Wald Test from \code{DESeq2} package. 
#' @param umi4c UMI4C object as generated by \code{makeUMI4C} or the
#' \code{UMI4C} constructor.
#' @param design A \code{formula} or \code{matrix}. The formula expresses how
#' the counts for each fragment end depend on the variables in \code{colData}.
#' See  \code{\link[DESeq2]{DESeqDataSet}}.
#' @param query_regions \code{GRanges} object or \code{data.frame} containing
#' the coordinates of the genomic regions you want to use to perform the
#' analysis in specific genomic intervals. Default: NULL.
#' @param normalized Logical indicating if the function should return normalized
#'  or raw UMI counts.  Default: TRUE.
#' @param padj_method The method to use for adjusting p-values, see
#' \code{\link[stats]{p.adjust}}.  Default: fdr.
#' @param padj_threshold Numeric indicating the adjusted p-value threshold to
#' use to define significant differential contacts.  Default: 0.05.
#' @param alpha Approximate number of fragments desired for every basis function
#'  of the B-spline basis. \code{floor((max(number of fragments)) / alpha)} is 
#'  passed to \code{create.bspline.basis} as nbasis argument. 4 is the minimum
#'  allowed value. Default: 20.
#' @param penalty Amount of smoothing to be applied to the estimated functional 
#' parameter.  Default: 0.1.
#' @return \code{UMI4C} object with the DESeq2 Wald Test results.
#' @details This function fits the signal trend of a variance stabilized count 
#' values using a symmetric monotone fit for the distance dependency. Then 
#' scales the raw counts across the samples to obtain normalized factors. 
#' Finally, it detects differences between conditions applying the DESeq2 Wald 
#' Test.
#' @import GenomicRanges
#' @importFrom stats formula predict
#' @examples
#' \dontrun{
#'  files <- list.files(system.file("extdata", "CIITA", "count", package="UMI4Cats"),
#'                      pattern = "*_counts.tsv.gz",
#'                      full.names = TRUE
#'  )
#' # Create colData including all relevant information
#' colData <- data.frame(
#'   sampleID = gsub("_counts.tsv.gz", "", basename(files)),
#'   file = files,
#'   stringsAsFactors = FALSE
#' )
#' 
#' library(tidyr)
#' colData <- colData %>%
#'   separate(sampleID,
#'            into = c("condition", "replicate", "viewpoint"),
#'            remove = FALSE
#'   )
#'   
#' # Make UMI-4C object including grouping by condition
#' umi <- makeUMI4C(
#'   colData = colData,
#'   viewpoint_name = "CIITA",
#'   grouping = NULL,
#'   bait_expansion = 2e6
#' )
#' 
#' umi_wald <- differentialNbinomWaldTestUMI4C(umi4c=umi,
#'                                             design=~condition,
#'                                             alpha = 100)
#' }
#' @export
differentialNbinomWaldTestUMI4C <- function(umi4c,
                                            design=~condition,
                                            normalized=TRUE,
                                            padj_method="fdr",
                                            query_regions=NULL,
                                            padj_threshold=0.05,
                                            penalty=0.1,
                                            alpha=20){
  stop("This function is deprecated, please use waldUMI4C() instead.")
  
  # umi4c object to DDS object
  dds <- UMI4C2dds(umi4c=umi4c,
                   design=~condition)
  
  # variance stabilizing transformation
  dds <- vstUMI4C(dds=dds)
  
  # monotone smoothing of the DDS object VST counts
  dds <- smoothMonotoneUMI4C(dds=dds,
                             alpha=alpha,
                             penalty=penalty)
  
  # differential contacts using DESeq2 Wald Test
  dds <- nbinomWaldTestUMI4C(dds=dds,
                             query_regions = query_regions)
  
  # DDS object to umi4c object
  umi4c <- dds2UMI4C(umi4c=umi4c,
                     dds=dds,
                     # design=~condition,
                     normalized=normalized,
                     padj_method=padj_method,
                     padj_threshold=padj_threshold)
  
  return(umi4c)
}


#' Differential UMI4C contacts using DESeq2 Wald Test
#' 
#' Takes the smooth monotone fit count values and infers the differential UMI4C 
#' contacts using DESeq2 Wald Test from 
#' \code{DESeq2} package. 
#' @param dds DDS object as generated by \code{smoothMonotoneUMI4C} with the 
#' smooth monotone fit counts
#' @inheritParams differentialNbinomWaldTestUMI4C
#' @return DDS object with the DESeq2 Wald Test results, 
#' with results columns accessible with the \code{results} function.
#' @details This function back-transform fitted values to the
#' scale of raw counts and scale them across the samples to
#' obtain normalized factors using \code{normalizationFactors} from 
#' \code{DESeq2} package. To detect differences between conditions, the DESeq2 
#' 
nbinomWaldTestUMI4C <- function(dds,
                                query_regions = NULL){
  if (!c("fit") %in% names(assays(dds))) 
    stop("No assay 'fit' found. Check your input.")
  
  message(paste(
    paste0("\n[", Sys.time(), "]"),
    "Starting nbinomWaldTestUMI4C using:\n",
    "> Samples of DDS object:\n", paste(colnames(dds), sep="", collapse=", "), "\n"
  ))
  
  # Select only fragments in query_regions (if provided)
  if(!is.null(query_regions)) dds <- subsetByOverlaps(dds, query_regions)
  
  # define inverse vst function
  coefs <- attr(DESeq2::dispersionFunction(dds), "coefficients")
  attr(dds, "inverse-vst") <- function(q) {
    (4 * coefs["asymptDisp"] * 2^q - (1 + coefs["extraPois"]))^2/(4 * 
                                                                    coefs["asymptDisp"] * (1 + coefs["extraPois"] +                                                                                          (4 * coefs["asymptDisp"] * 2^q - (1 + coefs["extraPois"]))))
  }
  
  # backtransform fitted counts
  fit <-  attr(dds, "inverse-vst")(assays(dds)[["fit"]])
  # get normalization factors
  loggeomeans <- rowMeans(log(fit))
  normFactors <- exp(log(fit) - loggeomeans)
  DESeq2::normalizationFactors(dds) <- as.matrix(normFactors)
  # wald test
  design(dds) <- formula(~condition)
  dds <- try(estimateDispersions(dds))
  dds <- DESeq2::nbinomWaldTest(dds)
  message(
    paste0("[", Sys.time(), "] "),
    "Finished sample nbinomWaldTestUMI4C"
  )
  
  return(dds)
}
Pasquali-lab/UMI4Cats documentation built on Nov. 3, 2024, 3:10 p.m.