R/utils.R

Defines functions dds2UMI4C UMI4C2dds combineUMI4C .makeSlidingWindow .makeFixedWindow makeWindowFragments

Documented in combineUMI4C dds2UMI4C makeWindowFragments UMI4C2dds

#' Make windows merging restriction fragments
#' 
#' Use a set of continuous restriction fragments to generate windows containing
#' a fixed number of fragments (n_frags).
#' @param input Input object containing the restriction fragments. Should be 
#' class UMI4C (rowRanges will be extracted) or class GRanges.
#' @param n_frags Number of fragments to use for generating the windows. This 
#' should include restriction fragments with 0 counts (Default: 8).
#' @param sliding Numeric indicating the factor for generating sliding windows.
#' If set to 1 (default) will use fixed windows. If set to > 0 and < 1 will use
#' n_frags * sliding fragments to generate sliding windows.
#' @return A GRanges object containing the windows of merged restriction 
#' fragments.
#' @export
#' @examples
#' data("ex_ciita_umi4c")
#' 
#' # Without sliding windows
#' win_frags <- makeWindowFragments(ex_ciita_umi4c, n_frags=30, sliding=1)
#' win_frags
#' 
#' # With sliding windows (n_frags*sliding)
#' win_frags <- makeWindowFragments(ex_ciita_umi4c, n_frags=30, sliding=0.5)
#' win_frags
makeWindowFragments <- function(input, 
                                n_frags=8,
                                sliding=1) {
  if (isClass(input, "UMI4C")) frags <- rowRanges(input)
  else if (isClass(input, "GRanges")) frags <- input
  else stop("Input object should be of class 'UMI4C' or 'GRanges'")
  
  frags <- frags[order(frags)]
  start_upstream <- frags$id_contact[frags$position=="upstream" & start(frags) == max(start(frags[frags$position=="upstream"]))]
  start_downstream <- frags$id_contact[frags$position=="downstream" & start(frags) == min(start(frags[frags$position=="downstream"]))]
  
  if (sliding==1) window_frags <- .makeFixedWindow(frags, n_frags, start_upstream, start_downstream)
  else if (sliding < 1 & sliding > 0) window_frags <- .makeSlidingWindow(frags, n_frags, start_upstream, start_downstream, sliding)
  else stop("Sliding value should be 1 (no sliding) or > 0 & < 1 for sliding.")
  
  return(window_frags)
}

.makeFixedWindow <- function(frags,
                             n_frags,
                             start_upstream,
                             start_downstream) {
  ## Upstream
  up1 <- seq(grep(start_upstream, frags$id_contact), 1, by=-n_frags)
  up2 <- c(up1[-1]+1, 1)
  frags_upstream <- GRanges(seqnames = unique(seqnames(frags)),
                            ranges = IRanges(start=start(frags[up2]), end=end(frags[up1])),
                            mcols= data.frame("id"=paste0("window_UP_", seq_len(length(up1))),
                                              "position"="upstream"))
  
  ## Downstream
  dwn1 <- seq(grep(start_downstream, frags$id_contact), length(frags), by=n_frags)
  dwn2 <- c(dwn1[-1]-1, length(frags))
  frags_downstream <- GRanges(seqnames = unique(seqnames(frags)),
                              ranges = IRanges(start=start(frags[dwn1]), end=end(frags[dwn2])),
                              mcols= data.frame("id"=paste0("window_DOWN_", seq_len(length(dwn1))),
                                                "position"="downstream"))
  
  ## Merge
  window_frags <- c(frags_upstream, frags_downstream)
  window_frags <- window_frags[order(window_frags)]
  
  return(window_frags)
}

.makeSlidingWindow <- function(frags,
                               n_frags,
                               start_upstream,
                               start_downstream,
                               sliding) {
  
  ## Upstream
  up1 <- seq(grep(start_upstream, frags$id_contact), 1, by=-(n_frags*sliding))
  up2 <- up1 - n_frags + 1
  up2[up2<=0] <- 1
  
  frags_upstream <- GRanges(seqnames = unique(seqnames(frags)),
                            ranges = IRanges(start=start(frags[up2]), end=end(frags[up1])),
                            mcols= data.frame("id"=paste0("window_UP_", seq_len(length(up1))),
                                              "position"="upstream"))
  
  ## Downstream
  dwn1 <- seq(grep(start_downstream, frags$id_contact), length(frags), by=n_frags*sliding)
  dwn2 <-dwn1 + n_frags - 1
  dwn2[dwn2 > length(frags)] <- length(frags)
  
  frags_downstream <- GRanges(seqnames = unique(seqnames(frags)),
                              ranges = IRanges(start=start(frags[dwn1]), end=end(frags[dwn2])),
                              mcols= data.frame("id"=paste0("window_DOWN_", seq_len(length(dwn1))),
                                                "position"="downstream"))
  
  ## Merge
  window_frags <- c(frags_upstream, frags_downstream)
  window_frags <- window_frags[order(window_frags)]
  
  return(window_frags)
}

#' Combine UMI4C fragments 
#' 
#' Combine the UMI4C fragments that overlap a given set of \code{query_regions}.
#' @param umi4c UMI4C object as generated by \code{\link{makeUMI4C}} or the
#' \code{UMI4C} constructor.
#' @param query_regions \code{GRanges} object containing the coordinates of the 
#' genomic regions for combining restriction fragments.
#' @return \code{UMI4C} object with rowRanges corresponding to query_regions and
#' assay containing the sum of raw UMI counts at each specified \code{query_region}.
#' @export
#' @examples 
#' data("ex_ciita_umi4c")
#' 
#' wins <- makeWindowFragments(ex_ciita_umi4c)
#' umi_comb <- combineUMI4C(ex_ciita_umi4c, wins)
combineUMI4C <- function(umi4c,
                         query_regions) {
  query_regions <- query_regions[order(query_regions)]
  
  matrix <- assay(umi4c)
  rowranges <- rowRanges(umi4c)
  
  hits <- findOverlaps(rowranges, query_regions)
  
  # Change id for mcol 4
  ids <- split(mcols(rowranges)[queryHits(hits),1], subjectHits(hits))
  
  mat_sp <- lapply(ids, function(x) matrix[x,])
  mat_sum <- lapply(mat_sp, function(x) if(is.null(dim(x))) x else colSums(x))
  mat_final <- do.call(rbind, mat_sum)
  
  umi4c_comb <- UMI4C(colData=colData(umi4c),
                      rowRanges=unique(query_regions[subjectHits(hits)]),
                      assays=SimpleList(umi = mat_final),
                      metadata=metadata(umi4c))
  rownames(umi4c_comb) <- unique(mcols(query_regions)[subjectHits(hits),1])
  
  return(umi4c_comb)
}

#' UMI4Cats object to DDS object.
#' 
#' Transforms an UMI4C object to a DDS object
#' @inheritParams differentialNbinomWaldTestUMI4C
#' @return DDS object.
#' @import GenomicRanges
UMI4C2dds <- function(umi4c,
                      design = ~condition){
  
  stopifnot(is(umi4c, "UMI4C"))
  
  # transform UMI4Cats object to DDS
  dds <- DESeq2::DESeqDataSetFromMatrix(
    countData = assays(umi4c)$umi,
    colData = colData(umi4c),
    rowRanges = rowRanges(umi4c),
    metadata = metadata(umi4c),
    design = ~ condition)
  
  rowRanges(dds) <- rowRanges(umi4c)
  colnames(mcols(rowRanges(dds)))[1] <- "id"
  
  return(dds)
}

#' DDS object to UMI4Cats object.
#' 
#' Transforms an DDS object to a UMI4C object after applying 
#' \code{nbinomWaldTestUMI4C}.
#' @inheritParams differentialNbinomWaldTestUMI4C
#' @param dds DDS object as generated by \code{nbinomWaldTestUMI4C} 
#' with the DESeq2 Wald Test results
#' @return UMI4C object with the DESeq2 Wald Test results.
dds2UMI4C <- function(umi4c,
                      dds,
                      normalized = TRUE,
                      padj_method = "fdr",
                      padj_threshold = 0.05) {
  
  res <- DESeq2::results(dds,
                         pAdjustMethod = padj_method
  )
  
  res <- data.frame(res[, c(5, 2, 6)])
  res$query_id <- rownames(res)
  res$sign <- FALSE
  res$sign[res$padj <= padj_threshold] <- TRUE
  
  counts <- as.data.frame(counts(dds, normalized = normalized))
  counts$query_id <- rownames(counts)
  counts <- counts[, c(ncol(counts), seq_len(ncol(counts) - 1))]
  
  umi4c@results <- S4Vectors::SimpleList(
    test = "DESeq2 Test based on the Negative Binomial distribution",
    ref = DESeq2::resultsNames(dds)[2],
    padj_threshold = padj_threshold,
    results = res[, c(4, 1, 2, 3, 5)],
    query = rowRanges(dds),
    counts = counts
  )
  
  return(umi4c)
}
Pasquali-lab/UMI4Cats documentation built on March 23, 2024, 9:42 p.m.