R/extract_signatures.R

Defines functions extract_signatures

Documented in extract_signatures

#' Extract mutational signatures from 96 mutation matrix using NMF
#'
#' Decomposes trinucleotide count matrix into signatures and contribution of
#' those signatures to the spectra of the samples/vcf files.
#'
#' @param mut_matrix 96 mutation count matrix
#' @param rank Number of signatures to extract
#' @param nrun Number of iterations, default = 200.
#' A lower number will be faster, but result in less accurate results.
#' @param nmf_type Type of NMF to be used.
#'              Possible values:
#'              * 'regular'
#'              * 'variational_bayes'
#' The 'regular' method comes from the NMF package.
#' The 'variational_bayes' method comes from the ccfindR package.
#' This method uses bayesian inference, which makes it easier to determine the
#' mathmatically optimal number of signatures.
#' @param single_core Boolean. If TRUE, it forces the NMF algorithm to
#' use only a single core. This can sometimes prevent issues.
#' Doesn't apply to variational-bayes NMF
#'
#' @return Named list of mutation matrix, signatures and signature contribution
#'
#' @import NMF
#'
#' @examples
#' ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
#' mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' ## This function is computationally intensive.
#' # nmf_res <- extract_signatures(mut_mat, rank = 2)
#'
#' ## It's also possible to use a variational Bayes method.
#' ## It requires the ccfindR package to work.
#' # nmf_res <- extract_signatures(mut_mat, rank = 2, nmf_type = "variational_bayes")
#' @seealso
#' \code{\link{mut_matrix}}
#'
#' @export

extract_signatures <- function(mut_matrix, rank, nrun = 200, nmf_type = c("regular", "variational_bayes"), single_core = FALSE) {
  # Match argument
  nmf_type <- match.arg(nmf_type)

  # Add a small pseudocount to avoid features with zero counts.
  mut_matrix <- as.matrix(mut_matrix) + 0.0001

  # Make sure the rank_range is valid.
  if (!(rank > 0 & rank == round(rank))) {
    stop("Rank should be a positive integer", call. = FALSE)
  }

  if (ncol(mut_matrix) < max(rank)) {
    stop(paste0(
      "The rank should be smaller than the number of ",
      "samples in the input matrix."
    ), call. = FALSE)
  }

  if (nmf_type == "regular") {
    # Calculate NMF
    if (single_core){
      res <- NMF::nmf(mut_matrix, rank = rank, method = "brunet", nrun = nrun, seed = 123456, .opt = "v-p")
    } else{
      res <- NMF::nmf(mut_matrix, rank = rank, method = "brunet", nrun = nrun, seed = 123456)
    }
    # Find signatures and contribution of signatures
    signatures <- NMF::basis(res)
    contribution <- NMF::coef(res)
  } else {
    if (!requireNamespace("ccfindR", quietly = TRUE)) {
      stop(paste0(
        "Package 'ccfindR' is needed for variational_bayes to work. ",
        "Please either install it or use the regular NMF."
      ), call. = FALSE)
    }
    sc <- ccfindR::scNMFSet(count = mut_matrix)
    res <- ccfindR::vb_factorize(sc, ranks = rank, nrun = nrun, progress.bar = FALSE, verbose = 0)
    # estimate = ccfindR::vb_factorize(sc, ranks = 2:7, nrun = nrun, progress.bar = FALSE, verbose = 0)
    # plot(estimate)
    # optimal_rank(sb)
    signatures <- ccfindR::basis(res)[[1]]
    contribution <- ccfindR::coeff(res)[[1]]
  }
  # Reconstruct mutation matrix
  reconstructed <- signatures %*% contribution
  return(list(
    signatures = signatures,
    contribution = contribution,
    reconstructed = reconstructed
  ))
}

Try the MutationalPatterns package in your browser

Any scripts or data that you put into this service are public.

MutationalPatterns documentation built on Nov. 14, 2020, 2:03 a.m.