R/GenerateNoisyTumors.R

Defines functions GenerateNoisyTumors

Documented in GenerateNoisyTumors

#' Generate noisy tumors from available exposures
#'
#' Adds sampling noise to a matrix of exposures via
#' \code{SynSigGen::AddNoise} and writes the noisy exposures, the
#' resulting noisy spectra, and the signatures that were used as CSV
#' files in \code{dir}.
#'
#' @param seed A random seed to use. The random number generator kind is
#'   set to \code{"Mersenne-Twister"} together with the seed.
#'
#' @param dir The directory in which to put the output; will be created
#'   (including missing parent directories) if necessary.
#'
#' @param input.exposure A matrix of exposures.
#'
#' @param signatures A matrix of signatures. Its column names must include
#'   the row names of the noisy exposures.
#'
#' @param n.binom.size If non \code{NULL}, use negative binomial noise
#'     with this size parameter; see \code{\link[stats]{NegBinomial}}.
#'     If \code{NULL}, then use Poisson distribution to do the resampling.
#'
#' @param overwrite If \code{TRUE}, an existing \code{dir} is reused
#'   (existing output files in it may be overwritten). If not \code{TRUE},
#'   stop with an error when \code{dir} already exists; a \code{dir} that
#'   does not exist yet is created in either case.
#'
#' @return A list with the elements \describe{
#' \item{exposures}{The numbers of mutations due to each signature
#'    after adding noise}
#' \item{spectra}{The spectra based on the noisy signature exposures.}
#' }
#'
#' @export
#'
#' @examples
#'
#' # Generate synthetic tumors for Indel (ID) using negative binomial distribution
#' input.sigs.ID <- cosmicsig::COSMIC_v3.2$signature$GRCh37$ID
#' real.exposures.ID <- PCAWG7::exposure$PCAWG$ID
#' cancer.types <- PCAWG7::CancerTypes()[1:5]
#' ID.synthetic.tumors <-
#'   GenerateSyntheticTumors(seed = 191906,
#'                           dir = file.path(tempdir(), "ID.synthetic.tumors"),
#'                           cancer.types = cancer.types,
#'                           samples.per.cancer.type = 30,
#'                           input.sigs = input.sigs.ID,
#'                           real.exposures = real.exposures.ID,
#'                           distribution = "neg.binom",
#'                           sample.prefix.name = "SP.Syn."
#'   )
#'
#' # Add noise to the exposures
#' ID.noisy.tumors <-
#'   GenerateNoisyTumors(seed = 892513,
#'                       dir = file.path(tempdir(), "ID.noisy.tumors"),
#'                       input.exposure = ID.synthetic.tumors$ground.truth.exposures,
#'                       signatures = ID.synthetic.tumors$ground.truth.signatures,
#'                       n.binom.size = 1)
#'
#' # Plot the synthetic and noisy catalog and exposures
#' ICAMS::PlotCatalogToPdf(catalog = ID.synthetic.tumors$ground.truth.catalog,
#'                         file = file.path(tempdir(), "ID.synthetic.catalog.pdf"))
#' mSigAct::PlotExposureToPdf(exposure = ID.synthetic.tumors$ground.truth.exposures,
#'                            file = file.path(tempdir(), "ID.synthetic.exposures.pdf"),
#'                            cex.xaxis = 0.7)
#' ICAMS::PlotCatalogToPdf(catalog = ID.noisy.tumors$spectra,
#'                         file = file.path(tempdir(), "ID.noisy.catalog.pdf"))
#' mSigAct::PlotExposureToPdf(exposure = ID.noisy.tumors$exposures,
#'                            file = file.path(tempdir(), "ID.noisy.exposures.pdf"),
#'                            cex.xaxis = 0.7)
#'
GenerateNoisyTumors <-
  function(seed, dir, input.exposure, signatures,
           n.binom.size = NULL, overwrite = TRUE) {
    # Fail fast, before any computation: refuse to touch a directory that
    # already exists unless the caller explicitly allowed overwriting.
    # A directory that does not exist yet is always created.
    if (dir.exists(dir) && !isTRUE(overwrite)) {
      stop("\nDirectory ", dir, " exists\n")
    }

    # recursive = TRUE so that a nested output path is created as documented;
    # showWarnings = FALSE silences the warning for an already existing dir.
    dir.create(path = dir, showWarnings = FALSE, recursive = TRUE)
    if (!dir.exists(dir)) {
      stop("\nCannot create directory ", dir, "\n")
    }

    # Set seed using R's default random number generator kind "Mersenne-Twister"
    set.seed(seed = seed, kind = "Mersenne-Twister")
    retval <- SynSigGen::AddNoise(input.exposure = input.exposure,
                                  signatures = signatures,
                                  n.binom.size = n.binom.size)

    # Get the mutation type of the noisy data
    mutation.type <- GetMutationType(sig.name = colnames(signatures))

    # NOTE: when n.binom.size is NULL (Poisson noise), paste0() treats it as
    # an empty string, so the file names contain "...neg.binom.size..<type>.csv".
    # The names are kept unchanged so that existing readers of these files
    # continue to work.
    file.suffix <- paste0(n.binom.size, ".", mutation.type, ".csv")

    mSigAct::WriteExposure(
      exposure = retval$exposures,
      file = file.path(
        dir,
        paste0("ground.truth.syn.exposures.noisy.neg.binom.size.",
               file.suffix)))
    ICAMS::WriteCatalog(
      catalog = ICAMS::as.catalog(retval$spectra),
      file = file.path(
        dir,
        paste0("ground.truth.syn.catalog.noisy.neg.binom.size.",
               file.suffix)))

    exposed.sigs <- rownames(retval$exposures)

    # VERY IMPORTANT, the next statement guarantees that
    # the order of signatures in rows of exposures is the same as
    # the order of columns in signatures. In addition,
    # it ensures that signatures contains only signatures
    # that are present in exposures.
    #
    signatures <- signatures[ , exposed.sigs, drop = FALSE]

    ICAMS::WriteCatalog(
      catalog = signatures,
      file = file.path(
        dir,
        paste0("ground.truth.syn.sigs", ".", mutation.type, ".csv")))

    return(retval)
  }
steverozen/SynSigGen documentation built on April 1, 2022, 8:54 p.m.