# R/CreateAbstract_Nat.R
#
# Defines functions Defunct.How.We.Mapped.From.SP.to.SA.Sig xFixSASigNames Create.2.7a.7b.Abstract two.7a.7b.abstract.for.ludmil.2019.08.18 Create.3.5.40.Abstract three.5.40.abstract.for.ludmil.2019.08.18
#
# Documented in Create.2.7a.7b.Abstract Create.3.5.40.Abstract xFixSASigNames

# Run Create.3.5.40.Abstract once for each of nine randomly drawn seeds.
#
# The seeds are drawn from 1..10000 using the current state of the random
# number generator. Every run is made with overwrite = FALSE and with the
# regression comparison switched off (regress.dir = NULL).
three.5.40.abstract.for.ludmil.2019.08.18 <- function() {
  for (one.seed in sample(10000, size = 9)) {
    Create.3.5.40.Abstract(
      seed        = one.seed,
      overwrite   = FALSE,
      regress.dir = NULL)
  }
}


#' Create synthetic spectra based on SBS3, SBS5 and SBS40
#'
#' Generates synthetic spectra whose mutation loads come from SBS3
#' (a signature prevalent in ovarian adenocarcinoma) and from SBS5 and SBS40
#' (signatures prevalent in renal cell carcinoma). All three are "flat"
#' signatures that are hard to extract accurately, which makes this data set
#' a demanding test for computational approaches.
#'
#' This function supersedes the second part of
#' \code{data-raw/Create.3.5.40.Rmd} in GitHub repository
#' \code{steverozen/SynSig}. With default arguments it generates the same
#' results as the second half of \code{data-raw/Create.3.5.40.Rmd}.
#'
#' The data set generated by this function can be found at Synapse with
#' Synapse ID:
#' \href{https://www.synapse.org/#!Synapse:syn18500215}{syn18500215}.
#'
#' @param seed A random seed to use; it is passed to \code{set.seed} before
#'   any synthetic data are generated.
#'
#' @param overwrite If \code{TRUE}, overwrite existing directories / files.
#'   If \code{FALSE} and \code{top.level.dir} already exists, the function
#'   stops with an error.
#'
#' @param regress.dir If not \code{NULL}, compare the result to
#'   the contents of this directory with a \code{diff}.
#'
#' @param num.syn.tumors The number of synthetic tumors requested in each of
#'   the two calls to \code{GenerateSynAbstract} (one for the SigProfiler
#'   parameters, one for the SignatureAnalyzer parameters).
#'
#' @param top.level.dir The directory in which to put the output; will
#'   be created if necessary. \code{NULL} selects the default,
#'   seed-specific directory.
#'
#' @param unlink If \code{TRUE} and \code{!is.null(regress.dir)}, then
#'   unlink the result directory if there are no differences.
#'
#' @export

Create.3.5.40.Abstract <-
  function(seed        = 44,
           overwrite   = TRUE,
           regress.dir =
             "data-raw/long.test.regression.data/syn.3.5.40.abst/",
           num.syn.tumors = 1000,
           top.level.dir  = paste0("../syn.3.5.40.abst.", seed),
           unlink         = FALSE) {

    # An explicit NULL selects the same seed-specific path as the default.
    if (is.null(top.level.dir)) {
      top.level.dir <- paste0("../syn.3.5.40.abst.", seed)
    }

    # Create the output directory when it is missing; when it is already
    # there, only carry on if overwriting was requested.
    if (!dir.exists(top.level.dir)) {
      MustCreateDir(top.level.dir)
    } else if (!overwrite) {
      stop(top.level.dir, " exists and overwrite is FALSE")
    }

    set.seed(seed)

    # Synthetic-signature parameters derived from the real exposures,
    # first for the SignatureAnalyzer (sa) tables ...
    kidney.sa.parms <-
      GetSynSigParamsFromExposures(
        GetExpForOneCancerType("Kidney-RCC",
                               SynSigGen::sa.no.hyper.real.exposures))
    ovary.sa.parms <-
      GetSynSigParamsFromExposures(
        GetExpForOneCancerType("Ovary-AdenoCA",
                               SynSigGen::sa.no.hyper.real.exposures))

    # ... and then for the SigProfiler (sp) tables.
    kidney.sp.parms <-
      GetSynSigParamsFromExposures(
        GetExpForOneCancerType("Kidney-RCC",
                               SynSigGen::sp.no.hyper.real.exposures))
    ovary.sp.parms <-
      GetSynSigParamsFromExposures(
        GetExpForOneCancerType("Ovary-AdenoCA",
                               SynSigGen::sp.no.hyper.real.exposures))

    # SBS5 and SBS40 are taken from kidney, SBS3 from ovary. The column
    # order is the same in both parameter sets; the row names of the two
    # exposure matrices are later treated as corresponding one to one.
    sp.parms <-
      cbind(kidney.sp.parms[ , c("SBS5", "SBS40")],
            ovary.sp.parms[ , "SBS3", drop = FALSE])

    sa.parms <-
      cbind(kidney.sa.parms[ , c("BI_COMPOSITE_SBS5_P",
                                 "BI_COMPOSITE_SBS40_P")],
            ovary.sa.parms[ , "BI_COMPOSITE_SBS3_P", drop = FALSE])

    sp.abstract <-
      GenerateSynAbstract(
        parms            = sp.parms,
        num.syn.tumors   = num.syn.tumors,
        file.prefix      = NULL,
        sample.id.prefix = "SP.Syn.Abst",
        froot            = file.path(top.level.dir, "sp"))

    sa.abstract <-
      GenerateSynAbstract(
        parms            = sa.parms,
        num.syn.tumors   = num.syn.tumors,
        file.prefix      = NULL,
        sample.id.prefix = "SA.Syn.Abst",
        froot            = file.path(top.level.dir, "sa"))

    # Local shorthand: every catalog below is written the same way, only
    # the signatures, exposures, writer function and sub-directory differ.
    WriteOneCatalog <- function(sigs, exposure, write.fn, subdir) {
      CreateAndWriteCatalog(
        sigs,
        exposure,
        dir = NULL,
        write.fn,
        overwrite = overwrite,
        my.dir = file.path(top.level.dir, subdir))
    }

    #### Generate and write SignatureAnalyzer "abstract" 3, 5, 40 catalogs

    WriteOneCatalog(sa.COMPOSITE.sigs, sa.abstract$syn.exp,
                    WriteCatCOMPOSITE, "sa.sa.COMPOSITE")

    WriteOneCatalog(sa.96.sigs, sa.abstract$syn.exp,
                    ICAMS::WriteCatalog, "sa.sa.96")

    # The SigProfiler exposures are relabelled with the SignatureAnalyzer
    # signature names so that they match the names in sa.COMPOSITE.sigs.
    sp.exp.with.sa.names <- sp.abstract$syn.exp
    rownames(sp.exp.with.sa.names) <- rownames(sa.abstract$syn.exp)

    WriteOneCatalog(sa.COMPOSITE.sigs, sp.exp.with.sa.names,
                    WriteCatCOMPOSITE, "sp.sa.COMPOSITE")

    WriteOneCatalog(sp.sigs, sp.abstract$syn.exp,
                    ICAMS::WriteCatalog, "sp.sp")

    # Without a regression directory there is nothing left to do.
    if (is.null(regress.dir)) {
      return(invisible(NULL))
    }

    diff.result <-
      NewDiff4SynDataSets(newdir         = top.level.dir,
                          regressdirname = regress.dir,
                          unlink         = unlink,
                          verbose        = TRUE)

    # The first element of the result is "ok" when nothing differs.
    if (diff.result[1] == "ok") {
      message("\nok\n")
    } else {
      message("\nThere was a difference, investigate\n",
              paste0(diff.result, "\n"))
    }
  }

# Run Create.2.7a.7b.Abstract once for each of nine randomly drawn seeds.
#
# The seeds are drawn from 1..10000 using the current state of the random
# number generator. Every run is made with overwrite = FALSE and with the
# regression comparison switched off (regress.dir = NULL).
two.7a.7b.abstract.for.ludmil.2019.08.18 <- function() {
  for (one.seed in sample(10000, size = 9)) {
    Create.2.7a.7b.Abstract(
      seed        = one.seed,
      overwrite   = FALSE,
      regress.dir = NULL)
  }
}


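# NOTE: The lines below were a stray roxygen fragment with no function of
# its own. The text is kept, but as ordinary comments, so that it cannot be
# picked up as part of the documentation of Create.2.7a.7b.Abstract, which
# follows.
#
#   This function supersedes the second part of
#   \code{data-raw/Create.2.7a.7b.Rmd}.
#
#   With default arguments, this function generates the same results as
#   the second half of \code{data-raw/Create.2.7a.7b.Rmd}.
#
#   @keywords internal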


#' Generate synthetic data sets modeled on bladder TCC and skin melanoma.
#'
#' Generates synthetic spectra whose mutation loads come from SBS2
#' (a signature prevalent in bladder transitional cell carcinoma) and from
#' SBS7a and SBS7b (UV-related signatures prevalent in skin melanoma). This
#' data set is challenging for computational approaches because SBS2 has a
#' pattern similar to a mixture of SBS7a and SBS7b, so the presence of all
#' three signatures may prevent computational approaches from extracting
#' them accurately.
#'
#' This function supersedes the second part of
#' \code{data-raw/Create.2.7a.7b.Rmd} in GitHub repository
#' \code{steverozen/SynSig}. With default arguments it generates the same
#' results as the second part of \code{data-raw/Create.2.7a.7b.Rmd}.
#'
#' The data set generated by this function can be found at Synapse with
#' Synapse ID:
#' \href{https://www.synapse.org/#!Synapse:syn18500216}{syn18500216}.
#'
#' @param seed A random seed to use; it is passed to \code{set.seed} before
#'   any synthetic data are generated.
#'
#' @param overwrite If \code{TRUE}, overwrite existing directories and files.
#'   If \code{FALSE} and \code{top.level.dir} already exists, the function
#'   stops with an error.
#'
#' @param regress.dir If not \code{NULL}, compare the result to
#'   the contents of this directory with a \code{diff}.
#'
#' @param num.syn.tumors The number of synthetic tumors requested in each of
#'   the two calls to \code{GenerateSynAbstract} (one for the SigProfiler
#'   parameters, one for the SignatureAnalyzer parameters).
#'
#' @param top.level.dir The directory in which to put the output; will
#'   be created if necessary. \code{NULL} selects a default,
#'   seed-specific directory.
#'
#' @param unlink If \code{TRUE} and \code{!is.null(regress.dir)}, then
#'   unlink the result directory if there are no differences.
#'
#' @return If \code{regress.dir} is not \code{NULL}, a logical that is
#'   \code{TRUE} when the first element of the comparison result is
#'   \code{"ok"}; otherwise \code{NULL}, invisibly.
#'
#' @export

Create.2.7a.7b.Abstract <-
  function(seed        = 55,
           overwrite   = TRUE,
           regress.dir =
             "data-raw/long.test.regression.data/syn.2.7a.7b.abst/",
           num.syn.tumors = 1000,
           top.level.dir = NULL,
           unlink        = FALSE) {

    # Default output location depends on the seed.
    if (is.null(top.level.dir)) {
      top.level.dir <- paste0("../syn.2.7a.7b.abst.", seed)
    }

    # Create the output directory when it is missing; when it is already
    # there, only carry on if overwriting was requested.
    if (!dir.exists(top.level.dir)) {
      MustCreateDir(top.level.dir)
    } else if (!overwrite) {
      stop(top.level.dir, " exists and overwrite is FALSE")
    }

    set.seed(seed)

    # Synthetic-signature parameters derived from the real exposures.
    # Bladder uses the "no.hyper" exposure tables whereas skin uses the
    # "all" tables; this holds for both sa and sp.
    bladder.sa.parms <-
      GetSynSigParamsFromExposures(
        GetExpForOneCancerType("Bladder-TCC",
                               SynSigGen::sa.no.hyper.real.exposures))
    skin.sa.parms <-
      GetSynSigParamsFromExposures(
        GetExpForOneCancerType("Skin-Melanoma",
                               SynSigGen::sa.all.real.exposures))

    bladder.sp.parms <-
      GetSynSigParamsFromExposures(
        GetExpForOneCancerType("Bladder-TCC",
                               SynSigGen::sp.no.hyper.real.exposures))
    skin.sp.parms <-
      GetSynSigParamsFromExposures(
        GetExpForOneCancerType("Skin-Melanoma",
                               SynSigGen::sp.all.real.exposures))

    # SBS2 is taken from bladder, SBS7a and SBS7b from skin. The column
    # order is the same in both parameter sets; the row names of the two
    # exposure matrices are later treated as corresponding one to one.
    sp.parms <-
      cbind(bladder.sp.parms[ , "SBS2", drop = FALSE],
            skin.sp.parms[ , c("SBS7a", "SBS7b"), drop = FALSE])

    sa.parms <-
      cbind(bladder.sa.parms[ , "BI_COMPOSITE_SBS2_P", drop = FALSE],
            skin.sa.parms[ , c("BI_COMPOSITE_SBS7a_S", "BI_COMPOSITE_SBS7b_S"),
                           drop = FALSE])

    sp.abstract <-
      GenerateSynAbstract(
        parms            = sp.parms,
        num.syn.tumors   = num.syn.tumors,
        file.prefix      = NULL,
        sample.id.prefix = "SP.Syn.Abst",
        froot            = file.path(top.level.dir, "sp"))

    sa.abstract <-
      GenerateSynAbstract(
        parms            = sa.parms,
        num.syn.tumors   = num.syn.tumors,
        file.prefix      = NULL,
        sample.id.prefix = "SA.Syn.Abst",
        froot            = file.path(top.level.dir, "sa"))

    # Local shorthand: every catalog below is written the same way, only
    # the signatures, exposures, writer function and sub-directory differ.
    WriteOneCatalog <- function(sigs, exposure, write.fn, subdir) {
      CreateAndWriteCatalog(
        sigs,
        exposure,
        dir = NULL,
        write.fn,
        overwrite = overwrite,
        my.dir = file.path(top.level.dir, subdir))
    }

    #### Generate and write SignatureAnalyzer catalogs

    WriteOneCatalog(sa.COMPOSITE.sigs, sa.abstract$syn.exp,
                    WriteCatCOMPOSITE, "sa.sa.COMPOSITE")

    WriteOneCatalog(sa.96.sigs, sa.abstract$syn.exp,
                    ICAMS::WriteCatalog, "sa.sa.96")

    # The SigProfiler exposures are relabelled with the SignatureAnalyzer
    # signature names so that they match the names in sa.COMPOSITE.sigs.
    sp.exp.with.sa.names <- sp.abstract$syn.exp
    rownames(sp.exp.with.sa.names) <- rownames(sa.abstract$syn.exp)

    WriteOneCatalog(sa.COMPOSITE.sigs, sp.exp.with.sa.names,
                    WriteCatCOMPOSITE, "sp.sa.COMPOSITE")

    WriteOneCatalog(sp.sigs, sp.abstract$syn.exp,
                    ICAMS::WriteCatalog, "sp.sp")

    # Without a regression directory there is nothing to compare against.
    if (is.null(regress.dir)) {
      return(invisible(NULL))
    }

    diff.result <-
      NewDiff4SynDataSets(newdir         = top.level.dir,
                          regressdirname = regress.dir,
                          unlink         = unlink,
                          verbose        = TRUE)

    # TRUE when the comparison reported "ok" as its first element.
    diff.result[1] == "ok"
  }



#' Standardize SignatureAnalyzer signature names.
#'
#' Replaces every literal occurrence of \code{"_SNV_"} in a signature name
#' with \code{"_"}; for example, \code{BI_COMPOSITE_SNV_SBS83_P} becomes
#' \code{BI_COMPOSITE_SBS83_P}.
#'
#' This is necessary because for COMPOSITE signatures we rbind coordinated
#' "SNV", "DNP", and "INDEL" signatures.
#'
#' This is a copy of \code{FixSASigNames} in package \code{SynSigEval}.
#'
#' @param sig.names Vector of signature names.
#'
#' @return Vector of signature names in which each \code{"_SNV_"} has been
#'   replaced by \code{"_"}; names without \code{"_SNV_"} are unchanged.
#'
#' @keywords internal

xFixSASigNames <- function(sig.names) {
  # fixed = TRUE: match "_SNV_" as a literal string, not as a regex.
  gsub(pattern = "_SNV_", replacement = "_", x = sig.names, fixed = TRUE)
}


# Record of how SigProfiler signatures SBS5, SBS40 and SBS3 were matched to
# SignatureAnalyzer signatures (kept for reference; the name marks it as
# defunct).
#
# rcc.sa.syn.exp, ovary.sa.syn.exp: objects whose row names are
#   SignatureAnalyzer signature names; only rownames() is used here.
#   Presumably these are exposure matrices for renal cell carcinoma and
#   ovarian tumors -- TODO confirm against the original caller.
#
# Three matches are computed in turn, but only the value of the last one
# (SBS3 against the ovarian signatures) is returned; the first two results
# are discarded unless the statements are run interactively.
Defunct.How.We.Mapped.From.SP.to.SA.Sig <-
  function(rcc.sa.syn.exp, ovary.sa.syn.exp) {
    # Find mapping from SBS3, SBS5, and SBS40 to SignatureAnalyzer signatures
    # assigned to these tumor types

    # drop = FALSE keeps the single selected signature as a one-column
    # matrix. FALSE is spelled out because F is an ordinary variable that
    # can be reassigned.
    ICAMSxtra::MatchSigs1Direction(
      SynSigGen::sp.sigs[ , "SBS5", drop = FALSE],
      SynSigGen::sa.96.sigs[ , xFixSASigNames(rownames(rcc.sa.syn.exp))])

    ICAMSxtra::MatchSigs1Direction(
      SynSigGen::sp.sigs[ , "SBS40", drop = FALSE],
      SynSigGen::sa.96.sigs[ , xFixSASigNames(rownames(rcc.sa.syn.exp))])

    ICAMSxtra::MatchSigs1Direction(
      SynSigGen::sp.sigs[ , "SBS3", drop = FALSE],
      SynSigGen::sa.96.sigs[ , xFixSASigNames(rownames(ovary.sa.syn.exp))])

    # Both BI..SBS3 and BI..SBS39 are in every ovarian; we select BI..SBS3
  }
# steverozen/SynSigGen documentation built on April 1, 2022, 8:54 p.m.