# knitr chunk defaults for this vignette: collapse = TRUE and "#>" as the
# prefix for printed output lines.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

Overview and rationale

Checking for interference between low-burden and high-burden signatures, using MMR deficient tumours as the source of high-burden signatures.

We will need to run SignatureAnalyzer first on the 500 tumors with neither SBS26 nor SBS44, and then on the "hyper-mutated" tumors, which contain SBS1, SBS5, SBS26, and SBS44.

We create two directories for SignatureAnalyzer: one directory for the SBS1 and SBS5-only tumors, and one for the "hypermutated" tumors with SBS1, SBS5, SBS26, and SBS44.

We create one directory for SigProfiler, containing a synthetic catalog with both the non-hyper and hyper tumors.

Required libraries

library(ICAMS)
library(SynSigGen)

Get actual exposures for high-mutational burden signatures, SBS26 and SBS44

# Candidate hyper-mutated samples: columns present in the full
# SignatureAnalyzer exposure table but absent from the no-hyper table.
maybe.hyper.samples <-
  setdiff(colnames(sa.all.real.exposures),
          colnames(sa.no.hyper.real.exposures))

# Restrict the candidates to the two cancer types of interest; sample names
# are matched literally (fixed = TRUE) on their cancer-type prefix.
maybe.hyper.colorectal <-
  grep("ColoRect-AdenoCA::", maybe.hyper.samples,
       fixed = TRUE, value = TRUE)

maybe.hyper.uterus <-
  grep("Uterus-AdenoCA::", maybe.hyper.samples,
       fixed = TRUE, value = TRUE)

# drop = FALSE is used on every subset below so that the result keeps its
# two-dimensional shape even when only one sample (column) survives a
# filter; otherwise the following row lookups such as ["SBS10a", ] would
# fail on a plain vector.
maybe.hyper <-
  sp.all.real.exposures[ , c(maybe.hyper.colorectal, maybe.hyper.uterus),
                        drop = FALSE]

# Keep only samples whose SBS10a exposure is below 10.
maybe.hyper2 <- maybe.hyper[ , maybe.hyper["SBS10a", ] < 10, drop = FALSE]

# SBS14 is Combined POLE and MMR deficient, so we do not use it.
sp.hyper.real.exp <-
  maybe.hyper2[ , maybe.hyper2["SBS14", ] < 10, drop = FALSE]
# Look for SBS44, SBS26; SBS6 is rare

# Take the same samples from the SignatureAnalyzer exposures.
sa.hyper.real.exp <-
  sa.all.real.exposures[ , colnames(sp.hyper.real.exp), drop = FALSE]

# Reduce both tables to the two high-burden signatures.
sp.hyper.real.exp <- sp.hyper.real.exp[c("SBS26", "SBS44"), , drop = FALSE]
sa.hyper.real.exp <-
  sa.hyper.real.exp[c("BI_COMPOSITE_SBS26_S", "BI_COMPOSITE_SBS44_S"), ,
                    drop = FALSE]

Compute parameters and create synthetic exposures for high-burden signatures

\code{SAAndSPSynDataOneCAType} will always include at least 1 non-zero exposure in the synthetic exposure output.

We do this analysis in \code{../../syn.hyper.START}, and we will \strong{not} use the information in this directory later.

# Number of synthetic tumours to generate and the name of the START
# directory for the high-burden run.
num.syn.tumors <- 500
START.dir.root <- "syn.hyper.low.START"

# overwrite = TRUE is for testing and development.
SetNewOutDir(paste0("tmp.", START.dir.root), overwrite = TRUE)

# Fix the RNG seed immediately before generating synthetic exposures.
set.seed(191908)

# Build synthetic exposures from the real SBS26 / SBS44 exposures.
hyper.info <- SAAndSPSynDataOneCAType(
  sa.hyper.real.exp,
  sp.hyper.real.exp,
  ca.type = NULL,  # Do not filter by cancer type
  num.syn.tumors,
  file.prefix = "HYPER"
)

Double the exposures of the hyper tumors.

# Scale both synthetic high-burden exposure tables by a factor of two.
hyper.info$sa.syn.exp <- 2 * hyper.info$sa.syn.exp
hyper.info$sp.syn.exp <- 2 * hyper.info$sp.syn.exp

Get actual exposures for low-mutational burden signatures, SBS1 and SBS5

# Non-hyper-mutated colorectal samples, matched literally on the
# cancer-type prefix of the sample name.
no.hyper.colorectal <-
  grep("ColoRect-AdenoCA::", colnames(sa.no.hyper.real.exposures),
       fixed = TRUE, value = TRUE)

# drop = FALSE keeps the two-dimensional shape even if only one sample
# matches, so the row lookups below remain valid.
sp.no.hyper.colorectal <-
  sp.all.real.exposures[ , no.hyper.colorectal, drop = FALSE]
# Mean SigProfiler exposures for SBS1 and SBS5 (printed for inspection).
mean(sp.no.hyper.colorectal["SBS1", ])
mean(sp.no.hyper.colorectal["SBS5", ])

# Same for the non-hyper-mutated uterine samples.
no.hyper.uterus <-
  grep("Uterus-AdenoCA::", colnames(sa.no.hyper.real.exposures),
       fixed = TRUE, value = TRUE)
sp.no.hyper.uterus <-
  sp.all.real.exposures[ , no.hyper.uterus, drop = FALSE]
mean(sp.no.hyper.uterus["SBS1", ])
mean(sp.no.hyper.uterus["SBS5", ])

no.hyper.colnames <- c(no.hyper.colorectal, no.hyper.uterus)

# SigProfiler exposures restricted to the two low-burden signatures.
sp.no.hyper.real.exp <-
  sp.all.real.exposures[c("SBS1", "SBS5"), no.hyper.colnames]
# Sanity check. identical() is used instead of `dim(x) == c(2, 81)` because
# the latter yields logical(0) when dim(x) is NULL, which stopifnot()
# silently accepts.
stopifnot(identical(dim(sp.no.hyper.real.exp), c(2L, 81L)))

# Corresponding SignatureAnalyzer exposures for the same samples.
sa.no.hyper.real.exp <-
  sa.all.real.exposures[
    c("BI_COMPOSITE_SBS1_P", "BI_COMPOSITE_SBS5_P"),
    no.hyper.colnames]

There is no BI_COMPOSITE_SBS5_P in the SignatureAnalyzer exposures, so we take the exposures from the SigProfiler SBS5 exposures instead.

# Overwrite the SignatureAnalyzer SBS5 row with the SigProfiler SBS5
# exposures for the same samples.
sa.no.hyper.real.exp["BI_COMPOSITE_SBS5_P", ] <- sp.no.hyper.real.exp["SBS5", ]
# Alternative approach, not used (kept for reference; never executed).
if (FALSE) {
  sa.no.hyper.real.exp2 <- sa.no.hyper.real.exp[rowSums(sa.no.hyper.real.exp) > 0, ]
  sp.sa.map.info <-
    MapSPToSASignatureNamesInExposure(
      sp.exposures = sp.no.hyper.real.exp,
      sa.sig.names.to.consider = rownames(sa.no.hyper.real.exp2))

  knitr::kable(
    sp.sa.map.info$sp.to.sa.sig.match, 
    caption =
      'Best matches from SP signatures to SA signatures with exposures in hyper tumors',
    digits = 4)
  sa.no.hyper.real.exp3 <- sa.no.hyper.real.exp2[sp.sa.map.info$sp.to.sa.sig.match$to, ]
}
# View(sa.all.real.exposures[ , no.hyper.colnames])
# Sanity check. identical() is used instead of `dim(x) == c(2, 81)` because
# the latter yields logical(0) when dim(x) is NULL, which stopifnot()
# silently accepts.
stopifnot(identical(dim(sa.no.hyper.real.exp), c(2L, 81L)))

Compute parameters and create synthetic exposures for low-burden signatures

# Number of synthetic low-burden tumours to generate.
num.syn.tumors <- 500

# Build synthetic exposures from the real SBS1 / SBS5 exposures.
no.hyper.syn.info <- SAAndSPSynDataOneCAType(
  sa.no.hyper.real.exp,
  sp.no.hyper.real.exp,
  ca.type = NULL,  # Do not filter by cancer type
  num.syn.tumors,
  file.prefix = "NO.HYPER"
)

Exposures for a set of ~500 tumours with SBS1, SBS5, SBS26, and SBS44

Most tumors have a mix of all 4 signatures. This reflects the SignatureAnalyzer strategy of dealing with the "hypermutated" tumours separately.

Combine the high and low burden synthetic exposures.

# Stack the high-burden rows on top of the low-burden rows, once for the
# SignatureAnalyzer-based exposures and once for the SigProfiler-based ones.
sa.hyper.syn.exp <- rbind(hyper.info$sa.syn.exp, no.hyper.syn.info$sa.syn.exp)
sp.hyper.syn.exp <- rbind(hyper.info$sp.syn.exp, no.hyper.syn.info$sp.syn.exp)

Remove hyper-mutated exposures that contain neither SBS26 nor SBS44

# Keep a synthetic tumour only if at least one of the two high-burden
# signatures has an exposure of 1 or more.
sa.to.keep <-
  (sa.hyper.syn.exp["BI_COMPOSITE_SBS26_S", ] >= 1) |
  (sa.hyper.syn.exp["BI_COMPOSITE_SBS44_S", ] >= 1)

# drop = FALSE keeps the two-dimensional shape even if only a single
# tumour passes the filter.
sa.hyper.syn.exp <- sa.hyper.syn.exp[ , sa.to.keep, drop = FALSE]

# Same filter for the SigProfiler-based exposures.
sp.to.keep <-
  (sp.hyper.syn.exp["SBS26", ] >= 1) |
  (sp.hyper.syn.exp["SBS44", ] >= 1)

sp.hyper.syn.exp <- sp.hyper.syn.exp[ , sp.to.keep, drop = FALSE]

Data set for SA -- one (SBS1, 5) catalog, one (SBS1, 5, 26, 44) catalog

Catalogs for 500 tumors with only SBS1, 5

Create catalogs based on SignatureAnalyzer attributions

# Output directory for the SBS1 / SBS5-only catalogs.
sa.primary.dir.root <- "syn.hyper.low.sa.primary"
# overwrite = TRUE is for testing and development.
SetNewOutDir(paste0("tmp.", sa.primary.dir.root), overwrite = TRUE)

# Flag passed to every CreateAndWriteCatalog() call that follows.
overwrite <- TRUE

# Catalog from the SignatureAnalyzer COMPOSITE signatures.
CreateAndWriteCatalog(sa.COMPOSITE.sigs,
                      no.hyper.syn.info$sa.syn.exp,
                      "sa.sa.COMPOSITE",
                      WriteCatCOMPOSITE,
                      overwrite = overwrite)

# Catalog from the SignatureAnalyzer 96-channel signatures.
CreateAndWriteCatalog(sa.96.sigs,
                      no.hyper.syn.info$sa.syn.exp,
                      "sa.sa.96",
                      WriteCatalog,
                      overwrite = overwrite)

Create synthetic catalogs based on SigProfiler attributions

# Copy of the SigProfiler-based exposures whose rows are relabelled with the
# SignatureAnalyzer row names, so they can be combined with the
# SignatureAnalyzer COMPOSITE signatures.
sp.no.hyper.exp.renamed <- no.hyper.syn.info$sp.syn.exp
rownames(sp.no.hyper.exp.renamed) <- rownames(no.hyper.syn.info$sa.syn.exp)

# SigProfiler exposures with SignatureAnalyzer COMPOSITE signatures.
CreateAndWriteCatalog(sa.COMPOSITE.sigs,
                      sp.no.hyper.exp.renamed,
                      "sp.sa.COMPOSITE",
                      WriteCatCOMPOSITE,
                      overwrite = overwrite)

# SigProfiler exposures with SigProfiler signatures.
CreateAndWriteCatalog(sp.sigs,
                      no.hyper.syn.info$sp.syn.exp,
                      "sp.sp",
                      WriteCatalog,
                      overwrite = overwrite)

Catalogs for 500 tumors with SBS1, 5, 26, 44

Create catalogs based on SignatureAnalyzer attributions

# Output directory for the SBS1 / SBS5 / SBS26 / SBS44 catalogs.
sa.secondary.dir.root <- "syn.hyper.low.sa.secondary"
# overwrite = TRUE is for testing and development.
SetNewOutDir(paste0("tmp.", sa.secondary.dir.root), overwrite = TRUE)

# Catalog from the SignatureAnalyzer COMPOSITE signatures.
CreateAndWriteCatalog(sa.COMPOSITE.sigs,
                      sa.hyper.syn.exp,
                      "sa.sa.COMPOSITE",
                      WriteCatCOMPOSITE,
                      overwrite = overwrite)

# Catalog from the SignatureAnalyzer 96-channel signatures.
CreateAndWriteCatalog(sa.96.sigs,
                      sa.hyper.syn.exp,
                      "sa.sa.96",
                      WriteCatalog,
                      overwrite = overwrite)

Create synthetic catalogs based on SigProfiler attributions

# Copy of the SigProfiler-based exposures whose rows are relabelled with the
# SignatureAnalyzer row names, so they can be combined with the
# SignatureAnalyzer COMPOSITE signatures.
sp.hyper.exp.renamed <- sp.hyper.syn.exp
rownames(sp.hyper.exp.renamed) <- rownames(sa.hyper.syn.exp)

# SigProfiler exposures with SignatureAnalyzer COMPOSITE signatures.
CreateAndWriteCatalog(sa.COMPOSITE.sigs,
                      sp.hyper.exp.renamed,
                      "sp.sa.COMPOSITE",
                      WriteCatCOMPOSITE,
                      overwrite = overwrite)

# SigProfiler exposures with SigProfiler signatures.
CreateAndWriteCatalog(sp.sigs,
                      sp.hyper.syn.exp,
                      "sp.sp",
                      WriteCatalog,
                      overwrite = overwrite)

Generate data for SigProfiler

Write the catalogs for SP

# Output directory for the SigProfiler data set.
sp.dir.root <- "syn.hyper.low.sp"
# overwrite = TRUE is for testing and development.
SetNewOutDir(paste0("tmp.", sp.dir.root), overwrite = TRUE)

# Merge the low-burden and filtered high-burden SignatureAnalyzer-based
# exposures, then write the 96-channel catalog.
sa.merge.syn.exp <-
  MergeExposures(list(no.hyper.syn.info$sa.syn.exp, sa.hyper.syn.exp))

sa.tmp <- CreateAndWriteCatalog(sa.96.sigs,
                                sa.merge.syn.exp,
                                "sa.sa.96",
                                WriteCatalog,
                                overwrite = overwrite)

# Same for the SigProfiler-based exposures.
sp.merge.syn.exp <-
  MergeExposures(list(no.hyper.syn.info$sp.syn.exp, sp.hyper.syn.exp))

sp.tmp <- CreateAndWriteCatalog(sp.sigs,
                                sp.merge.syn.exp,
                                "sp.sp",
                                WriteCatalog,
                                overwrite = overwrite)

Check and unlink the results

Set unlink = FALSE if you want to keep the results. Return values are "ok" on success.

# Check each of the four output directory roots created above. The calls are
# left at top level so that each return value is printed.
# NOTE(review): presumably compares the "tmp."-prefixed output against a
# stored reference directory of the same root name and removes the tmp
# directory when unlink = TRUE -- confirm against Diff4SynDataSets docs.
Diff4SynDataSets(START.dir.root, unlink = TRUE)
Diff4SynDataSets(sa.primary.dir.root, unlink = TRUE)
Diff4SynDataSets(sa.secondary.dir.root, unlink = TRUE)
Diff4SynDataSets(sp.dir.root, unlink = TRUE)


steverozen/SynSigGen documentation built on April 1, 2022, 8:54 p.m.