knitr::opts_chunk$set(echo = TRUE)

Introduction

This is an example of background subtraction using the mSigBG package.

In this example we subtract the background signatures of HepG2 and MCF-10A cells from the spectra of cells treated with three platinum-based chemotherapies.

r ifelse(params$verbose, "## Libraries and helper functions", "")

# CRAN packages
library(ICAMS)
require(philentropy)
require(factoextra)
require(gplots)
require(nloptr)

# Bioconductor
library(BSgenome.Hsapiens.1000genomes.hs37d5)
# If not available,
# install.packages("BiocManager")
# BiocManager::install("BSgenome.Hsapiens.1000genomes.hs37d5")

# Other
library(mSigBG)
# if not available
# remotes::install_github("steverozen/mSigBG", ref = "1.0-branch")
library(PCAWG7)
# if not available
# remotes::install_github("steverozen/PCAWG7")
# Plot a single catalog
pcat1 <- function(catalog, ylim = NULL) {
  par(pin = c(5, 1))
  par(mar = c(5, 4, 5, 4))
  par(cex = 0.8)
  par(cex.main = 1.4)
  bp <- ICAMS::PlotCatalog(catalog[ , 1, drop = FALSE],
                           upper   = TRUE,
                           xlabels = TRUE,
                           ylim    = ylim
                           )
  return(invisible(bp$plot.object))
}
pcat <- function(catalog) {
  par(pin = c(3*ncol(catalog), 1))
  par(mfrow = c(ncol(catalog), 1))
  par(mar = c(2, 4, 4, 2))
  par(cex = 0.8)
  par(cex.main = 1.4) 
  xlabels <- FALSE
  for (i in 1:ncol(catalog)) {
    if (FALSE && i == ncol(catalog)) { # FALSE an experiment
      xlabels <- TRUE
      par(mar = c(4.5, 4, 3, 2))
    }

    ICAMS::PlotCatalog(catalog[ , i, drop = FALSE],
                       upper = (i == 1),
                       xlabels = xlabels)
  }
}

Generate and read input spectra

These are the spectra from cell lines exposed to cisplatin, carboplatin, or oxaliplatin.

Generate spectra "catalogs" from HepG2 and MCF-10A cisplatin VCF files

These are variant call files (VCFs) from mutations in HepG2 and MCF-10A cells treated with cisplatin.

if (!file.exists("spectra/HepG2_and_MCF10A_Cis.spectra.Rdata")) {
  vcfs <- list.files("vcfs/HepG2_Cis/", full.names = TRUE)
  hepg2.cis <- ICAMS::StrelkaSBSVCFFilesToCatalog(
    files      = vcfs,
    ref.genome = BSgenome.Hsapiens.1000genomes.hs37d5,
    region     = "genome")$catSBS96
  colnames(hepg2.cis) <- 
    sub(".vcf", "", x = colnames(hepg2.cis), fixed = TRUE)

  vcfs <- list.files("vcfs/MCF10A_Cis/", full.names = TRUE)
  mcf10a.cis <- ICAMS::StrelkaSBSVCFFilesToCatalog(
    files      = vcfs,
    ref.genome = BSgenome.Hsapiens.1000genomes.hs37d5,
    region     = "genome")$catSBS96
  colnames(mcf10a.cis) <- 
    sub(".vcf", "", x = colnames(mcf10a.cis), fixed = TRUE)

  save(hepg2.cis, 
       mcf10a.cis, 
       file = "spectra/HepG2_and_MCF10A_Cis.spectra.Rdata")

  rm(vcfs)
} else {
  load("spectra/HepG2_and_MCF10A_Cis.spectra.Rdata")
}

Read carboplatin and oxaliplatin spectra catalogs

hepg2.car  <- ICAMS::ReadCatalog("spectra/HepG2_Car.csv", 
                                 ref.genome = BSgenome.Hsapiens.1000genomes.hs37d5,
                                 region = "genome",
                                 catalog.type = "counts")

hepg2.oxa  <- ICAMS::ReadCatalog("spectra/HepG2_Oxa.csv", 
                                 ref.genome = BSgenome.Hsapiens.1000genomes.hs37d5,
                                 region = "genome",
                                 catalog.type = "counts")

mcf10a.car <- ICAMS::ReadCatalog("spectra/MCF10A_Car.csv", 
                                 ref.genome = BSgenome.Hsapiens.1000genomes.hs37d5,
                                 region = "genome",
                                 catalog.type = "counts")

Input as count spectra

pcat(hepg2.cis)
pcat(mcf10a.cis)
pcat(hepg2.car)
pcat(mcf10a.car)
pcat(hepg2.oxa)

Cosine similarity among raw spectra

hepg2.exposures <- list(hepg2.cis, hepg2.car, hepg2.oxa)
names(hepg2.exposures) <-
  c("HepG2.cis.sig", "HepG2.car.sig", "HepG2.oxa.sig")

mcf10a.exposures <- list(mcf10a.cis, mcf10a.car)
names(mcf10a.exposures) <- c("MCF10A.cis.sig", "MCF10.car.sig")
all.spectra <- do.call(cbind, c(hepg2.exposures, mcf10a.exposures))

temp.names <- unlist(lapply(c(hepg2.exposures, mcf10a.exposures), colnames))

colnames(all.spectra) <- temp.names

cosine.sim <- suppressMessages(
  philentropy::distance(t(all.spectra), method = "cosine"))
rownames(cosine.sim) <- colnames(all.spectra)
colnames(cosine.sim) <- colnames(all.spectra)
gplots::heatmap.2(x = cosine.sim,
                  dendrogram = "column",
                  margins = c(9, 9),
                  cex.axis = 0.5,
                  symm = TRUE,
                  trace = "none")

Principal components of raw spectra

all.spectra.as.sigs <- 
  ICAMS::TransformCatalog(all.spectra,
                          target.catalog.type = "counts.signature")

pc <- prcomp(t(all.spectra.as.sigs), center = TRUE, scale = FALSE, retx = TRUE)

factoextra::fviz_pca_ind(pc, repel = TRUE)

factoextra::fviz_contrib(
  pc, choice   = "var", 
  top          = 20, 
  xtickslab.rt = 90)

factoextra::fviz_contrib(
  pc, choice   = "var", 
  top          = 20,
  axes         = 2,
  xtickslab.rt = 90)

Background signatures, for refrence

pcat(mSigBG::background.info[["MCF10A"]]$background.sig)
pcat(mSigBG::background.info[["HepG2"]]$background.sig)

Separate the background and target signatures

one.separation <- function(sig.name, sig.list, bg.sig.info, my.seed = 101010) {

  set.seed(my.seed, kind = "L'Ecuyer-CMRG")

  spectra <- sig.list[[sig.name]]

  if (is.null(attr(spectra, "abundance"))) {
    stop("NULL abundance for ", sig.name)
  }

  ret <- SeparateSignatureAndSpectra(
    spectra          = spectra,
    bg.sig.info      = bg.sig.info,
    m.opts           = NULL,
    start.b.fraction = 0.5,
    sig.name         = sig.name
  )

  return(ret)
}

if (!file.exists("all.separated.Rdata")) {
  hepg2.separated <- lapply(X           = names(hepg2.exposures),
                            FUN         = one.separation,
                            sig.list    = hepg2.exposures,
                            bg.sig.info = background.info[["HepG2"]])
  names(hepg2.separated) <- names(hepg2.exposures)
}
if (!file.exists("all.separated.Rdata")) {
  mcf10a.separated <- lapply(X           = names(mcf10a.exposures),
                             FUN         = one.separation,
                             sig.list    = mcf10a.exposures,
                             bg.sig.info = background.info[["MCF10A"]])
  names(mcf10a.separated) <- names(mcf10a.exposures)

  all.separated <- c(hepg2.separated, mcf10a.separated)
  save(all.separated, file = "all.separated.Rdata")
} else {
  load("all.separated.Rdata")
}

Sanity check: exposures to background signature

This is a sanity check. The number of mutations assigned to the background signature should be reasonable given the distribution in untreated cell lines.

background.info[["HepG2"]]$count.nbinom.mu
background.info[["MCF10A"]]$count.nbinom.mu
lapply(all.separated, 
       function(x) { 
         exposures <- x$exposures.to.bg.sig
         names <- colnames(x$inferred.bg.spectra)
         names(exposures) <- sub("-inf.bg.spect", "", names, fixed = TRUE)
         return(exposures)
       })

Signatures based on inferred target spectra with uncertainty

The bars show the maximum and minimum proportions across all input spectra.

par(mfrow = c(1, 1))
par(mar = c(7, 5, 7, 2))
par(cex = 0.8)
par(cex.main = 1.4) 
for (test.name in names(all.separated)) {
  PlotSpectraAsSigsWithUncertainty(
    all.separated[[test.name]]$inferred.target.spectra,
    title = test.name)
}
par(mfrow = c(1, 1))
par(mar = c(7, 5, 7, 2))
par(cex = 0.8)
par(cex.main = 1.4) 
for (test.name in names(all.separated)) {
  d.spect <- ICAMS::TransformCatalog(
    all.separated[[test.name]]$inferred.target.spectra,
    target.catalog.type = "density")
  PlotSpectraAsSigsWithUncertainty(
    d.spect,
    title = test.name)
}

Stacked bar charts showing target and background signature exposures

par(mfrow = c(1, 1))
par(mar = c(7, 5, 7, 2))
par(cex = 0.8)
par(cex.main = 1.4)
for (test.name in names(all.separated)) {

  info <- all.separated[[test.name]]
  bg.spectra <- info$inferred.bg.spectra
  ta.spectra <- info$inferred.target.spectra

  for (colnum in 1:ncol(bg.spectra)) {
    Plot1StackedSpectrum(
      background.spectrum = bg.spectra[ , colnum, drop = FALSE],
      target.spectrum     = ta.spectra[ , colnum, drop = FALSE])
  }
}

Similarities across combined partial target spectra and inferred signatures

remove.to.dot <- function(x, times) {
  for (ii in 1:times) {
    x <- sub("[^\\.]*\\.", "", x = x, perl = TRUE)
  }
  return(x)
}
inferred.spectra.list <- lapply(all.separated, `[[`, "inferred.target.spectra")
all.target.spectra <- do.call(cbind, inferred.spectra.list)

# Clean up the columb names
colnames(all.target.spectra) <-
  sub(".inf.tg.spect", "",
      x = colnames(all.target.spectra),
      fixed = TRUE)
colnames(all.target.spectra) <- remove.to.dot(colnames(all.target.spectra), 3)

Heatmap of cosine similarities

all.target.sig.list <- lapply(all.separated, `[[`, "inferred.target.sig.as.catalog")

all.target.sigs <- do.call(cbind, all.target.sig.list)

grist <- cbind(
  ICAMS::TransformCatalog(all.target.spectra, 
                          target.catalog.type = "counts.signature"), 
  all.target.sigs)

cosine.sim <- suppressMessages(
  philentropy::distance(t(grist), method = "cosine"))

rownames(cosine.sim) <- colnames(grist)
colnames(cosine.sim) <- colnames(grist)
gplots::heatmap.2(x = cosine.sim,
                  dendrogram = "column",
                  margins = c(9, 9),
                  cex.axis = 0.5,
                  symm = TRUE,
                  trace = "none")

Principal components analysis

PCA with signatures and partial spectra

pc <- prcomp(t(grist), center = TRUE, scale = FALSE, retx = TRUE)

factoextra::fviz_pca_ind(pc, repel = TRUE)

factoextra::fviz_contrib(
  pc, choice   = "var", 
  top          = 20, 
  xtickslab.rt = 90)

factoextra::fviz_contrib(
  pc, choice   = "var", 
  top          = 20,
  axes         = 2,
  xtickslab.rt = 90)

There is only one major dimension, and it is due mainly to CCC > CTC, CCT > CTT, and GCC > CAG mutations.

PCA on signatures only

pc <- prcomp(t(all.target.sigs), center = TRUE, scale = FALSE, retx = TRUE)

factoextra::fviz_pca_ind(pc, repel = TRUE)

factoextra::fviz_contrib(
  pc, choice   = "var", 
  top          = 20, 
  xtickslab.rt = 90)

factoextra::fviz_contrib(
  pc, choice   = "var", 
  top          = 20,
  axes         = 2,
  xtickslab.rt = 90)

PCA without oxaliplatin

ox.indices <- grep("oxa", colnames(grist), ignore.case = TRUE) 
grist.no.ox <- grist[ , -ox.indices]
pc <- prcomp(t(grist.no.ox), center = TRUE, scale = FALSE, retx = TRUE)

factoextra::fviz_pca_ind(pc, repel = TRUE)

factoextra::fviz_contrib(
  pc, choice     = "var", 
  xtickslab.rt   = 90,
  top            = 20)

factoextra::fviz_contrib(
  pc, choice     = "var", 
  xtickslab.rt   = 90,
  top            = 20,
  axes           = 2)

How well do combinations of COSMIC signatures SBS31 and SBS35 reconstruct experimental signatures?

Set up numerical optimizaiton for maximum cosine similarity

sbs31 <- PCAWG7::signature$genome$SBS96[ , "SBS31"]
sbs35 <- PCAWG7::signature$genome$SBS96[ , "SBS35"]

one.opt <- function(sig, method = "cosine", invert = -1) {

  sig <- as.vector(sig)

  my.obj.fn <- function(coef) {
    recon <- coef[1]*sbs31 + coef[2]*sbs35 
    return(
      suppressMessages(
        invert * philentropy::distance(
          rbind(recon, sig), method = method)))
  }

  g_ineq <- function(coef) {
    abs(1 - sum(coef))  
  }

  retval <- nloptr::nloptr(
    x0          = c(0.5, 0.5),
    eval_f      = my.obj.fn,
    eval_g_ineq = g_ineq,
    lb          = c(0, 0),
    ub          = c(1, 1),
    opt         = list(
      algorithm = "NLOPT_LN_COBYLA",
      maxeval   = 2000,
      xtol_rel  = 1e-6,
      xtol_abs  = 1e-7)
  )

  retval <- list(similarity = invert * retval$objective, 
              coef = retval$solution / sum(retval$solution))

  return(retval)

}

Results of reconstruction from SBS31 and SBS35

t( 
  as.data.frame(
    lapply(
      all.target.sig.list, 
      function(x) { ret <- one.opt(x)
      names(ret$coef) <- c("sbs31", "sbs35")
      return(c(cos.sim = ret$similarity, ret$coef))})
  ))

Conclusion: the experimental cisplatin signatures are best reconstructed by combinations of SBS31 and SBS35, the carboplatin signatures are constructed pretty well, and oxaliplatin is not reconstructed as well. Hypothesis: The relative contributions of SBS31 and SBS35 may vary across samples. Can check to see if this is the case for the individual cell line partial spectra.



steverozen/mSigBG documentation built on Nov. 4, 2022, 10:58 a.m.