knitr::opts_chunk$set(echo = TRUE)
This is an example of background subtraction using the mSigBG package.
In this example we subtract the background signatures of HepG2 and MCF-10A cells from the spectra of cells treated with three platinum-based chemotherapies.
r ifelse(params$verbose, "## Libraries and helper functions", "")
# CRAN packages library(ICAMS) require(philentropy) require(factoextra) require(gplots) require(nloptr) # Bioconductor library(BSgenome.Hsapiens.1000genomes.hs37d5) # If not available, # install.packages("BiocManager") # BiocManager::install("BSgenome.Hsapiens.1000genomes.hs37d5") # Other library(mSigBG) # if not available # remotes::install_github("steverozen/mSigBG", ref = "1.0-branch") library(PCAWG7) # if not available # remotes::install_github("steverozen/PCAWG7")
# Plot a single catalog pcat1 <- function(catalog, ylim = NULL) { par(pin = c(5, 1)) par(mar = c(5, 4, 5, 4)) par(cex = 0.8) par(cex.main = 1.4) bp <- ICAMS::PlotCatalog(catalog[ , 1, drop = FALSE], upper = TRUE, xlabels = TRUE, ylim = ylim ) return(invisible(bp$plot.object)) }
pcat <- function(catalog) { par(pin = c(3*ncol(catalog), 1)) par(mfrow = c(ncol(catalog), 1)) par(mar = c(2, 4, 4, 2)) par(cex = 0.8) par(cex.main = 1.4) xlabels <- FALSE for (i in 1:ncol(catalog)) { if (FALSE && i == ncol(catalog)) { # FALSE an experiment xlabels <- TRUE par(mar = c(4.5, 4, 3, 2)) } ICAMS::PlotCatalog(catalog[ , i, drop = FALSE], upper = (i == 1), xlabels = xlabels) } }
These are the spectra from cell lines exposed to cisplatin, carboplatin, or oxaliplatin.
These are variant call files (VCFs) from mutations in HepG2 and MCF-10A cells treated with cisplatin.
if (!file.exists("spectra/HepG2_and_MCF10A_Cis.spectra.Rdata")) { vcfs <- list.files("vcfs/HepG2_Cis/", full.names = TRUE) hepg2.cis <- ICAMS::StrelkaSBSVCFFilesToCatalog( files = vcfs, ref.genome = BSgenome.Hsapiens.1000genomes.hs37d5, region = "genome")$catSBS96 colnames(hepg2.cis) <- sub(".vcf", "", x = colnames(hepg2.cis), fixed = TRUE) vcfs <- list.files("vcfs/MCF10A_Cis/", full.names = TRUE) mcf10a.cis <- ICAMS::StrelkaSBSVCFFilesToCatalog( files = vcfs, ref.genome = BSgenome.Hsapiens.1000genomes.hs37d5, region = "genome")$catSBS96 colnames(mcf10a.cis) <- sub(".vcf", "", x = colnames(mcf10a.cis), fixed = TRUE) save(hepg2.cis, mcf10a.cis, file = "spectra/HepG2_and_MCF10A_Cis.spectra.Rdata") rm(vcfs) } else { load("spectra/HepG2_and_MCF10A_Cis.spectra.Rdata") }
hepg2.car <- ICAMS::ReadCatalog("spectra/HepG2_Car.csv", ref.genome = BSgenome.Hsapiens.1000genomes.hs37d5, region = "genome", catalog.type = "counts") hepg2.oxa <- ICAMS::ReadCatalog("spectra/HepG2_Oxa.csv", ref.genome = BSgenome.Hsapiens.1000genomes.hs37d5, region = "genome", catalog.type = "counts") mcf10a.car <- ICAMS::ReadCatalog("spectra/MCF10A_Car.csv", ref.genome = BSgenome.Hsapiens.1000genomes.hs37d5, region = "genome", catalog.type = "counts")
pcat(hepg2.cis)
pcat(mcf10a.cis)
pcat(hepg2.car)
pcat(mcf10a.car)
pcat(hepg2.oxa)
hepg2.exposures <- list(hepg2.cis, hepg2.car, hepg2.oxa) names(hepg2.exposures) <- c("HepG2.cis.sig", "HepG2.car.sig", "HepG2.oxa.sig") mcf10a.exposures <- list(mcf10a.cis, mcf10a.car) names(mcf10a.exposures) <- c("MCF10A.cis.sig", "MCF10.car.sig")
all.spectra <- do.call(cbind, c(hepg2.exposures, mcf10a.exposures)) temp.names <- unlist(lapply(c(hepg2.exposures, mcf10a.exposures), colnames)) colnames(all.spectra) <- temp.names cosine.sim <- suppressMessages( philentropy::distance(t(all.spectra), method = "cosine")) rownames(cosine.sim) <- colnames(all.spectra) colnames(cosine.sim) <- colnames(all.spectra) gplots::heatmap.2(x = cosine.sim, dendrogram = "column", margins = c(9, 9), cex.axis = 0.5, symm = TRUE, trace = "none")
all.spectra.as.sigs <- ICAMS::TransformCatalog(all.spectra, target.catalog.type = "counts.signature") pc <- prcomp(t(all.spectra.as.sigs), center = TRUE, scale = FALSE, retx = TRUE) factoextra::fviz_pca_ind(pc, repel = TRUE) factoextra::fviz_contrib( pc, choice = "var", top = 20, xtickslab.rt = 90) factoextra::fviz_contrib( pc, choice = "var", top = 20, axes = 2, xtickslab.rt = 90)
pcat(mSigBG::background.info[["MCF10A"]]$background.sig) pcat(mSigBG::background.info[["HepG2"]]$background.sig)
one.separation <- function(sig.name, sig.list, bg.sig.info, my.seed = 101010) { set.seed(my.seed, kind = "L'Ecuyer-CMRG") spectra <- sig.list[[sig.name]] if (is.null(attr(spectra, "abundance"))) { stop("NULL abundance for ", sig.name) } ret <- SeparateSignatureAndSpectra( spectra = spectra, bg.sig.info = bg.sig.info, m.opts = NULL, start.b.fraction = 0.5, sig.name = sig.name ) return(ret) } if (!file.exists("all.separated.Rdata")) { hepg2.separated <- lapply(X = names(hepg2.exposures), FUN = one.separation, sig.list = hepg2.exposures, bg.sig.info = background.info[["HepG2"]]) names(hepg2.separated) <- names(hepg2.exposures) }
if (!file.exists("all.separated.Rdata")) { mcf10a.separated <- lapply(X = names(mcf10a.exposures), FUN = one.separation, sig.list = mcf10a.exposures, bg.sig.info = background.info[["MCF10A"]]) names(mcf10a.separated) <- names(mcf10a.exposures) all.separated <- c(hepg2.separated, mcf10a.separated) save(all.separated, file = "all.separated.Rdata") } else { load("all.separated.Rdata") }
This is a sanity check. The number of mutations assigned to the background signature should be reasonable given the distribution in untreated cell lines.
background.info[["HepG2"]]$count.nbinom.mu background.info[["MCF10A"]]$count.nbinom.mu lapply(all.separated, function(x) { exposures <- x$exposures.to.bg.sig names <- colnames(x$inferred.bg.spectra) names(exposures) <- sub("-inf.bg.spect", "", names, fixed = TRUE) return(exposures) })
The bars show the maximum and minimum proportions across all input spectra.
par(mfrow = c(1, 1)) par(mar = c(7, 5, 7, 2)) par(cex = 0.8) par(cex.main = 1.4) for (test.name in names(all.separated)) { PlotSpectraAsSigsWithUncertainty( all.separated[[test.name]]$inferred.target.spectra, title = test.name) }
par(mfrow = c(1, 1)) par(mar = c(7, 5, 7, 2)) par(cex = 0.8) par(cex.main = 1.4) for (test.name in names(all.separated)) { d.spect <- ICAMS::TransformCatalog( all.separated[[test.name]]$inferred.target.spectra, target.catalog.type = "density") PlotSpectraAsSigsWithUncertainty( d.spect, title = test.name) }
par(mfrow = c(1, 1)) par(mar = c(7, 5, 7, 2)) par(cex = 0.8) par(cex.main = 1.4) for (test.name in names(all.separated)) { info <- all.separated[[test.name]] bg.spectra <- info$inferred.bg.spectra ta.spectra <- info$inferred.target.spectra for (colnum in 1:ncol(bg.spectra)) { Plot1StackedSpectrum( background.spectrum = bg.spectra[ , colnum, drop = FALSE], target.spectrum = ta.spectra[ , colnum, drop = FALSE]) } }
remove.to.dot <- function(x, times) { for (ii in 1:times) { x <- sub("[^\\.]*\\.", "", x = x, perl = TRUE) } return(x) }
inferred.spectra.list <- lapply(all.separated, `[[`, "inferred.target.spectra") all.target.spectra <- do.call(cbind, inferred.spectra.list) # Clean up the columb names colnames(all.target.spectra) <- sub(".inf.tg.spect", "", x = colnames(all.target.spectra), fixed = TRUE) colnames(all.target.spectra) <- remove.to.dot(colnames(all.target.spectra), 3)
all.target.sig.list <- lapply(all.separated, `[[`, "inferred.target.sig.as.catalog") all.target.sigs <- do.call(cbind, all.target.sig.list) grist <- cbind( ICAMS::TransformCatalog(all.target.spectra, target.catalog.type = "counts.signature"), all.target.sigs) cosine.sim <- suppressMessages( philentropy::distance(t(grist), method = "cosine")) rownames(cosine.sim) <- colnames(grist) colnames(cosine.sim) <- colnames(grist) gplots::heatmap.2(x = cosine.sim, dendrogram = "column", margins = c(9, 9), cex.axis = 0.5, symm = TRUE, trace = "none")
pc <- prcomp(t(grist), center = TRUE, scale = FALSE, retx = TRUE) factoextra::fviz_pca_ind(pc, repel = TRUE) factoextra::fviz_contrib( pc, choice = "var", top = 20, xtickslab.rt = 90) factoextra::fviz_contrib( pc, choice = "var", top = 20, axes = 2, xtickslab.rt = 90)
There is only one major dimension, and it is due mainly to CCC > CTC, CCT > CTT, and GCC > CAG mutations.
pc <- prcomp(t(all.target.sigs), center = TRUE, scale = FALSE, retx = TRUE) factoextra::fviz_pca_ind(pc, repel = TRUE) factoextra::fviz_contrib( pc, choice = "var", top = 20, xtickslab.rt = 90) factoextra::fviz_contrib( pc, choice = "var", top = 20, axes = 2, xtickslab.rt = 90)
ox.indices <- grep("oxa", colnames(grist), ignore.case = TRUE) grist.no.ox <- grist[ , -ox.indices] pc <- prcomp(t(grist.no.ox), center = TRUE, scale = FALSE, retx = TRUE) factoextra::fviz_pca_ind(pc, repel = TRUE) factoextra::fviz_contrib( pc, choice = "var", xtickslab.rt = 90, top = 20) factoextra::fviz_contrib( pc, choice = "var", xtickslab.rt = 90, top = 20, axes = 2)
sbs31 <- PCAWG7::signature$genome$SBS96[ , "SBS31"] sbs35 <- PCAWG7::signature$genome$SBS96[ , "SBS35"] one.opt <- function(sig, method = "cosine", invert = -1) { sig <- as.vector(sig) my.obj.fn <- function(coef) { recon <- coef[1]*sbs31 + coef[2]*sbs35 return( suppressMessages( invert * philentropy::distance( rbind(recon, sig), method = method))) } g_ineq <- function(coef) { abs(1 - sum(coef)) } retval <- nloptr::nloptr( x0 = c(0.5, 0.5), eval_f = my.obj.fn, eval_g_ineq = g_ineq, lb = c(0, 0), ub = c(1, 1), opt = list( algorithm = "NLOPT_LN_COBYLA", maxeval = 2000, xtol_rel = 1e-6, xtol_abs = 1e-7) ) retval <- list(similarity = invert * retval$objective, coef = retval$solution / sum(retval$solution)) return(retval) }
t( as.data.frame( lapply( all.target.sig.list, function(x) { ret <- one.opt(x) names(ret$coef) <- c("sbs31", "sbs35") return(c(cos.sim = ret$similarity, ret$coef))}) ))
Conclusion: the experimental cisplatin signatures are best reconstructed by combinations of SBS31 and SBS35, the carboplatin signatures are constructed pretty well, and oxaliplatin is not reconstructed as well. Hypothesis: The relative contributions of SBS31 and SBS35 may vary across samples. Can check to see if this is the case for the individual cell line partial spectra.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.