inst/doc/get_sarted_with_mutSignatures.R

## ----setup, include=FALSE-----------------------------------------------------
set.seed(1234567)
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, fig.align = "center", results = "asis")

my.t0 <- Sys.time()

library(dplyr)
library(reshape2)
library(kableExtra)
library(ggplot2)
library(gridExtra)
library(mutSignatures)

# load data
data("mutSigData")

# Extra cols
head.muty <- c("T[C>A]G", "T[C>T]A", "G[C>T]A", "C[C>T]T", "T[C>G]T", "T[C>G]C", 
               "T[C>G]A", "T[C>T]T", "T[C>G]T", "T[C>T]G", "T[C>A]G", "T[C>T]A", 
               "A[T>C]T", "A[T>G]G", "C[T>C]T", "T[C>T]A", "T[C>T]A", "G[T>A]C")

## ----eval = FALSE-------------------------------------------------------------
#  install.packages("mutSignatures")

## ----eval = FALSE-------------------------------------------------------------
#  devtools::install_github("dami82/mutSignatures", force = TRUE,
#                           build_opts = NULL, build_vignettes = TRUE)

## ----echo=TRUE, eval=FALSE, include=TRUE--------------------------------------
#  # Get BiocManager
#  if (!"BiocManager" %in% rownames(utils::installed.packages()))
#    install.packages("BiocManager")
#  
#  # Get Bioc libs
#  BiocManager::install(pkgs = c("IRanges", "GenomicRanges", "BSgenome", "GenomeInfoDb"))

## ----echo=TRUE, eval=FALSE, include=TRUE--------------------------------------
#  BiocManager::install(pkgs = "BSgenome.Hsapiens.UCSC.hg19")

## ----eval=FALSE---------------------------------------------------------------
#  # Required libs
#  library(dplyr)
#  library(reshape2)
#  library(kableExtra)
#  library(ggplot2)
#  library(gridExtra)
#  library(BSgenome.Hsapiens.UCSC.hg19)
#  
#  # Load mutSignatures
#  library(mutSignatures)
#  
#  # prep hg19
#  hg19 <- BSgenome.Hsapiens.UCSC.hg19
#  
#  # load data
#  data("mutSigData")

## -----------------------------------------------------------------------------
# Import data (VCF-like format)
x <- mutSigData$inputC

# Filter non SNV
x <- filterSNV(dataSet = x,  seq_colNames = c("REF", "ALT"))

# Visualize head
head(x) %>% kable() %>% kable_styling(bootstrap_options = "striped")

## ----eval=FALSE---------------------------------------------------------------
#  # Attach context
#  x <- attachContext(mutData = x,
#                     chr_colName = "CHROM",
#                     start_colName = "POS",
#                     end_colName = "POS",
#                     nucl_contextN = 3,
#                     BSGenomeDb = hg19)

## ----include=FALSE, eval=TRUE, echo=FALSE-------------------------------------
x <- mutSignatures::mutSigData$inputC.ctx

## -----------------------------------------------------------------------------
# Visualize head
head(x) %>% kable() %>% kable_styling(bootstrap_options = "striped")
# Remove mismatches
x <- removeMismatchMut(mutData = x,                  # input data.frame
                       refMut_colName = "REF",       # column name for ref base
                       context_colName = "context",  # column name for context
                       refMut_format = "N")    

## ----eval=FALSE---------------------------------------------------------------
#  # Compute mutType
#  x <- attachMutType(mutData = x,                      # as above
#                     ref_colName = "REF",              # column name for ref base
#                     var_colName = "ALT",              # column name for mut base
#                     context_colName = "context")

## ----include=FALSE, eval=TRUE, echo=FALSE-------------------------------------
x <- x[1:18, ]
x$mutType <- head.muty

## -----------------------------------------------------------------------------
# Visualize head
head(x) %>% kable() %>% kable_styling(bootstrap_options = "striped")

## ----eval=FALSE---------------------------------------------------------------
#  # Count
#  blca.counts <- countMutTypes(mutTable = x,
#                               mutType_colName = "mutType",
#                               sample_colName = "SAMPLEID")

## ----include=FALSE, eval=TRUE, echo=FALSE-------------------------------------
# Count
blca.counts <- mutSignatures::as.mutation.counts(mutSigData$blcaMUTS)

## ----results='markup'---------------------------------------------------------
# Mutation Counts
print(blca.counts)

## -----------------------------------------------------------------------------
# how many signatures should we extract? 
num.sign <- 4

# Define parameters for the non-negative matrix factorization procedure.
# you should parallelize if possible
blca.params <- 
  mutSignatures::setMutClusterParams( 
    num_processesToExtract = num.sign,    # num signatures to extract
    num_totIterations = 20,               # bootstrapping: usually 500-1000
    num_parallelCores = 4)                # total num of cores to use (parallelization)

## ----eval=FALSE, include=TRUE, echo=TRUE--------------------------------------
#  # Extract new signatures - may take a while
#  blca.analysis <-
#    decipherMutationalProcesses(input = blca.counts,
#                                params = blca.params)

## ----eval=TRUE, include=TRUE, echo=FALSE--------------------------------------
blca.analysis <- list(Results = mutSigData$inputS$Results)
tmp <- mutSigData$inputS$silhouetteTMP

xrange <- c(min(tmp[,3]), max(tmp[,3]))
xrange[1] <- ifelse(xrange[1] > 0, 0, (-1.15) * abs(xrange[1]))
xrange[2] <- 1.15
graphics::barplot(tmp[nrow(tmp):1, 3], 
                  col = base::as.factor(tmp[nrow(tmp):1,1]),
                  xlim = xrange,
                  horiz = TRUE, xlab = "Silhouette Value", 
                  ylab = "", 
                  main = "Silhouette Plot", 
                  border = base::as.factor(tmp[nrow(tmp):1,1]))
graphics::abline(v=0)
graphics::title(ylab="Iter. Results (by Group)", line=1, cex.lab=1, font = 2)

## ----fig.align='center', fig.width=12, fig.height=3---------------------------
# Retrieve signatures (results)
blca.sig <- blca.analysis$Results$signatures

# Retrieve exposures (results)
blca.exp <- blca.analysis$Results$exposures

# Plot signature 1 (standard barplot, you can pass extra args such as ylim)
msigPlot(blca.sig, signature = 1, ylim = c(0, 0.10))

## -----------------------------------------------------------------------------
# Plot exposures (ggplot2 object, you can customize as any other ggplot2 object)
msigPlot(blca.exp, main = "BLCA samples") + 
  scale_fill_manual(values = c("#1f78b4", "#cab2d6", "#ff7f00", "#a6cee3"))
# Export Signatures as data.frame
xprt <- coerceObj(x = blca.sig, to = "data.frame") 
head(xprt) %>% kable() %>% kable_styling(bootstrap_options = "striped")
# Get signatures from data (imported as data.frame) 
# and then convert it to mutSignatures object
cosmixSigs <- mutSigData$blcaSIGS %>% 
  dplyr::select(starts_with("COSMIC")) %>% 
  as.mutation.signatures()

blcaKnwnSigs <- mutSigData$blcaSIGS %>% 
  dplyr::select(starts_with("BLCA")) %>% 
  as.mutation.signatures()

# Compare de-novo signatures with selected COSMIC signatures
msig1 <- matchSignatures(mutSign = blca.sig, reference = cosmixSigs, 
                         threshold = 0.45, plot = TRUE) 
msig2 <- matchSignatures(mutSign = blca.sig, reference = blcaKnwnSigs, 
                         threshold = 0.45, plot = TRUE)

## ----fig.height=5, fig.width=12, fig.align='center'---------------------------
# Visualize match
# signature 1 is similar to COSMIC ; 
# signatures 2 and 3 are similar to COSMIC
# Here, we should probably extract only 2 mutational signatures
hm1 <- msig1$plot + ggtitle("Match to COSMIC signs.")
hm2 <- msig2$plot + ggtitle("Match to known BLCA signs.")

# Show
grid.arrange(hm1, hm2, ncol = 2)

## -----------------------------------------------------------------------------
# Retrieve a mutation.counts data.frame
x <- mutSigData$blcaMUTS

# Visualize header
x[1:10, 1:5] %>% kable() %>% kable_styling(bootstrap_options = "striped")
# Convert it
xx <- as.mutation.counts(x)

## ----results='markup'---------------------------------------------------------
# Print to screen
print(xx)

## -----------------------------------------------------------------------------
# Obtain 4 COSMIC signatures
cosmx <- mutSigData$blcaSIGS %>% dplyr::select(starts_with("COSMIC"))
cosmx <- as.mutation.signatures(cosmx)

# Obtain 4 BLCA signatures
blcmx <- mutSigData$blcaSIGS %>% dplyr::select(starts_with("BLCA"))
blcmx <- as.mutation.signatures(blcmx)

## ----results='markup'---------------------------------------------------------
# Visualize cosmx
print(cosmx)
# Visualize cosmx
print(blcmx)

## -----------------------------------------------------------------------------
# Run analysis
blca.expo1 <- resolveMutSignatures(mutCountData = xx, 
                                   signFreqData = cosmx)

blca.expo2 <- resolveMutSignatures(mutCountData = xx, 
                                   signFreqData = blcmx)

## ----fig.width=12, fig.height=5-----------------------------------------------
# Retrieve exposures (results)
blca.exp.1x <- blca.expo1$results$count.result
blca.exp.2x <- blca.expo2$results$count.result

# Plot exposures
bp1 <- msigPlot(blca.exp.1x, main = "BLCA | COSMIC sigs.") + 
  scale_fill_manual(values = c("#fdbf6f", "#e31a1c", "#fb9a99", "#1f78b4"))

bp2 <- msigPlot(blca.exp.2x, main = "BLCA | pre-blca sigs.") + 
  scale_fill_manual(values = c("#fdbf6f", "#e31a1c", "#fb9a99", "#1f78b4"))

# Visualize
grid.arrange(bp1, bp2, ncol = 2)

# Compare sigs

# Export Exposures as data.frame
xprt <- as.data.frame(blca.exp.1x, transpose = TRUE)
head(xprt) %>% round() %>% kable() %>% 
  kable_styling(bootstrap_options = "striped")

## ----results='markup'---------------------------------------------------------
sessionInfo()

Try the mutSignatures package in your browser

Any scripts or data that you put into this service are public.

mutSignatures documentation built on Nov. 9, 2020, 9:06 a.m.