inst/doc/SomaticSignatures-vignette.R

## ----results='hide', echo=FALSE, message=FALSE, warning=FALSE-------
set.seed(1)

options(width = 70)

library(knitr)

inlineCode <- function(file, format = c) {
    file_exist = sapply(file, file.exists)
    file = file[file_exist]
    if(length(file) == 0)
        return("")
    style = sapply(file,
        function(file) {
            paste(readLines(file), collapse = "\n")},
        USE.NAMES = FALSE)
    style = sapply(style, format)
    style = paste(style, "\n", collapse = "\n\n")
    return(style)
}

knitrHeader <- function(css, js) {
    header = opts_knit$get("header")
    if(!missing(css) && !identical(css, character())) {
        header["highlight"] = inlineCode(css)
    }
    if(!missing(js) && !identical(js, character())) {
        header["js"] = inlineCode(js, formatInlineJS)
    }
    return(header)
}

base_dir = system.file(package = "SomaticSignatures")
css_path = file.path(base_dir, "css", "bioc.css")

opts_knit$set(self.contained = TRUE,
              upload.fun = image_uri,
              header = knitrHeader(css = css_path))

opts_chunk$set(comment = "  ",
               fig.path = "",
               fig.align = "center",
               out.width = "65%",
               dpi = 300,
               indent = 10,
               cache = FALSE,
               cache.path = "../cache")

knit_hooks$set(fig.cap = function(before, options, envir) {
    if(!before) {
        paste('<p class="caption">',options$fig.cap,"</p>",sep="")
    }
})

## ----load_ss, results='hide',message=FALSE--------------------------
library(SomaticSignatures)

## ----load_data_package, results='hide',message=FALSE----------------
library(SomaticCancerAlterations)
library(BSgenome.Hsapiens.1000genomes.hs37d5)

## ----sca_metadata---------------------------------------------------
sca_metadata = scaMetadata()

sca_metadata

## ----sca_to_vranges-------------------------------------------------
sca_data = unlist(scaLoadDatasets())

sca_data$study = factor(gsub("(.*)_(.*)", "\\1", toupper(names(sca_data))))
sca_data = unname(subset(sca_data, Variant_Type %in% "SNP"))
sca_data = keepSeqlevels(sca_data, hsAutosomes(), pruning.mode = "coarse")

sca_vr = VRanges(
    seqnames = seqnames(sca_data),
    ranges = ranges(sca_data), 
    ref = sca_data$Reference_Allele,
    alt = sca_data$Tumor_Seq_Allele2, 
    sampleNames = sca_data$Patient_ID,
    seqinfo = seqinfo(sca_data), 
    study = sca_data$study)

sca_vr

## ----sca_study_table------------------------------------------------
sort(table(sca_vr$study), decreasing = TRUE)

## ----sca_vr_to_motifs-----------------------------------------------
sca_motifs = mutationContext(sca_vr, BSgenome.Hsapiens.1000genomes.hs37d5)
head(sca_motifs)

## ----sca_motif_occurrence-------------------------------------------
sca_mm = motifMatrix(sca_motifs, group = "study", normalize = TRUE)

head(round(sca_mm, 4))

## ----sca_mutation_spectrum, fig.cap='Mutation spectrum over studies'----
plotMutationSpectrum(sca_motifs, "study")

## ----sca_nmf_pca----------------------------------------------------
n_sigs = 5

sigs_nmf = identifySignatures(sca_mm, n_sigs, nmfDecomposition)

sigs_pca = identifySignatures(sca_mm, n_sigs, pcaDecomposition)

## ----sca_explore_nmf------------------------------------------------
sigs_nmf

## ----sca_explore_pca------------------------------------------------
sigs_pca

## -------------------------------------------------------------------
n_sigs = 2:8

gof_nmf = assessNumberSignatures(sca_mm, n_sigs, nReplicates = 5)

gof_pca = assessNumberSignatures(sca_mm, n_sigs, pcaDecomposition)

## ----fig.cap='Summary statistics for selecting the number of signatures in the NMF decomposition.'----
plotNumberSignatures(gof_nmf)

## ----fig.cap='Summary statistics for selecting the number of signatures in the PCA decomposition.'----
plotNumberSignatures(gof_pca)

## ----load_ggplot2, results='hide',message=FALSE---------------------
library(ggplot2)

## ----sca_plot_nmf_signatures_map, fig.cap='Composition of somatic signatures estimated with the NMF, represented as a heatmap.'----
plotSignatureMap(sigs_nmf) + ggtitle("Somatic Signatures: NMF - Heatmap")

## ----sca_plot_nmf_signatures, fig.cap='Composition of somatic signatures estimated with the NMF, represented as a barchart.'----
plotSignatures(sigs_nmf) + ggtitle("Somatic Signatures: NMF - Barchart")

## -------------------------------------------------------------------
plotObservedSpectrum(sigs_nmf)

## -------------------------------------------------------------------
plotFittedSpectrum(sigs_nmf)

## ----sca_plot_nmf_samples_map, fig.cap='Occurrence of signatures estimated with the NMF, represented as a heatmap.'----
plotSampleMap(sigs_nmf)

## ----sca_plot_nmf_samples, fig.cap='Occurrence of signatures estimated with the NMF, represented as a barchart.'----
plotSamples(sigs_nmf)

## ----sca_plot_pca_signatures_map, fig.cap='Composition of somatic signatures estimated with the PCA, represented as a heatmap.'----
plotSignatureMap(sigs_pca) + ggtitle("Somatic Signatures: PCA - Heatmap")

## ----sca_plot_pca_signatures, fig.cap='Composition of somatic signatures estimated with the PCA, represented as a barchart.'----
plotSignatures(sigs_pca) + ggtitle("Somatic Signatures: PCA - Barchart")

## -------------------------------------------------------------------
plotFittedSpectrum(sigs_pca)

## -------------------------------------------------------------------
plotObservedSpectrum(sigs_pca)

## ----load_ggplot2_again, results='hide',message=FALSE---------------
library(ggplot2)

## ----sca_plot_nmf_samples_mod, results='hide',message=FALSE---------
p = plotSamples(sigs_nmf)

## (re)move the legend
p = p + theme(legend.position = "none")
## (re)label the axis
p = p + xlab("Studies")
## add a title
p = p + ggtitle("Somatic Signatures in TGCA WES Data")
## change the color scale
p = p + scale_fill_brewer(palette = "Blues")
## decrease the size of x-axis labels
p = p + theme(axis.text.x = element_text(size = 9))

## ----sca_plot_nmf_samples_mod_print, fig.cap='Occurrence of signatures estimated with the NMF, customized plot. See the original plot above for comparisons.'----
p

## -------------------------------------------------------------------
clu_motif = clusterSpectrum(sca_mm, "motif")

## ----fig.cap='Hierachical clustering of the mutational spectrum, according to motif.'----
library(ggdendro)

p = ggdendrogram(clu_motif, rotate = TRUE)
p

## ----sva_load, results='hide',message=FALSE-------------------------
library(sva)

## ----sva_batch------------------------------------------------------
sca_anno = as.data.frame(lapply(sca_metadata, unlist))

model_null = model.matrix(~ 1, sca_anno)

sca_mm_batch = ComBat(sca_mm, batch = sca_anno$Sequence_Source, mod = model_null)

## ----kmer_hs_chrs, results='hide',message=FALSE---------------------
k = 3
n = 1e4

hs_chrs = as(seqinfo(BSgenome.Hsapiens.1000genomes.hs37d5), "GRanges")
hs_chrs = keepSeqlevels(hs_chrs, c(1:22, "X", "Y"), pruning.mode = "coarse")

k3_hs_chrs = kmerFrequency(BSgenome.Hsapiens.1000genomes.hs37d5, n, k, hs_chrs)
k3_hs_chrs

## ----kmer_exons, eval=FALSE-----------------------------------------
#  library(TxDb.Hsapiens.UCSC.hg19.knownGene)
#  
#  k = 3
#  n = 1e4
#  
#  hs_exons = reduce(exons(TxDb.Hsapiens.UCSC.hg19.knownGene))
#  hs_exons = ncbi(keepStandardChromosomes(hs_exons))
#  
#  k3_exons = kmerFrequency(BSgenome.Hsapiens.1000genomes.hs37d5, n, k, hs_exons)

## ----normalize_motifs-----------------------------------------------
data(kmers)
norms = k3wg / k3we
head(norms)

sca_mm_norm = normalizeMotifs(sca_mm, norms)

## -------------------------------------------------------------------
showMethods("getSeq")

## ----eval=FALSE-----------------------------------------------------
#  ## Somatic variant calls
#  vr_A = readVcfAsVRanges(vcf_A_path, "GenomeA")
#  vr_B = readVcfAsVRanges(vcf_B_path, "GenomeB")
#  
#  ## Genomic sequences
#  fa_A = FaFile(fasta_A_path)
#  fa_B = FaFile(fasta_B_path)
#  
#  ## Somatic motifs
#  vr_A = mutationContext(vr_A, fa_A)
#  vr_B = mutationContext(vr_B, fa_B)
#  
#  ## Combine for further analysis
#  vr = c(vr_A, vr_B)

## -------------------------------------------------------------------
citation("SomaticSignatures")

## ----eval=FALSE-----------------------------------------------------
#  source("http://bioconductor.org/biocLite.R")
#  biocLite("SomaticSignatures")

## ----eval=FALSE-----------------------------------------------------
#  source("http://bioconductor.org/biocLite.R")
#  biocLite()

## ----results='hide', message=FALSE----------------------------------
library(VariantAnnotation)

## -------------------------------------------------------------------
vr = VRanges(
    seqnames = "chr1",
    ranges = IRanges(start = 1000, width = 1),
    ref = "A",
    alt = "C")

vr

## ----echo=FALSE, results='markup'-----------------------------------
sessionInfo()

Try the SomaticSignatures package in your browser

Any scripts or data that you put into this service are public.

SomaticSignatures documentation built on Nov. 8, 2020, 5:52 p.m.