inst/doc/maftools.R

## ---- eval=FALSE--------------------------------------------------------------
#  if (!require("BiocManager"))
#      install.packages("BiocManager")
#  BiocManager::install("maftools")

## ----results='hide', message=FALSE--------------------------------------------
library(maftools)

## -----------------------------------------------------------------------------
#path to TCGA LAML MAF file
laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools') 
#clinical information containing survival information and histology. This is optional
laml.clin = system.file('extdata', 'tcga_laml_annot.tsv', package = 'maftools') 

laml = read.maf(maf = laml.maf, clinicalData = laml.clin)

## -----------------------------------------------------------------------------
#Typing laml shows basic summary of MAF file.
laml

## ---- eval=FALSE--------------------------------------------------------------
#  #Shows sample summry.
#  getSampleSummary(laml)
#  #Shows gene summary.
#  getGeneSummary(laml)
#  #shows clinical data associated with samples
#  getClinicalData(laml)
#  #Shows all fields in MAF
#  getFields(laml)
#  #Writes maf summary to an output file with basename laml.
#  write.mafSummary(maf = laml, basename = 'laml')

## ----fig.height=4, fig.width=6------------------------------------------------
plotmafSummary(maf = laml, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

## ---- fig.align='left',fig.height=3.5,fig.width=6, fig.align='left'-----------
#oncoplot for top ten mutated genes.
oncoplot(maf = laml, top = 10)

## ---- fig.height=3, fig.width=4.2, eval = T, fig.align='left'-----------------
laml.titv = titv(maf = laml, plot = FALSE, useSyn = TRUE)
#plot titv summary
plotTiTv(res = laml.titv)

## ----fig.align='left', fig.width=4.5, fig.height=2.5--------------------------
#lollipop plot for DNMT3A, which is one of the most frequent mutated gene in Leukemia.
lollipopPlot(maf = laml, gene = 'DNMT3A', AACol = 'Protein_Change', showMutationRate = TRUE)

## ----fig.align='left', fig.width=4.5, fig.height=2.5--------------------------
lollipopPlot(maf = laml, gene = 'DNMT3A', showDomainLabel = FALSE, labelPos = 882)

## ---- results='hide', message=FALSE-------------------------------------------
brca <- system.file("extdata", "brca.maf.gz", package = "maftools")
brca = read.maf(maf = brca, verbose = FALSE)

## ---- fig.height=3,fig.width=6,fig.align='left'-------------------------------
rainfallPlot(maf = brca, detectChangePoints = TRUE, pointSize = 0.4)

## ---- fig.align='left', fig.height=3.25, fig.width=6, message=FALSE, results='hide'----
laml.mutload = tcgaCompare(maf = laml, cohortName = 'Example-LAML', logscale = TRUE, capture_size = 50)

## ---- fig.align='left', fig.height=3, fig.width=3-----------------------------
plotVaf(maf = laml, vafCol = 'i_TumorVAF_WU')

## -----------------------------------------------------------------------------
all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools")
amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools")
del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools")
scores.gis <- system.file("extdata", "scores.gistic", package = "maftools")

laml.gistic = readGistic(gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, gisticScoresFile = scores.gis, isTCGA = TRUE)

#GISTIC object
laml.gistic

## ---- fig.width=4, fig.height=3, fig.align='left'-----------------------------
gisticChromPlot(gistic = laml.gistic, markBands = "all")

## ---- fig.width=4, fig.height=3, fig.align='left'-----------------------------
gisticBubblePlot(gistic = laml.gistic)

## ---- fig.align='left',fig.width=5, fig.height=3, eval=T----------------------
gisticOncoPlot(gistic = laml.gistic, clinicalData = getClinicalData(x = laml), clinicalFeatures = 'FAB_classification', sortByAnnotation = TRUE, top = 10)

## ---- fig.height=2.5,fig.width=4,fig.align='left'-----------------------------
tcga.ab.009.seg <- system.file("extdata", "TCGA.AB.3009.hg19.seg.txt", package = "maftools")
plotCBSsegments(cbsFile = tcga.ab.009.seg)

## ---- message=FALSE, fig.height=5, fig.width=5--------------------------------
#exclusive/co-occurance event analysis on top 10 mutated genes. 
somaticInteractions(maf = laml, top = 25, pvalue = c(0.05, 0.1))

## ---- fig.align='default', fig.width=7,fig.height=5, message=F,results='hide', eval=T----
laml.sig = oncodrive(maf = laml, AACol = 'Protein_Change', minMut = 5, pvalMethod = 'zscore')

## -----------------------------------------------------------------------------
head(laml.sig)

## ---- fig.align='left', fig.width=3.2, fig.height=3.2-------------------------
plotOncodrive(res = laml.sig, fdrCutOff = 0.1, useFraction = TRUE, labelSize = 0.5)

## ---- fig.align='left', fig.width=4, fig.height=3-----------------------------
laml.pfam = pfamDomains(maf = laml, AACol = 'Protein_Change', top = 10)
#Protein summary (Printing first 7 columns for display convenience)
laml.pfam$proteinSummary[,1:7, with = FALSE]
#Domain summary (Printing first 3 columns for display convenience)
laml.pfam$domainSummary[,1:3, with = FALSE]

## ---- fig.width=3, fig.height=3-----------------------------------------------
#Survival analysis based on grouping of DNMT3A mutation status
mafSurvival(maf = laml, genes = 'DNMT3A', time = 'days_to_last_followup', Status = 'Overall_Survival_Status', isTCGA = TRUE)

## -----------------------------------------------------------------------------
#Using top 20 mutated genes to identify a set of genes (of size 2) to predict poor prognostic groups
prog_geneset = survGroup(maf = laml, top = 20, geneSetSize = 2, time = "days_to_last_followup", Status = "Overall_Survival_Status", verbose = FALSE)

print(prog_geneset)

## ---- fig.width=3, fig.height=3-----------------------------------------------
mafSurvGroup(maf = laml, geneSet = c("DNMT3A", "FLT3"), time = "days_to_last_followup", Status = "Overall_Survival_Status")

## ----results='hide', message=FALSE--------------------------------------------
#Primary APL MAF
primary.apl = system.file("extdata", "APL_primary.maf.gz", package = "maftools")
primary.apl = read.maf(maf = primary.apl)
#Relapse APL MAF
relapse.apl = system.file("extdata", "APL_relapse.maf.gz", package = "maftools")
relapse.apl = read.maf(maf = relapse.apl)

## ---- fig.align='left'--------------------------------------------------------
#Considering only genes which are mutated in at-least in 5 samples in one of the cohort to avoid bias due to genes mutated in single sample.
pt.vs.rt <- mafCompare(m1 = primary.apl, m2 = relapse.apl, m1Name = 'Primary', m2Name = 'Relapse', minMut = 5)
print(pt.vs.rt)

## ---- fig.width=4, fig.height=5, fig.align='left'-----------------------------
forestPlot(mafCompareRes = pt.vs.rt, pVal = 0.1, color = c('royalblue', 'maroon'), geneFontSize = 0.8)

## ---- fig.height=2.5,fig.width=6, eval=T, fig.align='left'--------------------
genes = c("PML", "RARA", "RUNX1", "ARID1B", "FLT3")
coOncoplot(m1 = primary.apl, m2 = relapse.apl, m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL', genes = genes, removeNonMutated = TRUE)

## ---- fig.height=3, fig.width=4-----------------------------------------------
coBarplot(m1 = primary.apl, m2 = relapse.apl, m1Name = "Primary", m2Name = "Relapse")

## ---- warning=FALSE, message=FALSE,fig.align='left', results='hide', fig.height=3.5, fig.width=5----
lollipopPlot2(m1 = primary.apl, m2 = relapse.apl, gene = "PML", AACol1 = "amino_acid_change", AACol2 = "amino_acid_change", m1_name = "Primary", m2_name = "Relapse")

## -----------------------------------------------------------------------------
fab.ce = clinicalEnrichment(maf = laml, clinicalFeature = 'FAB_classification')

#Results are returned as a list. Significant associations p-value < 0.05
fab.ce$groupwise_comparision[p_value < 0.05]

## ---- fig.width=4, fig.height=3-----------------------------------------------
plotEnrichmentResults(enrich_res = fab.ce, pVal = 0.05, geneFontSize = 0.5, annoFontSize = 0.6)

## ---- fig.height=3, fig.width=5-----------------------------------------------
dgi = drugInteractions(maf = laml, fontSize = 0.75)

## -----------------------------------------------------------------------------
dnmt3a.dgi = drugInteractions(genes = "DNMT3A", drugs = TRUE)
#Printing selected columns.
dnmt3a.dgi[,.(Gene, interaction_types, drug_name, drug_claim_name)]

## ---- fig.width=3.5, fig.height=3---------------------------------------------
OncogenicPathways(maf = laml)

## ---- fig.width=5, fig.height=2.5---------------------------------------------
PlotOncogenicPathways(maf = laml, pathways = "RTK-RAS")

## ---- echo = TRUE, fig.align='left', fig.height=3.5, fig.width=4, eval=T------
#Heterogeneity in sample TCGA.AB.2972
library("mclust")
tcga.ab.2972.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-2972', vafCol = 'i_TumorVAF_WU')
print(tcga.ab.2972.het$clusterMeans)
#Visualizing results
plotClusters(clusters = tcga.ab.2972.het)

## ---- fig.align='left', fig.height=3.5, fig.width=4, eval=T-------------------
seg = system.file('extdata', 'TCGA.AB.3009.hg19.seg.txt', package = 'maftools')
tcga.ab.3009.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-3009', segFile = seg, vafCol = 'i_TumorVAF_WU')
#Visualizing results. Highlighting those variants on copynumber altered variants.
plotClusters(clusters = tcga.ab.3009.het, genes = 'CN_altered', showCNvars = TRUE)

## ---- eval=TRUE---------------------------------------------------------------
#Requires BSgenome object
library(BSgenome.Hsapiens.UCSC.hg19, quietly = TRUE)
laml.tnm = trinucleotideMatrix(maf = laml, prefix = 'chr', add = TRUE, ref_genome = "BSgenome.Hsapiens.UCSC.hg19")

## ---- eval=TRUE, fig.height=3, fig.width=5------------------------------------
plotApobecDiff(tnm = laml.tnm, maf = laml, pVal = 0.2)

## ---- echo=FALSE--------------------------------------------------------------
par(mar = c(2, 2, 2, 1))
plot(NA, xlim = c(1, 10), ylim = c(0, 30), frame.plot = FALSE, axes = FALSE, xlab = NA, ylab = NA)
rect(xleft = 3, ybottom = 28, xright = 7, ytop = 30, col = grDevices::adjustcolor("gray70", alpha.f = 0.6), lwd = 1.2, border = "maroon")
text(x = 5, y = 29, labels = "MAF", font = 2)
arrows(x0 = 5, y0 = 28, x1 = 5, y1 = 26, length = 0.1, lwd = 2)
text(x = 5, y = 25, labels = "trinucleotideMatrix()", font = 3)
arrows(x0 = 5, y0 = 24, x1 = 5, y1 = 21, length = 0.1, lwd = 2)
text(x = 5, y = 20, labels = "estimateSignatures()", font = 3)
arrows(x0 = 5, y0 = 19, x1 = 5, y1 = 16, length = 0.1, lwd = 2)
text(x = 5, y = 15, labels = "plotCophenetic()", font = 3)
arrows(x0 = 5, y0 = 14, x1 = 5, y1 = 11, length = 0.1, lwd = 2)
text(x = 5, y = 10, labels = "extractSignatures()", font = 3)
arrows(x0 = 5, y0 = 9, x1 = 5, y1 = 6, length = 0.1, lwd = 2)
text(x = 5, y = 5, labels = "compareSignatures()", font = 3)
arrows(x0 = 5, y0 = 4, x1 = 5, y1 = 1, length = 0.1, lwd = 2)
text(x = 5, y = 0, labels = "plotSignatures()", font = 3)

## ---- fig.height=5, fig.width=5, eval=FALSE, message=FALSE--------------------
#  library('NMF')
#  laml.sign = estimateSignatures(mat = laml.tnm, nTry = 6)

## ---- fig.height=3, fig.width=3, eval=TRUE, message=FALSE, echo=FALSE, include=FALSE----
#Run main function with maximum 6 signatures. 
library('NMF')
laml.sign = estimateSignatures(mat = laml.tnm, nTry = 6, pConstant = 0.1, plotBestFitRes = FALSE, parallel = 2)

## ---- fig.width=3, fig.height=3-----------------------------------------------
plotCophenetic(res = laml.sign)

## ---- eval=FALSE--------------------------------------------------------------
#  laml.sig = extractSignatures(mat = laml.tnm, n = 3)

## ---- eval=TRUE, echo=FALSE---------------------------------------------------
laml.sig = extractSignatures(mat = laml.tnm, n = 3, pConstant = 0.1,  parallel = 2)

## -----------------------------------------------------------------------------
#Compate against original 30 signatures 
laml.og30.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "legacy")
#Compate against updated version3 60 signatures 
laml.v3.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "SBS")

## ---- fig.width=7, fig.height=2.5, fig.align='center'-------------------------
library('pheatmap')
pheatmap::pheatmap(mat = laml.og30.cosm$cosine_similarities, cluster_rows = FALSE, main = "cosine similarity against validated signatures")

## ---- fig.width=6, fig.height=4, fig.align='center', eval = T-----------------
maftools::plotSignatures(nmfRes = laml.sig, title_size = 1.2, sig_db = "SBS")

## ---- eval=FALSE--------------------------------------------------------------
#  library("barplot3d")
#  #Visualize first signature
#  sig1 = laml.sig$signatures[,1]
#  barplot3d::legoplot3d(contextdata = sig1, labels = FALSE, scalexy = 0.01, sixcolors = "sanger", alpha = 0.5)

## ---- eval=T------------------------------------------------------------------
var.annovar = system.file("extdata", "variants.hg19_multianno.txt", package = "maftools")
var.annovar.maf = annovarToMaf(annovar = var.annovar, Center = 'CSI-NUS', refBuild = 'hg19', 
                               tsbCol = 'Tumor_Sample_Barcode', table = 'ensGene')


## -----------------------------------------------------------------------------
#Read sample ICGC data for ESCA
esca.icgc <- system.file("extdata", "simple_somatic_mutation.open.ESCA-CN.sample.tsv.gz", package = "maftools")
esca.maf <- icgcSimpleMutationToMAF(icgc = esca.icgc, addHugoSymbol = TRUE)
#Printing first 16 columns for display convenience.
print(esca.maf[1:5,1:16, with = FALSE])

## ---- eval=FALSE--------------------------------------------------------------
#  laml.mutsig.corrected = prepareMutSig(maf = laml)
#  # Converting gene names for 1 variants from 1 genes
#  #    Hugo_Symbol MutSig_Synonym N
#  # 1:    ARHGAP35          GRLF1 1
#  # Original symbols are preserved under column OG_Hugo_Symbol.

## -----------------------------------------------------------------------------
#Extract data for samples 'TCGA.AB.3009' and 'TCGA.AB.2933'  (Printing just 5 rows for display convenience)
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933'), mafObj = FALSE)[1:5]
##Same as above but return output as an MAF object (Default behaviour)
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933'))

## -----------------------------------------------------------------------------
#Select all Splice_Site mutations from DNMT3A and NPM1
subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), mafObj = FALSE,query = "Variant_Classification == 'Splice_Site'")

#Same as above but include only 'i_transcript_name' column in the output
subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), mafObj = FALSE, query = "Variant_Classification == 'Splice_Site'", fields = 'i_transcript_name')

## -----------------------------------------------------------------------------
#Select all samples with FAB clasification M4 in clinical data 
laml_m4 = subsetMaf(maf = laml, clinQuery = "FAB_classification %in% 'M4'")

## ---- eval=FALSE--------------------------------------------------------------
#  BiocManager::install(pkgs = "PoisonAlien/TCGAmutations")

## -----------------------------------------------------------------------------
sessionInfo()

Try the maftools package in your browser

Any scripts or data that you put into this service are public.

maftools documentation built on Feb. 6, 2021, 2 a.m.