# Chunk options shared by the code chunks in this document.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.align = "left",
  fig.height = 5,
  fig.width = 10
)
library(maftools)
# Path to the TCGA LAML MAF file shipped with maftools.
laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")

# Clinical information containing survival information and histology.
# This is optional.
laml.clin <- system.file("extdata", "tcga_laml_annot.tsv", package = "maftools")

# Read the MAF together with its clinical annotations.
laml <- read.maf(maf = laml.maf, clinicalData = laml.clin, verbose = FALSE)
# By default the function plots the top 20 mutated genes.
oncoplot(maf = laml, draw_titv = TRUE)
# Any colors can be used; this example takes a color palette from the
# RColorBrewer package.
vc_cols <- RColorBrewer::brewer.pal(n = 8, name = "Paired")

# Name each color after the variant classification it should represent.
names(vc_cols) <- c(
  "Frame_Shift_Del",
  "Missense_Mutation",
  "Nonsense_Mutation",
  "Multi_Hit",
  "Frame_Shift_Ins",
  "In_Frame_Ins",
  "Splice_Site",
  "In_Frame_Del"
)
print(vc_cols)

oncoplot(maf = laml, colors = vc_cols, top = 10)
There are two ways to include copy number (CN) status in a MAF: 1. GISTIC results 2. A custom copy number table
The most widely used tool for copy number analysis in large-scale studies is GISTIC, and we can read GISTIC results together with the MAF. GISTIC generates numerous files, but we mainly need four: all_lesions.conf_XX.txt
, amp_genes.conf_XX.txt
, del_genes.conf_XX.txt
, scores.gistic
where XX is the confidence level. These files contain significantly altered genomic regions along with amplified and deleted genes, respectively.
# GISTIC results for LAML, located in the maftools package's extdata.
all.lesions <- system.file(
  "extdata", "all_lesions.conf_99.txt",
  package = "maftools"
)
amp.genes <- system.file(
  "extdata", "amp_genes.conf_99.txt",
  package = "maftools"
)
del.genes <- system.file(
  "extdata", "del_genes.conf_99.txt",
  package = "maftools"
)
scores.gis <- system.file("extdata", "scores.gistic", package = "maftools")

# Read the GISTIC results along with the MAF.
laml.plus.gistic <- read.maf(
  maf = laml.maf,
  gisticAllLesionsFile = all.lesions,
  gisticAmpGenesFile = amp.genes,
  gisticDelGenesFile = del.genes,
  gisticScoresFile = scores.gis,
  isTCGA = TRUE,
  verbose = FALSE,
  clinicalData = laml.clin
)
# Oncoplot of the MAF that was read together with the GISTIC results,
# restricted to the top 10 genes.
oncoplot(
  maf = laml.plus.gistic,
  top = 10
)
This plot shows frequent deletions in the TP53 gene, which is located in one of the significantly deleted loci, 17p13.2.
If no GISTIC results are available, one can generate a table containing the CN status for known genes in known samples. This table can be easily created and read along with the MAF file.
For example, let's create dummy CN alterations for DNMT3A
in 20 random samples.
# Fix the seed so the random draws below are reproducible.
set.seed(seed = 1024)

barcodes <- as.character(
  getSampleSummary(x = laml)[, Tumor_Sample_Barcode]
)

# 20 random samples.
dummy.samples <- sample(x = barcodes, size = 20, replace = FALSE)

# Generate a random CN status for each of the above samples.
cn.status <- sample(
  x = c("ShallowAmp", "DeepDel", "Del", "Amp"),
  size = length(dummy.samples),
  replace = TRUE
)

# Custom copy number table: one row per (gene, sample) with its CN status.
custom.cn.data <- data.frame(
  Gene = "DNMT3A",
  Sample_name = dummy.samples,
  CN = cn.status,
  stringsAsFactors = FALSE
)
head(custom.cn.data)

# Read the MAF along with the custom copy number table and plot it.
laml.plus.cn <- read.maf(
  maf = laml.maf,
  cnTable = custom.cn.data,
  verbose = FALSE
)
oncoplot(maf = laml.plus.cn, top = 5)
leftBarData
, rightBarData
and topBarData
arguments can be used to display additional values as barplots. The example below demonstrates adding mean variant allele frequencies and MutSig q-values as left and right side bars, respectively.
# Selected AML driver genes.
aml_genes <- c(
  "TP53", "WT1", "PHF6", "DNMT3A", "DNMT3B", "TET1", "TET2", "IDH1", "IDH2",
  "FLT3", "KIT", "KRAS", "NRAS", "RUNX1", "CEBPA", "ASXL1", "EZH2", "KDM6A"
)

# Variant allele frequencies (left bar plot): mean i_TumorVAF_WU per gene.
aml_genes_vaf <- subsetMaf(
  maf = laml,
  genes = aml_genes,
  fields = "i_TumorVAF_WU",
  mafObj = FALSE
)[, mean(i_TumorVAF_WU, na.rm = TRUE), Hugo_Symbol]
colnames(aml_genes_vaf)[2] <- "VAF"
head(aml_genes_vaf)

# MutSig results (right bar plot).
laml.mutsig <- system.file(
  "extdata", "LAML_sig_genes.txt.gz",
  package = "maftools"
)
laml.mutsig <- data.table::fread(input = laml.mutsig)[, .(gene, q)]
laml.mutsig[, q := -log10(q)] # transform q-values to the -log10 scale
head(laml.mutsig)

oncoplot(
  maf = laml,
  genes = aml_genes,
  leftBarData = aml_genes_vaf,
  leftBarLims = c(0, 100),
  rightBarData = laml.mutsig,
  rightBarLims = c(0, 20)
)
Annotations are stored in clinical.data
slot of MAF.
# Show the clinical annotations attached to the MAF object.
getClinicalData(
  x = laml
)
Include FAB_classification
from clinical data as one of the sample annotations.
# Annotate the samples with the FAB_classification clinical feature.
oncoplot(
  maf = laml,
  genes = aml_genes,
  clinicalFeatures = "FAB_classification"
)
More than one annotation can be included by passing them to the argument clinicalFeatures
. The above plot can be further enhanced by sorting according to annotations. Custom colors can be specified as a list of named vectors giving a color for each level.
# Color coding for FAB classification.
fabcolors <- RColorBrewer::brewer.pal(n = 8, name = "Spectral")
names(fabcolors) <- c("M0", "M1", "M2", "M3", "M4", "M5", "M6", "M7")

# Wrap the named colors in a list keyed by the annotation they apply to.
fabcolors <- list(FAB_classification = fabcolors)
print(fabcolors)

oncoplot(
  maf = laml,
  genes = aml_genes,
  clinicalFeatures = "FAB_classification",
  sortByAnnotation = TRUE,
  annotationColor = fabcolors
)
If you prefer to highlight mutations by a specific attribute, you can use additionalFeature
argument.
Example: Highlight all mutations where alt allele is C.
# Highlight mutations whose Tumor_Seq_Allele2 value is "C".
oncoplot(
  maf = laml,
  genes = aml_genes,
  additionalFeature = c("Tumor_Seq_Allele2", "C")
)
Note that the first argument (Tumor_Seq_Allele2) must be a column in the MAF file, and the second argument (C) is a value in that column. If you want to know what columns are present in the MAF file, use getFields
.
# List the columns present in the MAF.
getFields(
  x = laml
)
Genes can be automatically grouped by their biological processes by setting pathways = 'auto'
or by providing custom pathway list in the form of a two column tsv file or a data.frame containing gene names and their corresponding pathway.
Setting pathways = 'auto'
draws the top 3 most affected pathways.
# Let oncoplot group the genes by pathway automatically.
oncoplot(
  maf = laml,
  pathways = "auto",
  gene_mar = 8,
  fontSize = 0.6
)
# Custom pathway table: one row per gene with the pathway it belongs to.
pathways <- data.frame(
  Genes = c(
    "TP53", "WT1", "PHF6",
    "DNMT3A", "DNMT3B", "TET1", "TET2", "IDH1", "IDH2",
    "FLT3", "KIT", "KRAS", "NRAS",
    "RUNX1", "CEBPA",
    "ASXL1", "EZH2", "KDM6A"
  ),
  # Each pathway label is repeated once per gene in the matching row above.
  Pathway = rep(
    c("TSG", "DNAm", "Signalling", "TFs", "ChromMod"),
    c(3, 6, 4, 2, 3)
  ),
  stringsAsFactors = FALSE
)
head(pathways)

oncoplot(maf = laml, pathways = pathways, gene_mar = 8, fontSize = 0.6)
# Combine the features shown in the earlier examples in a single oncoplot:
# GISTIC copy number data, the Ti/Tv panel, custom pathways, sorted clinical
# annotations, highlighted mutations, and left/right side bars.
oncoplot(
  maf = laml.plus.gistic,
  draw_titv = TRUE,
  pathways = pathways,
  clinicalFeatures = c("FAB_classification", "Overall_Survival_Status"),
  sortByAnnotation = TRUE,
  additionalFeature = c("Tumor_Seq_Allele2", "C"),
  leftBarData = aml_genes_vaf,
  leftBarLims = c(0, 100),
  rightBarData = laml.mutsig[, .(gene, q)]
)
# Report the R version and the loaded packages for this session.
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.