inst/doc/waterfall_introduction.R

## ---- fig.keep='last', fig.width=10, fig.height=6.5, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, error=TRUE----
# Attach GenVisR and fix the random seed
library(GenVisR)
set.seed(426)

# Waterfall plot with the (default) MAF file type stated explicitly.
# mainRecurCutoff is explained in the following section.
waterfall(
  brcaMAF,
  fileType = "MAF",
  mainRecurCutoff = 0.05
)

## ---- eval=FALSE, error=TRUE--------------------------------------------------
#  # read in a file from the genome modeling system
#  file <- read.delim("file.anno.tsv")
#  
#  # Plot the variant information via waterfall
#  waterfall(file, fileType="MGI")

## ---- fig.keep='all', fig.width=10, fig.height=7, message=FALSE, warning=FALSE, error=TRUE, results='hide', tidy=TRUE, fig.show='hold', fig.cap="In cell e/e (second row/first column) two variants are present \'z\' and \'x\'. In the first plot variant \'z\' is considered more deleterious (top panel), In the second plot variant \'x\' is considered more deleterious (bottom panel).", out.width="50%", out.height="50%"----
# The seed must be 426 for the random data below to be reproduced
set.seed(426)

# Assemble 20 randomly drawn sample / gene / variant-class records.
# The three columns are drawn in this order so the random stream is unchanged.
id_pool <- letters[1:5]
inputData <- data.frame(
  "sample" = sample(id_pool, 20, replace = TRUE),
  "gene" = sample(id_pool, 20, replace = TRUE),
  variant_class = sample(c("x", "y", "z"), 20, replace = TRUE)
)

# Rank the variant classes from most to least deleterious ("y" comes first)
most_deleterious <- c("y", "z", "x")

# First plot: custom file type using the ranking above
waterfall(
  inputData,
  fileType = "Custom",
  variant_class_order = most_deleterious,
  mainXlabel = TRUE
)

# Second plot: identical data with the ranking reversed
waterfall(
  inputData,
  fileType = "Custom",
  variant_class_order = rev(most_deleterious),
  mainXlabel = TRUE
)

## ----kable 1, echo=FALSE, error=TRUE, fig.cap="Hierarchy of variant sub types from most to least deleterious."----
library(knitr)

# Variant sub types for the MGI file type, listed from most to least deleterious
MGI <- c(
  "nonsense",
  "frame_shift_del",
  "frame_shift_ins",
  "splice_site_del",
  "splice_site_ins",
  "splice_site",
  "nonstop",
  "in_frame_del",
  "in_frame_ins",
  "missense",
  "splice_region_del",
  "splice_region_ins",
  "splice_region",
  "5_prime_flanking_region",
  "3_prime_flanking_region",
  "3_prime_untranslated_region",
  "5_prime_untranslated_region",
  "rna",
  "intronic",
  "silent"
)

# Variant sub types for the MAF file type, listed from most to least deleterious.
# The two trailing empty strings pad this vector to the length of MGI so that
# cbind() below lines the two columns up without recycling.
MAF <- c(
  "Nonsense_Mutation",
  "Frame_Shift_Ins",
  "Frame_Shift_Del",
  "Translation_Start_Site",
  "Splice_Site",
  "Nonstop_Mutation",
  "In_Frame_Ins",
  "In_Frame_Del",
  "Missense_Mutation",
  "5\'Flank",
  "3\'Flank",
  "5\'UTR",
  "3\'UTR",
  "RNA",
  "Intron",
  "IGR",
  "Silent",
  "Targeted_Region",
  "",
  ""
)

# Render both hierarchies side by side as a table
hierarchy_table <- as.data.frame(cbind(MAF, MGI))
kable(hierarchy_table)

## ---- fig.keep='last', fig.width=10, fig.height=6.5, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, error=TRUE----
# Plot the genes with mutations in >= 20% of samples
waterfall(
  brcaMAF,
  fileType = "MAF",
  mainRecurCutoff = 0.2
)

## ---- fig.keep='last', fig.width=10, fig.height=6.5, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, error=TRUE----
# Genes to restrict the plot to
genes_to_plot <- c(
  "ERBB2",
  "MAPK1",
  "CDKN1B",
  "PIK3CA"
)

# Draw the waterfall plot for only those genes
waterfall(brcaMAF, plotGenes = genes_to_plot)

## ---- fig.keep='last', fig.width=10, fig.height=6.5, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, error=TRUE----
# Samples (tumour barcodes) to restrict the plot to
samples_to_plot <- c(
  "TCGA-A1-A0SO-01A-22D-A099-09",
  "TCGA-A2-A0EU-01A-22W-A071-09",
  "TCGA-A2-A0ER-01A-21W-A050-09",
  "TCGA-A1-A0SI-01A-11D-A142-09",
  "TCGA-A2-A0D0-01A-11W-A019-09"
)

# Draw the waterfall plot for only those samples
waterfall(
  brcaMAF,
  plotSamples = samples_to_plot,
  mainRecurCutoff = 0.25
)

## ---- fig.keep='last', fig.width=10, fig.height=6.5, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, error=TRUE----

# Plot genes with a mutation recurrence above 5%, keep only the top 25,
# and remove silent mutations
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 25,
  rmvSilent = TRUE
)

## ---- fig.keep='last', fig.width=10, fig.height=6.5, message=FALSE, warning=FALSE, error=TRUE, results='hide', tidy=TRUE, fig.cap="Altering the coverage space dramatically affects the mutation burden calculation"----

# Switch the coverage space to whole genome space
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 25,
  coverageSpace = 3200000000
)

## ---- fig.keep='last', fig.width=10, fig.height=6.5, message=FALSE, warning=FALSE, error=TRUE, results='hide', tidy=TRUE----
# Build a data frame giving a (randomly drawn) mutation burden for each sample
tumor_sample <- unique(brcaMAF$Tumor_Sample_Barcode)
mutation_burden <- sample(
  1:10,
  length(tumor_sample),
  replace = TRUE
)
mutation_rate <- data.frame(
  sample = tumor_sample,
  mut_burden = mutation_burden
)

# Supply the user-defined mutation burden values through mutBurden
waterfall(
  brcaMAF,
  mutBurden = mutation_rate,
  mainRecurCutoff = 0.05,
  maxGenes = 25
)

## ---- fig.keep='last', fig.width=10, fig.height=6.5, message=FALSE, warning=FALSE, error=TRUE, results='hide', tidy=TRUE----
# Disable the mutation burden subplot
waterfall(
  brcaMAF,
  plotMutBurden = FALSE,
  mainRecurCutoff = 0.05,
  maxGenes = 25
)

## ---- fig.keep='last', fig.width=14, fig.height=10, message=FALSE, warning=FALSE, error=TRUE, results='hide', tidy=TRUE----
# Create clinical data: one randomly drawn subtype and age band per sample.
# The number of records is taken from the data instead of being hard-coded, so
# the columns always line up with the samples present in brcaMAF. The sample
# identifiers live in `sample_ids` so that no variable masks base::sample().
sample_ids <- as.character(unique(brcaMAF$Tumor_Sample_Barcode))
n_samples <- length(sample_ids)
subtype <- c("lumA", "lumB", "her2", "basal", "normal")
subtype <- sample(subtype, n_samples, replace = TRUE)
age <- c("20-30", "31-50", "51-60", "61+")
age <- sample(age, n_samples, replace = TRUE)
clinical <- as.data.frame(cbind(sample = sample_ids, subtype, age))

# Melt the clinical data into "long" format.
library(reshape2)
clinical <- melt(clinical, id.vars = "sample")

# Create the waterfall plot with the corresponding clinical data
waterfall(
  brcaMAF,
  clinDat = clinical,
  mainRecurCutoff = 0.05,
  maxGenes = 25
)

## ---- fig.keep='last', fig.width=12, fig.height=7.5, message=FALSE, warning=FALSE, error=TRUE, results='hide', tidy=TRUE----

# Create clinical data: one randomly drawn subtype and age band per sample.
# The number of records is taken from the data instead of being hard-coded, so
# the columns always line up with the samples present in brcaMAF. The sample
# identifiers live in `sample_ids` so that no variable masks base::sample().
sample_ids <- as.character(unique(brcaMAF$Tumor_Sample_Barcode))
n_samples <- length(sample_ids)
subtype_levels <- c("lumA", "lumB", "her2", "basal", "normal")
age_levels <- c("20-30", "31-50", "51-60", "61+")
subtype <- sample(subtype_levels, n_samples, replace = TRUE)
age <- sample(age_levels, n_samples, replace = TRUE)
clinical <- as.data.frame(cbind(sample = sample_ids, subtype, age))

# Melt the clinical data into "long" format.
library(reshape2)
clinical <- melt(clinical, id.vars = "sample")

# Create the waterfall plot, altering various aesthetics of the clinical data.
# The legend order reuses the level vectors so it cannot drift from the values
# that were actually sampled.
waterfall(
  brcaMAF,
  clinDat = clinical,
  clinVarCol = c(
    "lumA" = "blue4",
    "lumB" = "deepskyblue",
    "her2" = "hotpink2",
    "basal" = "firebrick2",
    "normal" = "green4",
    "20-30" = "#ddd1e7",
    "31-50" = "#bba3d0",
    "51-60" = "#9975b9",
    "61+" = "#7647a2"
  ),
  mainRecurCutoff = 0.05,
  maxGenes = 25,
  clinLegCol = 2,
  clinVarOrder = c(subtype_levels, age_levels)
)


## ---- fig.keep='last', fig.width=14, fig.height=6.5, message=FALSE, warning=FALSE, error=TRUE, results='hide', tidy=TRUE----
# Plot the proportion of mutation types observed
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10,
  plot_proportions = TRUE
)

## ---- fig.keep='last', fig.width=14, fig.height=6.5, message=FALSE, warning=FALSE, error=TRUE, results='hide', tidy=TRUE----
# Plot the proportions again, this time with proportions_type set to "TvTi"
# (presumably transition/transversion proportions -- see ?waterfall)
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10,
  plot_proportions = TRUE,
  proportions_type = "TvTi"
)

## ---- fig.keep='last', fig.width=14, fig.height=6.5, message=FALSE, warning=FALSE, error=TRUE, results='hide', tidy=TRUE----
# Label cells with the values of the Chromosome column in brcaMAF
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10,
  mainLabelCol = "Chromosome"
)

## ---- fig.keep='last', fig.width=14, fig.height=8.5, message=FALSE, warning=FALSE, results='hide', error=TRUE, tidy=TRUE----
# Label cells with the amino acid change column in brcaMAF, with the label
# angle set to 90 and the label size set to 3
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10,
  mainLabelCol = "amino_acid_change_WU",
  mainLabelAngle = 90,
  mainLabelSize = 3
)

## ---- fig.keep='last', fig.width=14, fig.height=8.5, message=FALSE, warning=FALSE, results='hide', error=TRUE, tidy=TRUE----
# Turn on the x-axis labels
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10,
  mainXlabel = TRUE
)

## ---- fig.keep='last', fig.width=14, fig.height=8.5, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, error=TRUE----
# Remove unused mutation types from the legend
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10,
  mainDropMut = TRUE
)

## ---- fig.keep='last', fig.width=14, fig.height=8.5, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, error=TRUE----
# Enlarge the gene labels
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10,
  mainDropMut = TRUE,
  main_geneLabSize = 14
)

## ---- fig.keep='last', fig.width=14, fig.height=8.5, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, error=TRUE----
# Define a custom colour palette
custom_palette <- c(
  "#A069C7",
  "#9CD05B",
  "#C46839",
  "#97BDBD",
  "#513C4D",
  "#6B7644",
  "#C6587F"
)

# Pass the custom colour palette to the main plot
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10,
  mainDropMut = TRUE,
  mainPalette = custom_palette
)

## ---- fig.keep='last', fig.width=14, fig.height=8.5, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, error=TRUE----
# Give the subplot the same height as the main plot
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10,
  mainDropMut = TRUE,
  section_heights = c(1, 1)
)

## ---- fig.keep='last', fig.width=14, fig.height=8.5, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, fig.cap="As can be seen we have changed the theme via ggplot2 however this has overwritten the previously defined theme which had suppressed the x-axis labels and removed the rotation.", error=TRUE----
# Attach ggplot2 for theme() and theme_grey()
library(ggplot2)

# Layer for the mutation burden plot: hide the y axis ticks, text and title
mut_burden_layer <- theme(
  axis.ticks.y = element_blank(),
  axis.text.y = element_blank(),
  axis.title.y = element_blank()
)

# Layer for the main plot: revert to the default ggplot theme
main_layer <- theme_grey()

# Draw the waterfall plot with both layers applied
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10,
  mainDropMut = TRUE,
  mainLayer = main_layer,
  mutBurdenLayer = mut_burden_layer
)

## ---- fig.keep='last', fig.width=14, fig.height=10, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, error=TRUE----
# Create clinical data: one randomly drawn subtype and age band per sample.
# The number of records is taken from the data instead of being hard-coded, so
# the columns always line up with the samples present in brcaMAF. The sample
# identifiers live in `sample_ids` so that no variable masks base::sample().
sample_ids <- as.character(unique(brcaMAF$Tumor_Sample_Barcode))
n_samples <- length(sample_ids)
subtype <- c("lumA", "lumB", "her2", "basal", "normal")
subtype <- sample(subtype, n_samples, replace = TRUE)
age <- c("20-30", "31-50", "51-60", "61+")
age <- sample(age, n_samples, replace = TRUE)
clinical <- as.data.frame(cbind(sample = sample_ids, subtype, age))

# Melt the clinical data into "long" format.
library(reshape2)
clinical <- melt(clinical, id.vars = "sample")

# Obtain a sample order corresponding to the clinical data: sort the long
# table by variable and then value, and keep each sample's first occurrence
clinical_row_order <- order(clinical$variable, clinical$value)
new_samp_order <- as.character(unique(clinical$sample[clinical_row_order]))

# Create the waterfall plot with the clinical data and that sample order
waterfall(
  brcaMAF,
  clinDat = clinical,
  mainRecurCutoff = 0.05,
  maxGenes = 25,
  sampOrder = new_samp_order
)

## ---- fig.keep='last', fig.width=14, fig.height=8.5, message=FALSE, warning=FALSE, results='hide', tidy=TRUE, error=TRUE----
# Custom ordering for the genes
new_gene_order <- c(
  "MUC16",
  "MUC17",
  "MUC12",
  "RYR2",
  "PIK3CA",
  "TP53",
  "USH2A",
  "MLL3",
  "TTN",
  "LRP2"
)

# Draw the waterfall plot with the genes in the custom order
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10,
  geneOrder = new_gene_order
)

## ---- eval=FALSE, error=TRUE--------------------------------------------------
#  # Save a GenVisR plot
#  pdf(file="myplot.pdf", height=10, width=15)
#  waterfall(brcaMAF, mainRecurCutoff=.05, maxGenes=10)
#  dev.off()

## ---- fig.keep='last', fig.width=5, fig.height=5, message=FALSE, warning=FALSE, error=TRUE, results='hide', tidy=TRUE, fig.cap="A small graphics device may cause grob collisions, here the device size is 5 by 5"----
# Draw a GenVisR plot on a small graphics device
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10
)

## ---- fig.keep='last', fig.width=10, fig.height=7, message=FALSE, warning=FALSE, results='hide', error=TRUE, tidy=TRUE, fig.cap="Increasing the size of the graphic device will alleviate this issue, here the device size is 10 by 7"----
# Draw the same GenVisR plot on a larger graphics device
waterfall(
  brcaMAF,
  mainRecurCutoff = 0.05,
  maxGenes = 10
)

Try the GenVisR package in your browser

Any scripts or data that you put into this service are public.

GenVisR documentation built on Dec. 28, 2020, 2 a.m.