# inst/doc/musicatk.R

## ----setup, include=FALSE, results = "asis"-----------------------------------
# Run BiocStyle's markdown setup, then set the knitr chunk defaults used by
# the remaining chunks: echo the code and draw figures with the "png" device.
BiocStyle::markdown()
knitr::opts_chunk$set(
  dev = "png",
  echo = TRUE
)

## ---- eval= FALSE-------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE)){
#      install.packages("BiocManager")}
#  BiocManager::install("musicatk")

## ---- eval = FALSE------------------------------------------------------------
#  if (!requireNamespace("devtools", quietly=TRUE)){
#      install.packages("devtools")}
#  
#  library(devtools)
#  install_github("campbio/musicatk")

## ---- eval = TRUE, message = FALSE--------------------------------------------
library(musicatk)

## ----extract_variants, message = FALSE----------------------------------------
# One MAF file located in the installed package's "extdata" directory.
lusc_maf <- system.file(
  "extdata", "public_TCGA.LUSC.maf",
  package = "musicatk"
)
lusc.variants <- extract_variants_from_maf_file(maf_file = lusc_maf)

# One VCF file from the same directory.
luad_vcf <- system.file(
  "extdata", "public_LUAD_TCGA-97-7938.vcf",
  package = "musicatk"
)
luad.variants <- extract_variants_from_vcf_file(vcf_file = luad_vcf)

# Full paths of every "extdata" file whose name matches the "*SKCM*vcf" glob.
melanoma_vcfs <- list.files(
  path = system.file("extdata", package = "musicatk"),
  pattern = glob2rx("*SKCM*vcf"),
  full.names = TRUE
)

# Hand the MAF path, the VCF path and the melanoma VCF paths to a single call.
variants <- extract_variants(c(lusc_maf, luad_vcf, melanoma_vcfs))

## ----select_genome------------------------------------------------------------
# Genome handle for the "hg38" build; reused by the table-building calls.
g <- select_genome("hg38")

## ----create_musica------------------------------------------------------------
# Combine the extracted variants and the genome into one musica object.
musica <- create_musica(x = variants, genome = g)

## ----build_tables-------------------------------------------------------------
# The return value is not assigned here -- presumably the call updates
# `musica` in the calling environment; confirm against the package docs.
build_standard_table(
  musica,
  g = g,
  table_name = "SBS96"
)

## ----discover_sigs------------------------------------------------------------
# Discover 3 signatures from the "SBS96" table using the "lda" method.
result <- discover_signatures(
  musica = musica,
  table_name = "SBS96",
  num_signatures = 3,
  method = "lda",
  nstart = 10
)

## ----result_accessors---------------------------------------------------------
# Pull the exposures out of the result and show the top-left 3 x 3 corner.
expos <- exposures(result)
expos[seq_len(3), seq_len(3)]

# Same for the signatures.
sigs <- signatures(result)
sigs[seq_len(3), seq_len(3)]

## ---- plot_sigs---------------------------------------------------------------
# Plot the signatures before any names are assigned.
plot_signatures(result)

## ---- name_sigs---------------------------------------------------------------
# Supply three names for the signatures, then plot again.
name_signatures(
  result,
  c("Smoking", "APOBEC", "UV")
)
plot_signatures(result)

## ----exposures_raw------------------------------------------------------------
# Bar plot of exposures with the default (non-proportional) setting.
plot_exposures(result, plot_type = "bar")

## ----exposures_prop-----------------------------------------------------------
# Same bar plot with `proportional = TRUE`.
plot_exposures(
  result,
  plot_type = "bar",
  proportional = TRUE
)

## ----sample_counts------------------------------------------------------------
# Plot the "SBS96" counts for the third, fourth and fifth sample names.
samples <- sample_names(musica)
plot_sample_counts(
  musica,
  sample_names = samples[3:5],
  table_name = "SBS96"
)

## ----compare_cosmic-----------------------------------------------------------
# Compare the discovered signatures with COSMIC v2 at a 0.75 threshold.
compare_cosmic_v2(result, threshold = 0.75)

## ----predict_cosmic-----------------------------------------------------------

# Bring the `cosmic_v2_sigs` data set into the workspace.
data("cosmic_v2_sigs")

# Predict exposures for COSMIC v2 signatures 1, 4, 7 and 13 with "lda".
pred_cosmic <- predict_exposure(
  musica = musica,
  table_name = "SBS96",
  signature_res = cosmic_v2_sigs,
  signatures_to_use = c(1, 4, 7, 13),
  algorithm = "lda"
)

# Bar plot of the predicted exposures.
plot_exposures(pred_cosmic, plot_type = "bar")

## ----subtype_map--------------------------------------------------------------
# Look up the COSMIC v2 subtype map entry for "lung".
cosmic_v2_subtype_map("lung")

## ----predict_previous---------------------------------------------------------
# Predict exposures again, this time against the signatures held in `result`.
pred_our_sigs <- predict_exposure(
  musica = musica,
  table_name = "SBS96",
  signature_res = result,
  algorithm = "lda"
)

## ----predict_compare----------------------------------------------------------
# Compare the two prediction results at a 0.6 threshold.
compare_results(
  result = pred_cosmic,
  other_result = pred_our_sigs,
  threshold = 0.6
)

## ----annotations--------------------------------------------------------------
# Read the tab-separated sample annotation file (with a header row) shipped
# in the package's "extdata" directory.
annot <- read.table(
  system.file("extdata", "sample_annotations.txt", package = "musicatk"),
  header = TRUE,
  sep = "\t"
)

# Store its Tumor_Subtypes column on the result under the same name.
samp_annot(result, "Tumor_Subtypes") <- annot$Tumor_Subtypes

## ----sample_names-------------------------------------------------------------
# Print the sample names held by the result.
sample_names(result)

## ----plot_exposures_by_subtype------------------------------------------------
# Bar plot grouped by the "Tumor_Subtypes" annotation.
plot_exposures(
  result,
  plot_type = "bar",
  group_by = "annotation",
  annotation = "Tumor_Subtypes"
)

## ----plot_exposures_box_annot-------------------------------------------------
# Box plot grouped by the "Tumor_Subtypes" annotation.
plot_exposures(
  result,
  plot_type = "box",
  group_by = "annotation",
  annotation = "Tumor_Subtypes"
)

## ----plot_exposures_box_sig---------------------------------------------------
# Box plot grouped by signature and coloured by the annotation.
plot_exposures(
  result,
  plot_type = "box",
  group_by = "signature",
  color_by = "annotation",
  annotation = "Tumor_Subtypes"
)

## ----umap_create--------------------------------------------------------------
# The return value is not assigned -- presumably the UMAP is stored on
# `result` itself; confirm against the package docs.
create_umap(result = result)

## ----umap_plot----------------------------------------------------------------
# UMAP plot with default settings.
plot_umap(result = result)

## ----umap_plot_same_scale-----------------------------------------------------
# UMAP plot with `same_scale` switched off.
plot_umap(
  result = result,
  same_scale = FALSE
)

## ----umap_plot_annot----------------------------------------------------------
# UMAP plot coloured by the "Tumor_Subtypes" annotation, with labels added.
plot_umap(
  result = result,
  color_by = "annotation",
  annotation = "Tumor_Subtypes",
  add_annotation_labels = TRUE
)

## ----plotly-------------------------------------------------------------------
# The three plot functions again, each called with `plotly = TRUE`.
plot_signatures(result, plotly = TRUE)
plot_exposures(result, plotly = TRUE)
plot_umap(result, plotly = TRUE)

## ----reproducible_prediction--------------------------------------------------
# Evaluate the prediction inside withr::with_seed() so the call runs under a
# fixed random seed.
seed <- 1
reproducible_prediction <- withr::with_seed(
  seed,
  predict_exposure(
    musica = musica,
    table_name = "SBS96",
    signature_res = result,
    algorithm = "lda"
  )
)

## ----combine_tables-----------------------------------------------------------
# Load the `dbs_musica` example data set.
data(dbs_musica)

# Build both standard count tables on `dbs_musica`. Arguments are named the
# same way as the SBS96 build earlier in this script.
build_standard_table(dbs_musica, g = g, table_name = "SBS96")
build_standard_table(dbs_musica, g = g, table_name = "DBS")

# Combine the two tables into one called "sbs_dbs". The description is kept on
# a single source line: an R string literal that spans lines keeps the line
# break and the continuation indentation as part of its value, which is not
# wanted in a stored description.
combine_count_tables(
  musica = dbs_musica,
  to_comb = c("SBS96", "DBS"),
  name = "sbs_dbs",
  description = "An example combined table, combining SBS96 and DBS",
  overwrite = TRUE
)

## -----------------------------------------------------------------------------
# Annotate variants with transcript strand without building a table.
# NOTE(review): the build passed here is "19" while the genome selected
# earlier in this script is "hg38" -- confirm this is intended.
annotate_transcript_strand(
  musica,
  "19",
  build_table = FALSE
)

# Build a custom table from the "Transcript_Strand" variant annotation with
# the factor levels "T" and "U", replacing any existing table of that name.
build_custom_table(
  musica = musica,
  variant_annotation = "Transcript_Strand",
  name = "Transcript_Strand",
  description = "A table of transcript strand of variants",
  data_factor = c("T", "U"),
  overwrite = TRUE
)

## ----session------------------------------------------------------------------
# Print the R version and the attached/loaded packages for this run.
utils::sessionInfo()

# Try the musicatk package in your browser
#
# Any scripts or data that you put into this service are public.
#
# musicatk documentation built on Nov. 8, 2020, 5:16 p.m.