knitr::opts_chunk$set(echo = FALSE) require(tidyverse) require(maftools) require(circlize) require(data.table) require(rtracklayer) require(RMariaDB) require(ggbeeswarm) require(DBI) require(ComplexHeatmap) require(gridExtra) require(ggsci)
get_
" functions to retrieve a specific type of mutationsannotate_
" functions to add annotations to data on-the-fly using built-in or your own annotation definitionscollate_
" functions to summarize key results per-sample for additional annotations and analyses\tiny
#library(GAMBLR) just_metadata = get_gambl_metadata() dim(just_metadata) just_metadata %>% dplyr::filter(cohort == "DLBCL_LSARP_Trios") %>% dplyr::select(patient_id, sample_id, biopsy_id, time_point) %>% head(3) %>% knitr::kable("latex", row.names = FALSE)
\normalsize
with_outcomes
\tiny
outcome_meta = get_gambl_metadata(with_outcomes = TRUE) outcome_meta %>% dplyr::select(CODE_OS, OS_YEARS, CODE_PFS, PFS_YEARS) %>% head(6) %>% knitr::kable("latex", row.names = FALSE)
\normalsize
\tiny
collate_results() %>% dplyr::select(ashm_MYC, NFKBIZ_UTR, manta_BCL6_partner) %>% dplyr::filter(NFKBIZ_UTR != "NEG" & manta_BCL6_partner != "NEG") %>% head(4) %>% knitr::kable("latex", row.names = FALSE)
\normalsize
grch37_partners
grch37_oncogene
grch37_ashm_regions
grch37_lymphoma_genes_bed
lymphoma_genes
\tiny
get_manta_sv() %>% annotate_sv(with_chr_prefix = TRUE) %>% # Add chr prefix dplyr::filter(!is.na(partner)) %>% # Drop SVs without known partners dplyr::filter(gene == "BCL6") %>% # Keep the SVs you care about head(5) %>% dplyr::select(chrom1, start1, tumour_sample_id, fusion) %>% knitr::kable("latex", row.names = FALSE)
\normalsize
get_sv
retrieves and annotate_sv
quickly annotates all the SVs in GAMBL
Input is bedpe format (can be your own non-GAMBL data!)
Expects hg19 bedpe but if you have hg38 bedpe, check out liftover_bedpe
Some tables return tumour_sample_id as an alias for sample_id (to disambiguate from normal_sample_id)
get_coding_ssm
get_ssm_by_region
if you only want variants in one regionget_ssm_by_regions
if you want to efficiently retrive for multiple regions\tiny
# get all the NFKBIZ 3' UTR mutations nfkbiz_ssm = get_ssm_by_region(region = "chr3:101,578,215-101,578,366") ggplot(nfkbiz_ssm) + geom_histogram(aes(x = Start_Position)) + theme_minimal()
\normalsize
config::get()
to see the contents of the config\tiny
maf_data = get_coding_ssm() # Everything at once maf_obj = read.maf(maf_data, clinicalData = just_metadata) # Or base_dir = config::get("project_base") maf_file = paste0(base_dir, "icgc_dart/slms-3_vcf2maf_current/level_3/final_merged_grch37.CDS.maf") # Combine MAF data and metadata into maftools object maf_obj = read.maf(maf_file, verbose = FALSE) plotmafSummary(maf_obj)
\normalsize
\tiny
base_dir = config::get("project_base") maf_file = paste0(base_dir, "icgc_dart/slms-3_vcf2maf_current/level_3/final_merged_grch37.CDS.maf") # Combine MAF data and metadata into maftools object maf_obj = read.maf(maf_file, verbose = FALSE) plotmafSummary(maf_obj)
ashm_multi_rainbow_plot
function is highly configurable and can plot mutation patterns across thousands of samples for one or many regions at once\tiny
cols = get_gambl_colours() # Get one of the custom colour palettes or all colours together some_meta = get_gambl_metadata() %>% # Narrow down to cases we want to look at dplyr::filter(pathology %in% c("DLBCL", "BL", "FL", "HGBL")) %>% arrange(bcl2_ba, myc_ba) ashm_multi_rainbow_plot(regions_to_display = c("BCL2-TSS","MYC-TSS"), custom_colours = cols, metadata = some_meta, classification_column = "lymphgen")
\normalsize
cols = get_gambl_colours() # Get one of the custom colour palettes or all colours together some_meta = get_gambl_metadata() %>% # Narrow down to cases we want to look at dplyr::filter(pathology %in% c("DLBCL", "BL", "FL", "HGBL")) %>% arrange(bcl2_ba, myc_ba) ashm_multi_rainbow_plot(regions_to_display = c("BCL2-TSS","MYC-TSS"), custom_colours = cols, metadata = some_meta, classification_column = "pathology")
sanitize_maf_data
that generates the same matrix from a maftools object and this can be shared with others without restrictionprettyOncoplot
can use either the maftools object or sanitized matrix as input so you don't need the original data to use it\tiny
bl_genes = c("MYC", "ID3", "TP53", "ARID1A", "FBXO11", "GNA13","TCF3", "TFAP4", "HNRNPU", "FOXO1", "CCND3", "SMARCA4", "DDX3X") dlbcl_genes = c("EZH2", "KMT2D", "MEF2B", "CREBBP", "MYD88") gene_groups = c(rep("BL", length(bl_genes)), rep("DLBCL", length(dlbcl_genes))) just_metadata = dplyr::filter(just_metadata, !lymphgen %in% c("COMPOSITE")) genes = c(bl_genes, dlbcl_genes) names(gene_groups) = genes oncoplot(maf_obj, writeMatrix = TRUE, genes = genes, removeNonMutated = FALSE)
\normalsize
\tiny
bl_genes=c("MYC", "ID3", "TP53", "ARID1A", "FBXO11", "GNA13", "TCF3", "TFAP4", "HNRNPU", "FOXO1", "CCND3", "SMARCA4", "DDX3X") dlbcl_genes = c("EZH2", "KMT2D", "MEF2B", "CREBBP", "MYD88") gene_groups = c(rep("BL", length(bl_genes)), rep("DLBCL", length(dlbcl_genes))) just_metadata = dplyr::filter(just_metadata,!lymphgen %in% c("COMPOSITE")) genes = c(bl_genes, dlbcl_genes) names(gene_groups) = genes # oncoplot(maf_obj, writeMatrix = TRUE, genes = genes, removeNonMutated = FALSE) # You would need to run oncoplot() at least once with the data you want to send to prettyOncoplot prettyOncoplot(maftools_obj = maf_obj, genes = genes, these_samples_metadata = just_metadata, metadataColumns = c("pathology", "lymphgen", "sex", "EBV_status_inf", "cohort"), sortByColumns = c("pathology", "sex", "lymphgen", "EBV_status_inf", "cohort"), keepGeneOrder = TRUE, splitGeneGroups = gene_groups, splitColumnName = "pathology", metadataBarHeight = 5, metadataBarFontsize = 8, fontSizeGene = 11, recycleOncomatrix = TRUE, include_noncoding = NULL, removeNonMutated = FALSE)
\normalsize
\tiny
bl_genes = c("MYC", "ID3", "TP53", "ARID1A", "FBXO11", "GNA13", "TCF3", "TFAP4", "HNRNPU", "FOXO1", "CCND3", "SMARCA4", "DDX3X") dlbcl_genes = c("EZH2", "KMT2D", "MEF2B", "CREBBP", "MYD88") gene_groups = c(rep("BL", length(bl_genes)), rep("DLBCL", length(dlbcl_genes))) just_metadata = dplyr::filter(just_metadata,!lymphgen %in% c("COMPOSITE")) %>% dplyr::filter(pathology %in% c("DLBCL","BL")) genes = c(bl_genes, dlbcl_genes) names(gene_groups) = genes # oncoplot(maf_obj, writeMatrix = TRUE, genes = genes, removeNonMutated = FALSE) # You would need to run oncoplot() at least once with the data you want to send to prettyOncoplot prettyOncoplot(maftools_obj = maf_obj, genes = genes, these_samples_metadata = just_metadata, metadataColumns = c("pathology", "lymphgen", "sex", "EBV_status_inf", "cohort"), sortByColumns = c("pathology", "sex" ,"lymphgen", "EBV_status_inf", "cohort"), keepGeneOrder = TRUE, splitGeneGroups = gene_groups, splitColumnName = "pathology", metadataBarHeight = 5, metadataBarFontsize = 8, fontSizeGene = 11, recycleOncomatrix = TRUE, include_noncoding = NULL, removeNonMutated = FALSE)
\normalsize
plot_cols = function(include_nhl = FALSE, remove_composite = TRUE){ path_cols = ggsci::get_ash("b-cell") path_df = data.frame(Pathology = factor(names(path_cols), levels = names(path_cols)), hex = path_cols) %>% dplyr::filter(!Pathology %in% c("B-ALL", "PMBCL")) nhl = ggplot(path_df, aes(x = Pathology, y = 0, fill = hex, label = Pathology)) + geom_tile(width = 0.9, height = 1) + geom_text(color = "white", fontface = "bold") + scale_fill_identity(guide = "none") + coord_flip() + theme_void() + labs(title = "B-NHL") + theme(plot.title = element_text(lineheight = 0.9, hjust = 0.5, face = "bold")) coo_cols = ggsci::get_ash("coo") coo_df = data.frame(COO = factor(names(coo_cols), levels = names(coo_cols)), hex = coo_cols) %>% dplyr::filter(!COO == "U" & !COO == "UNC") coo = ggplot(coo_df, aes(x = COO, y = 0, fill = hex, label = COO)) + geom_tile(width = 0.9, height = 1) + geom_text(color = "white", fontface="bold") + scale_fill_identity(guide = "none") + coord_flip() + theme_void() + labs(title = "COO") + theme(plot.title = element_text(lineheight = 0.9, hjust = 0.5, face = "bold")) harvard_cols = ggsci::get_ash("harvard") harvard_df = data.frame(Harvard = factor(names(harvard_cols), levels = names(harvard_cols)), hex = harvard_cols) harvard = ggplot(harvard_df, aes(x = Harvard, y = 0, fill = hex, label = Harvard)) + geom_tile(width = 0.9, height = 1) + geom_text(color = "white", fontface = "bold") + scale_fill_identity(guide = "none") + coord_flip() + theme_void() + labs(title = "Harvard") + theme(plot.title = element_text(lineheight = 0.9, hjust = 0.5, face = "bold")) lymphgen_cols = ggsci::get_ash("lymphgen") # lymphgen_cols = c("#52000F", lymphgen_cols) # names(lymphgen_cols)[1] = "EZB-M+" lymphgen_df = data.frame(LymphGen = factor(names(lymphgen_cols), levels = names(lymphgen_cols)), hex = lymphgen_cols) if(remove_composite){ lymphgen_df = dplyr::filter(lymphgen_df, LymphGen %in% c("Other", "A53", "N1", "BN2", "MCD", "ST2", "EZB", "EZB-MYC")) } lymphgen = ggplot(lymphgen_df, aes(x = LymphGen, y = 0, fill = hex, label = LymphGen)) + geom_tile(width = 0.9, height = 1) + geom_text(color = "white", fontface = "bold") + scale_fill_identity(guide = "none") + coord_flip() + theme_void() + labs(title = "LymphGen") + theme(plot.title = element_text(lineheight = 0.9, hjust = 0.5, face = "bold")) hmrn_cols = ggsci::get_ash("hmrn") hmrn_df = data.frame(HMRN = factor(names(hmrn_cols), levels = names(hmrn_cols)), hex = hmrn_cols) hmrn = ggplot(hmrn_df, aes(x = HMRN, y = 0, fill = hex, label = HMRN)) + geom_tile(width = 0.9, height = 1) + geom_text(color = "white", fontface = "bold") + scale_fill_identity(guide = "none") + coord_flip() + theme_void() + labs(title = "HMRN") + theme(plot.title = element_text(lineheight = 0.9, hjust = 0.5, face = "bold")) if(include_nhl){ grid.arrange(nhl, coo, harvard, lymphgen, hmrn, ncol = 5) } else{ grid.arrange(coo, harvard, lymphgen, hmrn, ncol = 5) } } plot_cols(include_nhl = TRUE)
\tiny
get_gene_expression(hugo_symbols = c("IRF4", "MYC"), join_with = "genome") %>% head(4) %>% knitr::kable("latex", row.names = FALSE)
\normalsize
\tiny
all_exp = get_gene_expression(hugo_symbols = c("IRF4", "MYC"), join_with = "genome") collated_meta = collate_results(join_with_full_metadata = TRUE) %>% dplyr::filter(!lymphgen %in% c("COMPOSITE")) %>% dplyr::filter(pathology %in% c("DLBCL", "BL")) # Get everything together exp_with_meta = left_join(all_exp, collated_meta) %>% dplyr::filter(!is.na(MYC) & !is.na(ashm_MYC)) %>% mutate(myc_hypermutated = ifelse(ashm_MYC > 1, "POS", "NEG")) ggplot(exp_with_meta, aes(x = myc_hypermutated, y = MYC, colour = manta_MYC_sv)) + geom_boxplot() + geom_quasirandom(dodge.width = 0.8) + scale_color_manual(values = get_gambl_colours()) + facet_wrap(~pathology, ncol = 3)
\normalsize
\tiny
copy_number_vaf_plot(this_sample = "HTMCP-01-06-00422-01A-01D")
\normalsize
\tiny
my_genes = GAMBLR.data::lymphoma_genes %>% # Use the bundled list of lymphoma genes dplyr::filter(BL == TRUE | DLBCL == TRUE) %>% pull(Gene) copy_number_vaf_plot(this_sample = "07-35482T", coding_only = TRUE, genes_to_label = my_genes)
\normalsize
some_genes = unique(c("BCL6", "MYC", "IRF4", "BEST3", "CREB3L2", "HEY2", "TNFRSF13B", "FCRLB", "TOX", "SERPINA9", "MAPK10", "PDGFD", "PDE4B", "SUGCT", "SLC1A1", "MME", "CCND3")) some_genes = c("IRF4", "BANK1", "ABI3", "CCDC50", "S1PR2", "ITPKB", "SERPINA9", "TNFRSF13B") all_exp = get_gene_expression(hugo_symbols = some_genes, join_with = "genome") all_metadata = collate_results(join_with_full_metadata = TRUE, sbs_manipulation = "scale") %>% dplyr::filter(pathology %in% c("BL", "DLBCL")) extra_meta = left_join(all_exp, all_metadata, by = "sample_id") %>% dplyr::filter(!is.na(patient_id)) %>% dplyr::filter(!is.na(IRF4)) bl_cases = get_gambl_metadata(case_set = "BLGSP-study") %>% dplyr::filter(cohort == "BL_Adult") %>% head() just_metadata = get_gambl_metadata() # extra_meta_scaled_everything = extra_meta %>% # mutate(across(all_of(some_genes), ~ trim_scale_expression(.x))) bl_genes = c("MYC", "ID3", "TP53", "ARID1A", "FBXO11", "GNA13", "TCF3", "TFAP4", "HNRNPU", "FOXO1", "CCND3", "SMARCA4", "DDX3X") dlbcl_genes = c("EZH2", "KMT2D", "CREBBP") gene_groups = c(rep("BL", length(bl_genes)), rep("DLBCL", length(dlbcl_genes))) just_metadata = dplyr::filter(just_metadata, !lymphgen %in% c("COMPOSITE")) %>% dplyr::filter(pathology %in% c("DLBCL", "BL")) genes = c(bl_genes, dlbcl_genes) names(gene_groups) = genes # oncoplot(maf_obj, writeMatrix = TRUE, genes = genes, removeNonMutated = FALSE) # You would need to run oncoplot() at least once with the data you want to send to prettyOncoplot extra_meta = mutate(extra_meta, DLBCL90_dhitsig_call = ifelse(is.na(DLBCL90_dhitsig_call), "NA", DLBCL90_dhitsig_call)) # extra_meta = mutate(extra_meta, sbs9 = ifelse(is.na(sbs9), 0, sbs9)) probe_level = read_tsv("/projects/rmorin/projects/gambl-repos/gambl-rmorin/data/metadata/raw_metadata/blgsp_dlbcl90_byprobe.tsv") %>% select(-1, -3, -4, -7, -8, -normval, -pmblval, -pmblp, -pmblcall, -dlbclval, -dlbclp, -dlbclcall, -totalcall, -dhitsig_score, -dhitsig_class, -dhitsig_prob_pos, -dhitsig_prob_neg, -bcl6) extra_meta = left_join(extra_meta, probe_level, by = c("patient_id" = "sample_id2")) cluster_bl = read_tsv("/projects/rmorin/adult_blgsp/results_manuscript/NMF_clustering_non_redundant_hotspots.tsv") %>% dplyr::select(-pathology, -cohort) extra_meta = left_join(extra_meta, cluster_bl) prettyOncoplot(maftools_obj = maf_obj, genes = genes, these_samples_metadata = extra_meta, metadataColumns = c("pathology", "COO_consensus", "cluster_name", "EBV_status_inf"), sortByColumns = c("pathology", "cluster_name", "IRF4", "COO_consensus"), expressionColumns = c("IRF4", "irf4"), keepGeneOrder = FALSE, splitGeneGroups = gene_groups, splitColumnName = "pathology", metadataBarHeight = 5, metadataBarFontsize = 8, fontSizeGene = 11, recycleOncomatrix = TRUE, include_noncoding = NULL, removeNonMutated = FALSE)
groups=c("TP53" = "TP53", "ID3" = "ICS", "CCND3" = "ICS", "SMARCA4" = "ICS", "TCF3" = "ICS", "P2RY8" = "ICS", "DDX3X" = "DGS", "GNA13" = "DGS", "GNAI2" = "DGS", "HNRNPU" = "DGS", "SIN3A" = "DGS", "ARID1A" = "BL", "FOXO1" = "BL", "FBXO11" = "BL", "RHOA" = "BL", "KMT2D" = "DLBCL", "EZH2" = "DLBCL", "TET2" = "DLBCL", "KLHL6" = "DLBCL") bl_genes = names(groups) prettyOncoplot(maftools_obj = maf_obj, genes = bl_genes, these_samples_metadata = extra_meta, metadataColumns = c("pathology", "COO_consensus", "cluster_name", "EBV_status_inf"), sortByColumns = c("IRF4"), expressionColumns = c("IRF4", "irf4"), keepGeneOrder = FALSE, splitGeneGroups = groups, splitColumnName = "cluster_name", metadataBarHeight = 5, metadataBarFontsize = 8, fontSizeGene = 11, recycleOncomatrix = TRUE, include_noncoding = NULL, removeNonMutated = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.