knitr::opts_chunk$set(echo = FALSE)
require(tidyverse)
require(maftools)
require(circlize)
require(data.table)
require(rtracklayer)
require(RMariaDB)
require(ggbeeswarm)
require(DBI)
require(ComplexHeatmap)
require(gridExtra)
require(ggsci)

What is GAMBLR?

How does it work?

Where can you learn more?

Getting started: metadata and collated results

\tiny

#library(GAMBLR)
just_metadata = get_gambl_metadata() 
dim(just_metadata)

just_metadata %>%
    dplyr::filter(cohort == "DLBCL_LSARP_Trios") %>% 
    dplyr::select(patient_id, sample_id, biopsy_id, time_point) %>%
    head(3) %>%
    knitr::kable("latex", row.names = FALSE) 

\normalsize

Extended metadata: outcomes

\tiny

outcome_meta = get_gambl_metadata(with_outcomes = TRUE) 

outcome_meta %>% 
  dplyr::select(CODE_OS, OS_YEARS, CODE_PFS, PFS_YEARS) %>% 
  head(6) %>%
  knitr::kable("latex", row.names = FALSE) 

\normalsize

Extended metadata: derived results

\tiny

collate_results()  %>% 
  dplyr::select(ashm_MYC, NFKBIZ_UTR, manta_BCL6_partner) %>% 
  dplyr::filter(NFKBIZ_UTR != "NEG" & manta_BCL6_partner != "NEG") %>%
  head(4) %>%
  knitr::kable("latex", row.names = FALSE) 

\normalsize

Bundled data

Getting and annotating SVs

\tiny

get_manta_sv() %>%
    annotate_sv(with_chr_prefix = TRUE) %>% # Add chr prefix
    dplyr::filter(!is.na(partner)) %>% # Drop SVs without known partners
    dplyr::filter(gene == "BCL6") %>% # Keep the SVs you care about
    head(5) %>%
    dplyr::select(chrom1, start1, tumour_sample_id, fusion) %>% 
    knitr::kable("latex", row.names = FALSE) 

\normalsize get_sv retrieves and annotate_sv quickly annotates all the SVs in GAMBL Input is bedpe format (can be your own non-GAMBL data!) Expects hg19 bedpe but if you have hg38 bedpe, check out liftover_bedpe Some tables return tumour_sample_id as an alias for sample_id (to disambiguate from normal_sample_id)

Getting and using coding SSMs

\tiny

# get all the NFKBIZ 3' UTR mutations
nfkbiz_ssm = get_ssm_by_region(region = "chr3:101,578,215-101,578,366")

ggplot(nfkbiz_ssm) + 
  geom_histogram(aes(x = Start_Position)) + 
  theme_minimal()

\normalsize

Getting and using coding SSMs

\tiny

maf_data = get_coding_ssm() # Everything at once
maf_obj = read.maf(maf_data, clinicalData = just_metadata) 

# Or 
base_dir = config::get("project_base")
maf_file = paste0(base_dir, "icgc_dart/slms-3_vcf2maf_current/level_3/final_merged_grch37.CDS.maf")

# Combine MAF data and metadata into maftools object
maf_obj = read.maf(maf_file, verbose = FALSE)

plotmafSummary(maf_obj)

\normalsize

Getting and using coding SSMs

\tiny

base_dir = config::get("project_base")
maf_file = paste0(base_dir, "icgc_dart/slms-3_vcf2maf_current/level_3/final_merged_grch37.CDS.maf")

# Combine MAF data and metadata into maftools object
maf_obj = read.maf(maf_file, verbose = FALSE)

plotmafSummary(maf_obj)

Visualization of aSHM patterns

\tiny

cols = get_gambl_colours() # Get one of the custom colour palettes or all colours together

some_meta = get_gambl_metadata() %>% # Narrow down to cases we want to look at
  dplyr::filter(pathology %in% c("DLBCL", "BL", "FL", "HGBL")) %>% 
  arrange(bcl2_ba, myc_ba)

ashm_multi_rainbow_plot(regions_to_display = c("BCL2-TSS","MYC-TSS"), 
                        custom_colours = cols,
                        metadata = some_meta,
                        classification_column = "lymphgen")

\normalsize

Visualization of aSHM patterns

cols = get_gambl_colours() # Get one of the custom colour palettes or all colours together

some_meta = get_gambl_metadata() %>% # Narrow down to cases we want to look at
  dplyr::filter(pathology %in% c("DLBCL", "BL", "FL", "HGBL")) %>% 
  arrange(bcl2_ba, myc_ba)

ashm_multi_rainbow_plot(regions_to_display = c("BCL2-TSS","MYC-TSS"), 
                        custom_colours = cols,
                        metadata = some_meta,
                        classification_column = "pathology")

Ugly (a.k.a. Maftools) Oncoplots

\tiny

bl_genes = c("MYC", "ID3", "TP53", "ARID1A", "FBXO11", "GNA13","TCF3", "TFAP4", "HNRNPU", "FOXO1", "CCND3", "SMARCA4", "DDX3X")
dlbcl_genes = c("EZH2", "KMT2D", "MEF2B", "CREBBP", "MYD88")
gene_groups = c(rep("BL", length(bl_genes)), rep("DLBCL", length(dlbcl_genes)))
just_metadata = dplyr::filter(just_metadata, !lymphgen %in% c("COMPOSITE"))
genes = c(bl_genes, dlbcl_genes)
names(gene_groups) = genes
oncoplot(maf_obj, writeMatrix = TRUE, genes = genes, removeNonMutated = FALSE)

\normalsize

Pretty (not Maftools) Oncoplots

\tiny

bl_genes=c("MYC", "ID3", "TP53", "ARID1A", "FBXO11", "GNA13", "TCF3", "TFAP4", "HNRNPU", "FOXO1", "CCND3", "SMARCA4", "DDX3X")
dlbcl_genes = c("EZH2", "KMT2D", "MEF2B", "CREBBP", "MYD88")
gene_groups = c(rep("BL", length(bl_genes)), rep("DLBCL", length(dlbcl_genes)))
just_metadata = dplyr::filter(just_metadata,!lymphgen %in% c("COMPOSITE"))
genes = c(bl_genes, dlbcl_genes)
names(gene_groups) = genes

# oncoplot(maf_obj, writeMatrix = TRUE, genes = genes, removeNonMutated = FALSE) 

# You would need to run oncoplot() at least once with the data you want to send to prettyOncoplot
prettyOncoplot(maftools_obj = maf_obj,
               genes = genes,
               these_samples_metadata = just_metadata,
               metadataColumns = c("pathology", "lymphgen", "sex", "EBV_status_inf", "cohort"),
               sortByColumns = c("pathology", "sex", "lymphgen", "EBV_status_inf", "cohort"),
               keepGeneOrder = TRUE, 
               splitGeneGroups = gene_groups,
               splitColumnName = "pathology",
               metadataBarHeight = 5,
               metadataBarFontsize = 8,
               fontSizeGene = 11,
               recycleOncomatrix = TRUE,
               include_noncoding = NULL,
               removeNonMutated = FALSE)

\normalsize

Pretty (not Maftools) Oncoplots

\tiny

bl_genes = c("MYC", "ID3", "TP53", "ARID1A", "FBXO11", "GNA13", "TCF3", "TFAP4", "HNRNPU", "FOXO1", "CCND3", "SMARCA4", "DDX3X")
dlbcl_genes = c("EZH2", "KMT2D", "MEF2B", "CREBBP", "MYD88")
gene_groups = c(rep("BL", length(bl_genes)), rep("DLBCL", length(dlbcl_genes)))

just_metadata = dplyr::filter(just_metadata,!lymphgen %in% c("COMPOSITE")) %>% 
  dplyr::filter(pathology %in% c("DLBCL","BL"))

genes = c(bl_genes, dlbcl_genes)
names(gene_groups) = genes
# oncoplot(maf_obj, writeMatrix = TRUE, genes = genes, removeNonMutated = FALSE) 

# You would need to run oncoplot() at least once with the data you want to send to prettyOncoplot
prettyOncoplot(maftools_obj = maf_obj,
               genes = genes,
               these_samples_metadata = just_metadata,
               metadataColumns = c("pathology", "lymphgen", "sex", "EBV_status_inf", "cohort"),
               sortByColumns = c("pathology", "sex" ,"lymphgen", "EBV_status_inf", "cohort"),
               keepGeneOrder = TRUE,
               splitGeneGroups = gene_groups,
               splitColumnName = "pathology",
               metadataBarHeight = 5,
               metadataBarFontsize = 8,
               fontSizeGene = 11,
               recycleOncomatrix = TRUE,
               include_noncoding = NULL,
               removeNonMutated = FALSE)

\normalsize

What's with the colours?

plot_cols = function(include_nhl = FALSE, remove_composite = TRUE){
  path_cols = ggsci::get_ash("b-cell")

  path_df = data.frame(Pathology = factor(names(path_cols), levels = names(path_cols)), hex = path_cols) %>%
    dplyr::filter(!Pathology %in% c("B-ALL", "PMBCL"))

  nhl = ggplot(path_df, aes(x = Pathology, y = 0, fill = hex, label = Pathology)) +
    geom_tile(width = 0.9, height = 1) +
    geom_text(color = "white", fontface = "bold") +
    scale_fill_identity(guide = "none") +
    coord_flip() + 
    theme_void() +
    labs(title = "B-NHL") +
    theme(plot.title = element_text(lineheight = 0.9, hjust = 0.5, face = "bold"))

  coo_cols = ggsci::get_ash("coo")
  coo_df = data.frame(COO = factor(names(coo_cols), levels = names(coo_cols)), hex = coo_cols) %>%
    dplyr::filter(!COO == "U" & !COO == "UNC")

  coo = ggplot(coo_df, aes(x = COO, y = 0, fill = hex, label = COO)) +
    geom_tile(width = 0.9, height = 1) +
    geom_text(color = "white", fontface="bold") +
    scale_fill_identity(guide = "none") +
    coord_flip() +
    theme_void() +
    labs(title = "COO") +
    theme(plot.title = element_text(lineheight = 0.9, hjust = 0.5, face = "bold"))

  harvard_cols = ggsci::get_ash("harvard")

  harvard_df = data.frame(Harvard = factor(names(harvard_cols), levels = names(harvard_cols)), hex = harvard_cols)

  harvard = ggplot(harvard_df, aes(x = Harvard, y = 0, fill = hex, label = Harvard)) +
    geom_tile(width = 0.9, height = 1) +
    geom_text(color = "white", fontface = "bold") +
    scale_fill_identity(guide = "none") +
    coord_flip() +
    theme_void() +
    labs(title = "Harvard") +
    theme(plot.title = element_text(lineheight = 0.9, hjust = 0.5, face = "bold"))

  lymphgen_cols = ggsci::get_ash("lymphgen")
  # lymphgen_cols = c("#52000F", lymphgen_cols)
  # names(lymphgen_cols)[1] = "EZB-M+"

  lymphgen_df = data.frame(LymphGen = factor(names(lymphgen_cols), levels = names(lymphgen_cols)), hex = lymphgen_cols)
  if(remove_composite){
    lymphgen_df = dplyr::filter(lymphgen_df, LymphGen %in% c("Other", "A53", "N1", "BN2", "MCD", "ST2", "EZB", "EZB-MYC"))
  }

  lymphgen = ggplot(lymphgen_df, aes(x = LymphGen, y = 0, fill = hex, label = LymphGen)) +
    geom_tile(width = 0.9, height = 1) +
    geom_text(color = "white", fontface = "bold") +
    scale_fill_identity(guide = "none") +
    coord_flip() +
    theme_void() +
    labs(title = "LymphGen") +
    theme(plot.title = element_text(lineheight = 0.9, hjust = 0.5, face = "bold"))

  hmrn_cols = ggsci::get_ash("hmrn")
  hmrn_df = data.frame(HMRN = factor(names(hmrn_cols), levels = names(hmrn_cols)), hex = hmrn_cols)

  hmrn = ggplot(hmrn_df, aes(x = HMRN, y = 0, fill = hex, label = HMRN)) +
    geom_tile(width = 0.9, height = 1) +
    geom_text(color = "white", fontface = "bold") +
    scale_fill_identity(guide = "none") +
    coord_flip() +
    theme_void() +
    labs(title = "HMRN") +
    theme(plot.title = element_text(lineheight = 0.9, hjust = 0.5, face = "bold"))

  if(include_nhl){
    grid.arrange(nhl, coo, harvard, lymphgen, hmrn, ncol = 5)
  }
  else{
    grid.arrange(coo, harvard, lymphgen, hmrn, ncol = 5)
  }
}
plot_cols(include_nhl = TRUE)

This doesn't work for gene expression data, right?

\tiny

get_gene_expression(hugo_symbols = c("IRF4", "MYC"), join_with = "genome") %>%
  head(4) %>%
  knitr::kable("latex", row.names = FALSE) 

\normalsize

What can I use this for?

\tiny

all_exp = get_gene_expression(hugo_symbols = c("IRF4", "MYC"), join_with = "genome") 

collated_meta = collate_results(join_with_full_metadata = TRUE) %>%
  dplyr::filter(!lymphgen %in% c("COMPOSITE")) %>% 
  dplyr::filter(pathology %in% c("DLBCL", "BL")) # Get everything together

exp_with_meta = left_join(all_exp, collated_meta) %>%
  dplyr::filter(!is.na(MYC) & !is.na(ashm_MYC)) %>%
  mutate(myc_hypermutated = ifelse(ashm_MYC > 1, "POS", "NEG"))

ggplot(exp_with_meta, aes(x = myc_hypermutated, y = MYC, colour = manta_MYC_sv)) + 
  geom_boxplot() +
  geom_quasirandom(dodge.width = 0.8) + 
  scale_color_manual(values = get_gambl_colours()) +
  facet_wrap(~pathology, ncol = 3)

\normalsize

How can I work with copy number?

Per-sample copy number and VAF visualization

\tiny

copy_number_vaf_plot(this_sample = "HTMCP-01-06-00422-01A-01D")

\normalsize

Optional feature: coding mutation focus

\tiny

my_genes = GAMBLR.data::lymphoma_genes %>% # Use the bundled list of lymphoma genes
  dplyr::filter(BL == TRUE | DLBCL == TRUE) %>% 
  pull(Gene)

copy_number_vaf_plot(this_sample = "07-35482T", coding_only = TRUE, genes_to_label = my_genes)

\normalsize

PrettyOncoplots with expression data

some_genes = unique(c("BCL6", "MYC", "IRF4", "BEST3", "CREB3L2", "HEY2", "TNFRSF13B", "FCRLB", "TOX", "SERPINA9", "MAPK10", "PDGFD", "PDE4B", "SUGCT", "SLC1A1", "MME", "CCND3"))
some_genes = c("IRF4", "BANK1", "ABI3", "CCDC50", "S1PR2", "ITPKB", "SERPINA9", "TNFRSF13B")
all_exp = get_gene_expression(hugo_symbols = some_genes, join_with = "genome")

all_metadata = collate_results(join_with_full_metadata = TRUE, sbs_manipulation = "scale") %>% 
  dplyr::filter(pathology %in% c("BL", "DLBCL"))

extra_meta = left_join(all_exp, all_metadata, by = "sample_id") %>% 
  dplyr::filter(!is.na(patient_id)) %>% 
  dplyr::filter(!is.na(IRF4))

bl_cases = get_gambl_metadata(case_set = "BLGSP-study") %>%
  dplyr::filter(cohort == "BL_Adult") %>%
  head()

just_metadata = get_gambl_metadata()
# extra_meta_scaled_everything = extra_meta %>% 
#   mutate(across(all_of(some_genes), ~ trim_scale_expression(.x)))

bl_genes = c("MYC", "ID3", "TP53", "ARID1A", "FBXO11", "GNA13", "TCF3", "TFAP4", "HNRNPU", "FOXO1", "CCND3", "SMARCA4", "DDX3X")
dlbcl_genes = c("EZH2", "KMT2D", "CREBBP")
gene_groups = c(rep("BL", length(bl_genes)), rep("DLBCL", length(dlbcl_genes)))
just_metadata = dplyr::filter(just_metadata, !lymphgen %in% c("COMPOSITE")) %>% 
  dplyr::filter(pathology %in% c("DLBCL", "BL"))

genes = c(bl_genes, dlbcl_genes)
names(gene_groups) = genes

# oncoplot(maf_obj, writeMatrix = TRUE, genes = genes, removeNonMutated = FALSE) 
# You would need to run oncoplot() at least once with the data you want to send to prettyOncoplot

extra_meta = mutate(extra_meta, DLBCL90_dhitsig_call = ifelse(is.na(DLBCL90_dhitsig_call), "NA", DLBCL90_dhitsig_call))

# extra_meta = mutate(extra_meta, sbs9 = ifelse(is.na(sbs9), 0, sbs9))

probe_level = read_tsv("/projects/rmorin/projects/gambl-repos/gambl-rmorin/data/metadata/raw_metadata/blgsp_dlbcl90_byprobe.tsv") %>% 
  select(-1, -3, -4, -7, -8, -normval, -pmblval, -pmblp, -pmblcall, -dlbclval, -dlbclp, -dlbclcall, -totalcall, -dhitsig_score, -dhitsig_class,    
         -dhitsig_prob_pos, -dhitsig_prob_neg, -bcl6)

extra_meta = left_join(extra_meta, probe_level, by = c("patient_id" = "sample_id2"))

cluster_bl = read_tsv("/projects/rmorin/adult_blgsp/results_manuscript/NMF_clustering_non_redundant_hotspots.tsv") %>%
  dplyr::select(-pathology, -cohort)

extra_meta = left_join(extra_meta, cluster_bl)

prettyOncoplot(maftools_obj = maf_obj,
               genes = genes,
               these_samples_metadata = extra_meta,
               metadataColumns = c("pathology", "COO_consensus", "cluster_name", "EBV_status_inf"),
               sortByColumns = c("pathology", "cluster_name", "IRF4", "COO_consensus"),
               expressionColumns = c("IRF4", "irf4"),
               keepGeneOrder = FALSE, 
               splitGeneGroups = gene_groups,
               splitColumnName = "pathology",
               metadataBarHeight = 5,
               metadataBarFontsize = 8,
               fontSizeGene = 11,
               recycleOncomatrix = TRUE,
               include_noncoding = NULL,
               removeNonMutated = FALSE)
groups=c("TP53" = "TP53", "ID3" = "ICS", "CCND3" = "ICS", "SMARCA4" = "ICS", "TCF3" = "ICS", "P2RY8" = "ICS",
         "DDX3X" = "DGS", "GNA13" = "DGS", "GNAI2" = "DGS", "HNRNPU" = "DGS", "SIN3A" = "DGS", "ARID1A" = "BL",
         "FOXO1" = "BL", "FBXO11" = "BL", "RHOA" = "BL", "KMT2D" = "DLBCL", "EZH2" = "DLBCL", "TET2" = "DLBCL", 
         "KLHL6" = "DLBCL")

bl_genes = names(groups)

prettyOncoplot(maftools_obj = maf_obj,
               genes = bl_genes,
               these_samples_metadata = extra_meta,
               metadataColumns = c("pathology", "COO_consensus", "cluster_name", "EBV_status_inf"),
               sortByColumns = c("IRF4"),
               expressionColumns = c("IRF4", "irf4"),
               keepGeneOrder = FALSE,
               splitGeneGroups = groups,
               splitColumnName = "cluster_name",
               metadataBarHeight = 5,
               metadataBarFontsize = 8,
               fontSizeGene = 11,
               recycleOncomatrix = TRUE,
               include_noncoding = NULL,
               removeNonMutated = FALSE)


morinlab/GAMBLR documentation built on Feb. 10, 2024, 12:11 a.m.