R/1_rnaseq.R

Defines functions plot_tfea run_tfea plot_gsea run_gsea plot_rnaseq_goi dds_to_symbols get_unique_symbol_ids plot_rnaseq_volcano plot_rnaseq_venn find_gsea_overlap find_rnaseq_overlap identify_deg plot_rnaseq_pca vst_rnaseq count_rnaseq

# rnaseq.R

count_rnaseq <- function() {

  dds <- rnaseq.lf.hypoxia.molidustat::lf_hyp_bay_rnaseq

  SummarizedExperiment::colData(dds) <-
    SummarizedExperiment::colData(dds) %>%
    tibble::as_tibble() %>%
    dplyr::mutate(
      experiment = factor(experiment),
      oxygen = factor(oxygen, levels = c("21%", "0.5%"), labels = c("N", "H")),
      treatment = factor(treatment, labels = c("DMSO", "BAY")),
      group = stringr::str_c(oxygen, treatment, sep = "."),
      group = factor(group, levels = c("N.DMSO", "H.DMSO", "N.BAY", "H.BAY"))
    ) %>%
    as("DataFrame")

  rownames(SummarizedExperiment::colData(dds)) <- SummarizedExperiment::colData(dds)$id

  design <- ~ experiment + group

  dds <-
    DESeq2::DESeqDataSet(
      dds,
      design = design
    )

  keep <- rowSums(DESeq2::counts(dds)) > 1
  dds <- dds[keep, ]
  DESeq2::DESeq(dds)
}

vst_rnaseq <- function(dds) {
  vsd <- DESeq2::vst(dds, blind = FALSE)
  SummarizedExperiment::assay(vsd) <-
    limma::removeBatchEffect(SummarizedExperiment::assay(vsd), vsd$experiment)

  DESeq2::plotPCA(
    vsd,
    intgroup = "group",
    returnData = TRUE
  ) %>%
    dplyr::mutate(
      label = dplyr::case_when(
        group == "N.DMSO" ~ "21%\nDMSO",
        group == "H.DMSO" ~ "0.5%\nDMSO",
        group == "N.BAY" ~ "21%\nBAY",
        group == "H.BAY" ~ "0.5%\nBAY"
      )
    )

}

plot_rnaseq_pca <- function(pca_data) {

  percent_variance <- round(100 * attr(pca_data, "percentVar"))

  pair_clrs <- c(
    N.DMSO = "#b2df8a",
    H.DMSO = "#33a02c",
    N.BAY = "#cab2d6",
    H.BAY = "#6a3d9a"
  )

  ggplot2::ggplot(pca_data) +
    ggplot2::aes(
      x = PC1,
      y = PC2,
      color = group
    ) +
    ggforce::geom_mark_ellipse(
      ggplot2::aes(
        color = group,
        label = label
      ),
      expand = ggplot2::unit(2, "mm"),
      label.fontsize = 5,
      label.fontface = "plain",
      label.family = "Calibri",
      label.hjust = 0.5,
      label.buffer = ggplot2::unit(0, "mm"),
      label.margin = ggplot2::margin(-1.5, -1.5, -1.5, -1.5, "mm"),
      con.type = "none",
      show.legend = FALSE
    ) +
    ggplot2::geom_point(
      ggplot2::aes(fill = group),
      pch = 21,
      color = "white",
      size = 2,
      show.legend = FALSE
    ) +
    ggplot2::labs(
      x = paste0("PC1: ", percent_variance[1], "% variance"),
      y = paste0("PC2: ", percent_variance[2], "% variance")
    ) +
    ggplot2::scale_fill_manual(
      name = NULL,
      values = pair_clrs,
      labels = c("21% | DMSO", "0.5% | DMSO", "21% | BAY", "0.5% | BAY")
    ) +
    ggplot2::scale_color_manual(
      name = NULL,
      values = pair_clrs,
      labels = c("21% | DMSO", "0.5% | DMSO", "21% | BAY", "0.5% | BAY")
    ) +
    ggplot2::scale_x_continuous(expand = ggplot2::expansion(add = c(3, 3))) +
    ggplot2::scale_y_continuous(expand = ggplot2::expansion(add = c(2, 2))) +
    theme_plots() +
    ggplot2::theme(
      legend.position = "bottom",
      legend.key.size = ggplot2::unit(1, units = "lines")
    )

}

identify_deg <- function(dds, expr) {
  alpha <- 0.05
  fc <- 1

  mod_mat <- model.matrix(
    DESeq2::design(dds),
    SummarizedExperiment::colData(dds)
  )

  n.dmso <- colMeans(mod_mat[dds$group == "N.DMSO", ])
  n.bay <- colMeans(mod_mat[dds$group == "N.BAY", ])
  h.dmso <- colMeans(mod_mat[dds$group == "H.DMSO", ])
  h.bay <- colMeans(mod_mat[dds$group == "H.BAY", ])

  con <- eval(expr)
  # con <- (h.dmso - n.dmso) - (n.bay - n.dmso)

  annots <-
    tibble::as_tibble(SummarizedExperiment::rowData(dds), rownames = "row") %>%
    dplyr::select(row, hgnc_symbol, description)

  DESeq2::results(
    dds,
    contrast = con,
    alpha = alpha,
    lfcThreshold = log(fc, base = 2),
    tidy = TRUE,
    parallel = TRUE
  ) %>%
    dplyr::left_join(annots, by = "row") %>%
    dplyr::rename(symbol = hgnc_symbol) %>%
    dplyr::relocate(symbol, description, .after = "row") %>%
    dplyr::arrange(padj)
}

find_rnaseq_overlap <- function(dds) {
  bay <-
    identify_deg(dds, expr(n.bay - n.dmso)) %>%
    dplyr::filter(padj < 0.05)
  hyp <- identify_deg(dds, expr(h.dmso - n.dmso)) %>%
    dplyr::filter(padj < 0.05)

  nm <- rownames(SummarizedExperiment::assay(dds))
  bay_deg <- nm %in% bay$row
  hyp_deg <- nm %in% hyp$row
  tibble::tibble(nm, hyp_deg, bay_deg)
}

find_gsea_overlap <- function(a, b, src = "HALLMARK") {
  dplyr::full_join(a, b, by = c("source", "pathway")) %>%
    dplyr::select(source, pathway, padj.x, padj.y) %>%
    dplyr::filter(source %in% src) %>%
    dplyr::mutate(
      hyp_deg = padj.x < 0.05,
      bay_deg = padj.y < 0.05,
      dplyr::across(tidyselect::matches("_deg"), tidyr::replace_na, FALSE)
    )
}

plot_rnaseq_venn <- function(df, title) {
  ggplot2::ggplot(df) +
    ggplot2::aes(A = hyp_deg, B = bay_deg) +
    ggvenn::geom_venn(
      set_names = c("0.5%", "BAY"),
      digits = 0,
      show_percentage = TRUE,
      fill_color = clrs[c(2, 4)],
      fill_alpha = 0.5,
      stroke_size = 0.25,
      set_name_size = 6/ggplot2::.pt,
      text_size = 5/ggplot2::.pt
    ) +
    ggplot2::labs(title = title) +
    theme_plots() +
    ggplot2::coord_fixed(clip = "off") +
    ggplot2::theme(
      panel.border = ggplot2::element_blank(),
      axis.text = ggplot2::element_blank(),
      axis.ticks = ggplot2::element_blank(),
      plot.title = ggplot2::element_text(hjust = 0.5)
    )
}

plot_rnaseq_volcano <- function(results, gois = NULL, xlab = NULL) {

  df <-
    tibble::as_tibble(results)

  left <-
    df %>%
    dplyr::filter(log2FoldChange < 0) %>%
    dplyr::slice_min(stat, n = 10)

  right <-
    df %>%
    dplyr::filter(log2FoldChange > 0) %>%
    dplyr::slice_max(stat, n = 10)

  ggplot2::ggplot(df) +
    ggplot2::aes(
      # x = 2 ^ log2FoldChange,
      x = log2FoldChange,
      y = padj
    ) +
    ggrepel::geom_text_repel(
      data = left,
      ggplot2::aes(
        label = symbol,
        color = symbol %in% gois
      ),
      size = 5/ggplot2::.pt,
      max.overlaps = 20,
      segment.size = 0.1,
      nudge_x = -8 - left$log2FoldChange,
      hjust = 0,
      direction = "y",
      family = "Calibri",
      segment.color = "black",
      show.legend = FALSE
    ) +
    ggrepel::geom_text_repel(
      data = right,
      ggplot2::aes(
        label = symbol,
        color = symbol %in% gois
      ),
      size = 5/ggplot2::.pt,
      max.overlaps = 20,
      segment.size = 0.1,
      nudge_x = 8 - right$log2FoldChange,
      hjust = 1,
      direction = "y",
      segment.color = "black",
      family = "Calibri",
      show.legend = FALSE
    ) +
    ggplot2::geom_hex(
      bins = c(250, 15),
      show.legend = FALSE
    ) +
    ggplot2::scale_fill_viridis_c(trans = "log10") +
    ggplot2::scale_color_manual(values = c("black", "darkred")) +
    ggplot2::scale_y_continuous(trans = reverselog_trans(10)) +
    ggplot2::scale_x_continuous(
      breaks = seq(-6, 6, 2),
      labels = scales::math_format(2^.x),
      expand = ggplot2::expansion(mult = 0.3)
    ) +
    ggplot2::labs(
      x = xlab,
      y = "Adjusted P value"
    ) +
    theme_plots() +
    ggplot2::coord_cartesian(xlim = c(-6, 6), clip = "off")
}

get_unique_symbol_ids <- function(dds) {
  tibble::as_tibble(SummarizedExperiment::rowData(dds), rownames = "entrezid") %>%
    dplyr::filter(hgnc_symbol != "" & !is.na(hgnc_symbol)) %>%
    dplyr::group_by(hgnc_symbol) %>%
    dplyr::filter(baseMean == max(baseMean)) %>%
    dplyr::pull(entrezid)
}

dds_to_symbols <- function(dds, unique_symbol_ids) {
  df <- dds[rownames(dds) %in% unique_symbol_ids, ]
  rownames(df) <- SummarizedExperiment::rowData(df)$hgnc_symbol
  df
}

plot_rnaseq_goi <- function(dds, goi) {

  df <-
    dds[rownames(dds) %in% goi] %>%
    SummarizedExperiment::assay() %>%
    tibble::as_tibble(rownames = "symbol") %>%
    tidyr::pivot_longer(
      -symbol,
      names_to = "id",
      values_to = "count"
    ) %>%
    dplyr::left_join(
      dplyr::select(tibble::as_tibble(SummarizedExperiment::colData(dds)), id:group),
      by = "id",
      copy = TRUE
    ) %>%
    dplyr::mutate(oxygen = factor(oxygen, levels = c("N", "H"), labels = c("21%", "0.5%")))

  ggplot2::ggplot(df) +
    ggplot2::facet_wrap(~ symbol, scales = "free_y", nrow = 1) +
    ggplot2::aes(
      x = treatment,
      y = count/1000,
      fill = oxygen
    ) +
    ggplot2::stat_summary(
      geom = "col",
      fun = "mean",
      position = ggplot2::position_dodge(width = 0.6),
      width = 0.6,
      show.legend = TRUE
    ) +
    ggplot2::stat_summary(
      ggplot2::aes(group = oxygen),
      geom = "errorbar",
      fun.data = "mean_se",
      position = ggplot2::position_dodge(width = 0.6),
      width = 0.2,
      size = 0.25,
      show.legend = FALSE
    ) +
    ggplot2::scale_fill_manual(values = clrs, limits = force) +
    ggplot2::labs(
      x = "Treatment",
      y = expression(paste("Count (x", 10^3, ")")),
      fill = NULL
    ) +
    ggplot2::ylim(c(0, NA)) +
    theme_plots() +
    ggplot2::theme(
      legend.key.width = ggplot2::unit(0.5, "lines"),
      legend.key.height = ggplot2::unit(0.5, "lines"),
      legend.position = "bottom",
      legend.box.margin = ggplot2::margin(t = -10)
    ) +
    NULL
}

run_gsea <- function(results) {

  pathways <-
    fgsea::gmtPathways("~/Dropbox (Partners HealthCare)/msigdb_v7.2/msigdb_v7.2_GMTs/msigdb.v7.2.entrez.gmt")

  rnks <-
    results %>%
    dplyr::select(row, stat) %>%
    dplyr::arrange(stat) %>%
    tibble::deframe()

  fgsea::fgsea(
    pathways = pathways,
    stats = rnks,
    nPermSimple = 10000,
    eps = 0,
    BPPARAM = BiocParallel::bpparam()
  ) %>%
    tibble::as_tibble() %>%
    dplyr::arrange(desc(NES)) %>%
    tidyr::separate(pathway, c("source", "pathway"), "_", extra = "merge")
}

plot_gsea <- function(rnaseq_gsea, sources, lbls, vals) {
  rnaseq_gsea %>%
    dplyr::filter(padj < 0.05) %>%
    dplyr::filter(source %in% sources) %>%
    dplyr::select(source, pathway, NES) %>%
    dplyr::mutate(
      pathway = stringr::str_replace_all(pathway, "_", " "),
      pathway = tolower(pathway)
    ) %>%
    dplyr::distinct() %>%
    ggplot2::ggplot() +
    ggplot2::aes(
      x = NES,
      y = reorder(pathway, NES)
    ) +
    ggplot2::geom_col(
      ggplot2::aes(fill = NES > 0)
    ) +
    ggplot2::labs(
      x = "Normalized Enrichment Score",
      y = NULL
    ) +
    ggplot2::scale_y_discrete(position = "right") +
    ggplot2::scale_fill_manual(
      name = NULL,
      labels = lbls,
      values = vals
    ) +
    theme_plots() +
    ggplot2::theme(
      legend.key.width = ggplot2::unit(0.5, "lines"),
      legend.key.height = ggplot2::unit(0.5, "lines"),
      legend.position = "bottom",
      legend.box.margin = ggplot2::margin(t = -10)
    ) +
    NULL
}

run_tfea <- function(df)
{
  df <- dplyr::rename(df, Genes = row)

  load("~/Dropbox (Partners HealthCare)/RM2020_GH_doubleElite.rdata")
  TFEA.ChIP::set_user_data(MetaData, Mat01)

  de_genes <-
    TFEA.ChIP::Select_genes(
      df,
      min_LFC = 0.5
    )

  ctl_genes <-
    TFEA.ChIP::Select_genes(
      df,
      min_pval = 0.5,
      max_pval = 1,
      min_LFC = -0.25,
      max_LFC = 0.25
    )

  TFEA.ChIP::contingency_matrix(de_genes, ctl_genes) %>%
    TFEA.ChIP::getCMstats() %>%
    TFEA.ChIP::rankTFs(rankMethod = "gsea")
}

plot_tfea <- function(rnaseq_tfea)
{
  df <-
    rnaseq_tfea %>%
    dplyr::mutate(padj = p.adjust(pVal, method = "fdr")) %>%
    dplyr::filter(padj < 0.05)

  ggplot2::ggplot(df) +
    ggplot2::aes(
      y = reorder(TF, ES),
      x = ES,
      fill = ES > 0
    ) +
    ggplot2::geom_col(
      show.legend = TRUE
    ) +
    ggplot2::labs(
      y = NULL,
      x = "Enrichment score"
    ) +
    ggplot2::scale_y_discrete(position = "right") +
    ggplot2::scale_fill_manual(
      name = NULL,
      labels = c("With BAY", "With Hypoxia"),
      values = unname(clrs[c(4, 2)])
    ) +
    theme_plots() +
    ggplot2::theme(
      legend.key.width = ggplot2::unit(0.5, "lines"),
      legend.key.height = ggplot2::unit(0.5, "lines"),
      legend.position = c("bottom"),
      legend.box.margin = ggplot2::margin(t = -10)
    )
}
oldhamlab/Copeland.2021.hypoxia.flux documentation built on Feb. 5, 2022, 8:31 p.m.