R/gsea_disease_set.R

Defines functions gsea_disease_set

Documented in gsea_disease_set

#' gsea enrichment analysis for disease up/down geneset to molecules signature
#'
#' @param m2dObj raw m2d object
#' @param p_deg significant deg cutoff
#' @param n_degs disease gene sets size
#' @param p_both cutoff for both enrichment
#' @param p_single cutoff for single enrichment
#' @param demo gsea result for demo use to save time
#'
#' @return significant molecules that disease up/down geneset can enrich
#' @export
#'
#' @examples
#' library(M2D)
#' dir_disease <- system.file("extdata",
#'   "demo_BRCA_disease.csv",
#'   package = "M2D"
#' )
#' disease <- read.csv(dir_disease)
#' dir_molecule <- system.file("extdata",
#'   "demo_molecules_321_12320.csv",
#'   package = "M2D"
#' )
#' molecules <- data.table::fread(dir_molecule,
#'   data.table = FALSE
#' ) %>%
#'   tibble::column_to_rownames("V1")
#' m2dObj <- list(
#'   disease = disease,
#'   molecule = molecules
#' )
#'
#' enrich_double_list <- gsea_disease_set(
#'   m2dObj = m2dObj,
#'   demo = TRUE
#' )
#' enrich_double <- enrich_double_list$gsea_sig
#' dim(enrich_double)
#' unique(enrich_double$molecule)
gsea_disease_set <- function(m2dObj, # raw data
                             p_deg = 0.05, # significant deg cutoff
                             n_degs = 500, # disease gene sets size
                             p_both = 0.05,
                             p_single = 0.01,
                             demo = FALSE) {
  # p_deg=0.05
  # n_degs=500
  # p_both   = 0.05
  # p_single = 0.01
  ## step1: define disease up/down gene set
  sig_gene <- m2dObj$disease %>%
    dplyr::filter(P.Value < p_deg) %>%
    dplyr::arrange(desc(logFC))
  up_gene <- dplyr::filter(sig_gene, logFC > 0) %>%
    head(n_degs) %>%
    na.omit()
  up_df <- data.frame(Term = "UP", gene = up_gene$Symbol)
  dw_gene <- dplyr::filter(sig_gene, logFC < 0) %>%
    tail(n_degs) %>%
    na.omit()
  dw_df <- data.frame(Term = "DOWN", gene = dw_gene$Symbol)
  sig_df <- rbind(up_df, dw_df)

  ## step2: GSEA enrichment(consuming time)
  if (demo) {
    data("demo_gsea_df_all")
    gsea_df_all <- demo_gsea_df_all
  } else {
    gsea_df_all <- lapply(seq(ncol(m2dObj$molecule)), function(i) {
      # i = 1
      print(paste0(i, " / ", ncol(m2dObj$molecule)))
      genelist <- m2dObj$molecule[, i]
      names(genelist) <- rownames(m2dObj$molecule)
      genelist <- sort(genelist, T)
      genelist <- genelist[genelist != 0]
      if (!any(names(genelist) %in% sig_df$gene)) {
        return(NULL)
      }
      set.seed(1203)
      gsea_res <- clusterProfiler::GSEA(genelist,
        TERM2GENE = sig_df,
        pvalueCutoff = 1
      )
      gsea_df <- gsea_res@result %>%
        dplyr::mutate(., molecule = colnames(m2dObj$molecule)[i]) %>%
        dplyr::rename(disease_deg_set = Description) %>%
        dplyr::select(
          molecule, disease_deg_set, pvalue,
          NES, dplyr::everything()
        )
      return(gsea_df)
    })
    names(gsea_df_all) <- colnames(m2dObj$molecule)
  }
  gsea_df <- do.call(rbind, gsea_df_all) %>%
    dplyr::mutate(sign = sign(ifelse(ID == "UP", 1, -1) * NES)) %>%
    dplyr::select(molecule, disease_deg_set, sign, everything())

  both <- gsea_df %>%
    dplyr::filter(pvalue < p_both, sign < 0) %>%
    dplyr::arrange(molecule)
  both <- both$molecule[duplicated(both$molecule)]

  single <- gsea_df %>%
    dplyr::filter(p.adjust < p_single, sign < 0) %>%
    .[, "molecule"] %>%
    unique()
  single <- gsea_df %>%
    dplyr::filter(sign > 0, molecule %in% single, pvalue < 0.05) %>%
    .[, "molecule"] %>%
    setdiff(single, .)

  gsea_sig <- gsea_df %>%
    dplyr::filter(molecule %in% c(both, single), sign == -1) %>%
    dplyr::mutate(Type = ifelse(molecule %in% both, "both", "single")) %>%
    dplyr::select(Type, dplyr::everything()) %>%
    dplyr::arrange(Type, p.adjust)

  return(list(
    gsea_sig = gsea_sig,
    gsea_raw = gsea_df
  ))
}
lishensuo/M2D documentation built on Jan. 4, 2022, 9:44 a.m.