#' gsea enrichment analysis for disease up/down geneset to molecules signature
#'
#' @param m2dObj raw m2d object
#' @param p_deg significant deg cutoff
#' @param n_degs disease gene sets size
#' @param p_both cutoff for both enrichment
#' @param p_single cutoff for single enrichment
#' @param demo gsea result for demo use to save time
#'
#' @return significant molecules that disease up/down geneset can enrich
#' @export
#'
#' @examples
#' library(M2D)
#' dir_disease <- system.file("extdata",
#' "demo_BRCA_disease.csv",
#' package = "M2D"
#' )
#' disease <- read.csv(dir_disease)
#' dir_molecule <- system.file("extdata",
#' "demo_molecules_321_12320.csv",
#' package = "M2D"
#' )
#' molecules <- data.table::fread(dir_molecule,
#' data.table = FALSE
#' ) %>%
#' tibble::column_to_rownames("V1")
#' m2dObj <- list(
#' disease = disease,
#' molecule = molecules
#' )
#'
#' enrich_double_list <- gsea_disease_set(
#' m2dObj = m2dObj,
#' demo = TRUE
#' )
#' enrich_double <- enrich_double_list$gsea_sig
#' dim(enrich_double)
#' unique(enrich_double$molecule)
gsea_disease_set <- function(m2dObj, # raw data
p_deg = 0.05, # significant deg cutoff
n_degs = 500, # disease gene sets size
p_both = 0.05,
p_single = 0.01,
demo = FALSE) {
# p_deg=0.05
# n_degs=500
# p_both = 0.05
# p_single = 0.01
## step1: define disease up/down gene set
sig_gene <- m2dObj$disease %>%
dplyr::filter(P.Value < p_deg) %>%
dplyr::arrange(desc(logFC))
up_gene <- dplyr::filter(sig_gene, logFC > 0) %>%
head(n_degs) %>%
na.omit()
up_df <- data.frame(Term = "UP", gene = up_gene$Symbol)
dw_gene <- dplyr::filter(sig_gene, logFC < 0) %>%
tail(n_degs) %>%
na.omit()
dw_df <- data.frame(Term = "DOWN", gene = dw_gene$Symbol)
sig_df <- rbind(up_df, dw_df)
## step2: GSEA enrichment(consuming time)
if (demo) {
data("demo_gsea_df_all")
gsea_df_all <- demo_gsea_df_all
} else {
gsea_df_all <- lapply(seq(ncol(m2dObj$molecule)), function(i) {
# i = 1
print(paste0(i, " / ", ncol(m2dObj$molecule)))
genelist <- m2dObj$molecule[, i]
names(genelist) <- rownames(m2dObj$molecule)
genelist <- sort(genelist, T)
genelist <- genelist[genelist != 0]
if (!any(names(genelist) %in% sig_df$gene)) {
return(NULL)
}
set.seed(1203)
gsea_res <- clusterProfiler::GSEA(genelist,
TERM2GENE = sig_df,
pvalueCutoff = 1
)
gsea_df <- gsea_res@result %>%
dplyr::mutate(., molecule = colnames(m2dObj$molecule)[i]) %>%
dplyr::rename(disease_deg_set = Description) %>%
dplyr::select(
molecule, disease_deg_set, pvalue,
NES, dplyr::everything()
)
return(gsea_df)
})
names(gsea_df_all) <- colnames(m2dObj$molecule)
}
gsea_df <- do.call(rbind, gsea_df_all) %>%
dplyr::mutate(sign = sign(ifelse(ID == "UP", 1, -1) * NES)) %>%
dplyr::select(molecule, disease_deg_set, sign, everything())
both <- gsea_df %>%
dplyr::filter(pvalue < p_both, sign < 0) %>%
dplyr::arrange(molecule)
both <- both$molecule[duplicated(both$molecule)]
single <- gsea_df %>%
dplyr::filter(p.adjust < p_single, sign < 0) %>%
.[, "molecule"] %>%
unique()
single <- gsea_df %>%
dplyr::filter(sign > 0, molecule %in% single, pvalue < 0.05) %>%
.[, "molecule"] %>%
setdiff(single, .)
gsea_sig <- gsea_df %>%
dplyr::filter(molecule %in% c(both, single), sign == -1) %>%
dplyr::mutate(Type = ifelse(molecule %in% both, "both", "single")) %>%
dplyr::select(Type, dplyr::everything()) %>%
dplyr::arrange(Type, p.adjust)
return(list(
gsea_sig = gsea_sig,
gsea_raw = gsea_df
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.