R/find_disease_pw.R

Defines functions find_disease_pw

Documented in find_disease_pw

#' find enriched  GO_BP pathways related disease
#'
#' @param m2dObj raw m2dObj
#' @param n_degs  how many disease degs to enrich
#' @param p_deg signifcant disease degs cutoff
#' @param pathway one of c("GOBP","user_gobp")
#' @param n_terms how many disease related pathway
#' @param p_term significant enriched degs
#' @param uncor_GOBP wheather independent pathway
#' @param uncor_cut  correlation cutoff of independent pathway
#' @param user_gobp GO BP ids
#'
#' @return a list of disease-pathway-info.
#' @export
#'
#' @examples
#' dir_disease <- system.file("extdata",
#'   "demo_BRCA_disease.csv",
#'   package = "M2D"
#' )
#' disease <- read.csv(dir_disease)
#' dir_molecule <- system.file("extdata",
#'   "demo_molecules_321_12320.csv",
#'   package = "M2D"
#' )
#' molecules <- data.table::fread(dir_molecule,
#'   data.table = FALSE
#' ) %>%
#'   tibble::column_to_rownames("V1")
#' m2dObj <- list(
#'   disease = disease,
#'   molecule = molecules
#' )
#' disease_pw_list <- find_disease_pw(m2dObj)
#' disease_pw <- disease_pw_list$term_df
#' table(disease_pw$Term)
#' head(disease_pw)
#' enrich_raw <- disease_pw_list$enrich_raw@result
#' enrich_raw[1:4, 1:4]
find_disease_pw <- function(m2dObj,
                            n_degs = 500,
                            p_deg = 0.05,
                            pathway = "GOBP",
                            n_terms = 10,
                            p_term = 0.05,
                            uncor_GOBP = TRUE,
                            uncor_cut=0.6,
                            user_gobp = NULL) {
  # n_degs=500
  # p_deg=0.05
  # n_terms=10
  # p_term=0.05
  # uncor_GOBP=TRUE
  # uncor_cut=0.6
  # pathway="GOBP"
  # user_gobp = sample(enrich_res@result$ID,10)
  d_deg_sig <- m2dObj$disease %>%
    dplyr::filter(P.Value < p_deg, ) %>%
    dplyr::arrange(desc(abs(logFC))) %>%
    head(n_degs)
  mode <- switch(pathway,
    GOBP = 1,
    user_gobp = 2
  )
  if (mode == 1) {
    enrich_res <- clusterProfiler::enrichGO(
      gene = d_deg_sig$Symbol,
      OrgDb = org.Hs.eg.db::org.Hs.eg.db,
      keyType = "SYMBOL",
      ont = "BP",
      pAdjustMethod = "BH",
      pvalueCutoff = 1,
      qvalueCutoff = 1,
      minGSSize = 100
    )
    enrich_df <- enrich_res@result
    enrich_df <- enrich_res@result %>%
      dplyr::filter(
        ID %in% hsGO_BP@geneAnno$GO,
        pvalue < p_term
      ) %>%
      head(100)
    if (uncor_GOBP) {
      pw_retained <- GOBP_uncor(enrich_df$ID, retain_number = n_terms)
    } else {
      pw_retained <- enrich_df$ID[1:n_terms]
    }
  } else if (mode == 2) {
    pw_retained <- user_gobp
    enrich_res <- disease_pw_list$enrich_raw
  }
  term_genes <- lapply(pw_retained, function(x) {
    # x=ego_sig2$ID[1]
    unique(unlist(egSymbols[goegs[[x]]]))
  })
  names(term_genes) <- pw_retained
  term_df <- term_genes %>%
    reshape2::melt() %>%
    dplyr::rename(Term = L1, Gene = value) %>%
    dplyr::select(Term, Gene)
  term_tmp <- enrich_res@result[pw_retained, c("ID", "Description", "pvalue")] %>%
    dplyr::mutate(
      log10 = -log10(pvalue),
      # weight = (log10-min(log10)+1)/(max(log10)-min(log10)))
      weight = exp((log10 - min(log10)) / (max(log10) - min(log10)))
    )
  term_tmp$size <- term_df %>%
    dplyr::group_by(Term) %>%
    dplyr::summarise(n = dplyr::n()) %>%
    as.data.frame() %>%
    tibble::column_to_rownames("Term") %>%
    .[term_tmp$ID, ]
  term_df <- dplyr::left_join(term_df, term_tmp[, c("ID", "Description", "weight", "size")],
    by = c("Term" = "ID")
  )
  return(list(
    term_df = term_df,
    enrich_raw = enrich_res
  ))
}
lishensuo/M2D documentation built on Jan. 4, 2022, 9:44 a.m.