R/gs_shaker.R
In GeneTonic: Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

Documented in shake_davidResult shake_enrichResult shake_enrichrResult shake_fgseaResult shake_gprofilerResult shake_topGOtableResult

# for all the available enrichment results type, we need
  # - gs_id
  # - gs_description
  # - gs_pvalue
  # - gs_genes
 # optional:
  # - gs_de_count
  # - gs_bg_count
# - plus: cleverly the row name should be gs_id (consistent with clever indexing)
# - plus: for practical reasons, I could use the original columns - or store it in a slot?


#' Convert an enrichResult object
#'
#' Convert an enrichResult object for straightforward use in [GeneTonic()]
#' 
#' This function is able to handle the output of `clusterProfiler` and `reactomePA`,
#' as they both return an object of class `enrichResult` - and this in turn 
#' contains the information required to create correctly a `res_enrich` object.
#'
#' @param obj An `enrichResult` object, obtained via `clusterProfiler` (or also 
#' via `reactomePA`)
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#'
#' @family shakers
#'
#' @examples
#' # dds
#' library("macrophage")
#' library("DESeq2")
#' data(gse)
#' dds_macrophage <- DESeqDataSet(gse, design = ~line + condition)
#' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
#'
#' # res object
#' data(res_de_macrophage, package = "GeneTonic")
#' res_de <- res_macrophage_IFNg_vs_naive
#' de_symbols_IFNg_vs_naive <- res_macrophage_IFNg_vs_naive[
#'   (!(is.na(res_macrophage_IFNg_vs_naive$padj))) &
#'   (res_macrophage_IFNg_vs_naive$padj <= 0.05), "SYMBOL"]
#' bg_ids <- rowData(dds_macrophage)$SYMBOL[rowSums(counts(dds_macrophage)) > 0]
#' \dontrun{
#' library("clusterProfiler")
#' library("org.Hs.eg.db")
#' ego_IFNg_vs_naive <- enrichGO(gene = de_symbols_IFNg_vs_naive,
#'                               universe      = bg_ids,
#'                               keyType       = "SYMBOL",
#'                               OrgDb         = org.Hs.eg.db,
#'                               ont           = "BP",
#'                               pAdjustMethod = "BH",
#'                               pvalueCutoff  = 0.01,
#'                               qvalueCutoff  = 0.05,
#'                               readable      = FALSE)
#'
#' res_enrich <- shake_enrichResult(ego_IFNg_vs_naive)
#' head(res_enrich)
#' }
shake_enrichResult <- function(obj) {
  if (!is(obj, "enrichResult"))
    stop("Provided object must be of class `enrichResult`")

  if (is.null(obj@result$geneID)) {
    stop("You are providing an object where the gene symbols are not specified, ",
         "this is required for running GeneTonic properly.")
  }

  message("Found ", nrow(obj@result), " gene sets in `enrichResult` object, of which ", nrow(as.data.frame(obj)), " are significant.")
  message("Converting for usage in GeneTonic...")

  fullresults <- obj@result

  mydf <- data.frame(
    gs_id = fullresults$ID,
    gs_description = fullresults$Description,
    gs_pvalue = fullresults$pvalue,
    gs_genes = gsub("/", ",", fullresults$geneID),
    gs_de_count = fullresults$Count,
    gs_bg_count = unlist(lapply(strsplit(fullresults$BgRatio, "/"), function(arg) arg[[1]])),
    gs_ontology = obj@ontology,
    GeneRatio = fullresults$GeneRatio,
    BgRatio = fullresults$BgRatio,
    p.adjust = fullresults$p.adjust,
    qvalue = fullresults$qvalue,
    stringsAsFactors = FALSE
  )

  rownames(mydf) <- mydf$gs_id

  return(mydf)
}




#' Convert a topGOtableResult object
#'
#' Convert a topGOtableResult object for straightforward use in [GeneTonic()]
#'

#'
#' @param obj A `topGOtableResult` object
#' @param p_value_column Character, specifying which column the p value for
#' enrichment has to be used. Example values are "p.value_elim" or "p.value_classic"
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#'
#' @family shakers
#'
#' @examples
#'
#' # res_enrich object
#' data(res_enrich_macrophage, package = "GeneTonic")
#'
#' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
#'
shake_topGOtableResult <- function(obj,
                                   p_value_column = "p.value_elim") {

  if(!all(c("GO.ID", "Term", "Annotated", "Significant", "Expected", "p.value_classic") %in%
          colnames(obj))) {
    stop("The provided object must be of in the format specified by the `pcaExplorer::topGOtable` function")
  }

  if(!p_value_column %in% colnames(obj)) {
    stop("You specified a column for the p-value which is not contained in the provided object. \n",
         "Please check the colnames of your object in advance.")
  }

  if(!"genes" %in% colnames(obj)) {
    stop("The column `genes` is not present in the provided object and is required for properly running GeneTonic.",
         "\nMaybe you did set `addGeneToTerms` to FALSE in the call to `pcaExplorer::topGOtable`?")
  }

  # Thought: store somewhere the ontology if possible - in an extra column?
  message("Found ", nrow(obj), " gene sets in `topGOtableResult` object.")
  message("Converting for usage in GeneTonic...")

  fullresults <- obj

  mydf <- data.frame(
    gs_id = fullresults$GO.ID,
    gs_description = fullresults$Term,
    gs_pvalue = fullresults[[p_value_column]],
    gs_genes = fullresults$genes,
    gs_de_count = fullresults$Significant,
    gs_bg_count = fullresults$Annotated,
    # gs_ontology = obj@ontology,
    Expected = fullresults$Expected,
    stringsAsFactors = FALSE
  )

  rownames(mydf) <- mydf$gs_id

  return(mydf)
}

## potential extensions, if the formats/classes get defined in a stable/compatible manner
# shake_goseqResult?
# shake_viseago?
# shake_GSECA?


#' Convert the output of DAVID
#'
#' Convert the output of DAVID for straightforward use in [GeneTonic()]
#' 
#' @param david_output_file The location of the text file output, as exported from
#' DAVID 
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#' 
#' @family shakers
#'
#' @examples
#' david_output_file <- system.file("extdata", 
#'                                  "david_output_chart_BPonly_ifng_vs_naive.txt", 
#'                                  package = "GeneTonic")
#' res_enrich <- shake_davidResult(david_output_file)
shake_davidResult <- function(david_output_file) {
  
  if(!file.exists(david_output_file))
    stop("File not found")
  
  my_david <- read.delim(david_output_file, header = TRUE, sep = "\t")
  # careful, names are auto-sanitized
  
  exp_colnames <- c("Category", "Term", "Count", "X.", "PValue", "Genes",
                    "List.Total", "Pop.Hits", "Pop.Total", "Fold.Enrichment",
                    "Bonferroni", "Benjamini", "FDR")
  if (!all(colnames(my_david) %in% exp_colnames))
    stop("I could not find some of the usual column names from the DAVID output exported to file")
  
  message("Found ", nrow(my_david), " gene sets in the file output from DAVID of which ", sum(my_david$PValue <= 0.05), " are significant (p-value <= 0.05).")
  message("Converting for usage in GeneTonic...")
  
  mydf <- data.frame(
    gs_id = unlist(lapply(strsplit(my_david$Term, "~"), function(arg) arg[[1]])),
    gs_description = unlist(lapply(strsplit(my_david$Term, "~"), function(arg) arg[[2]])),
    gs_pvalue = my_david$PValue,
    gs_genes = gsub(", ", ",", my_david$Genes),
    gs_de_count = my_david$Count,
    gs_bg_count = my_david$Pop.Hits,
    gs_ontology = my_david$Category,
    gs_generatio = my_david$Count/my_david$List.Total,
    gs_bgratio = my_david$Pop.Hits/my_david$Pop.Total,
    gs_foldenrich  = my_david$Fold.Enrichment,
    gs_bonferroni = my_david$Bonferroni,
    gs_benjamini = my_david$Benjamini,
    gs_FDR = my_david$FDR,
    stringsAsFactors = FALSE
  )
  
  rownames(mydf) <- mydf$gs_id
  
  return(mydf)
}



#' Convert the output of Enrichr
#'
#' Convert the output of Enrichr for straightforward use in [GeneTonic()]
#' 
#' @param enrichr_output_file The location of the text file output, as exported from
#' Enrichr 
#' @param enrichr_output A data.frame with the output of `enrichr`, related to a 
#' specific set of genesets. Usually it is one of the members of the list returned
#' by the initial call to `enrichr`.
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#' 
#' @family shakers
#'
#' @examples
#' # library("enrichR")
#' # dbs <- c("GO_Molecular_Function_2018", 
#' #          "GO_Cellular_Component_2018", 
#' #          "GO_Biological_Process_2018", 
#' #          "KEGG_2019_Human", 
#' #          "Reactome_2016", 
#' #          "WikiPathways_2019_Human")
#' # degenes <- (deseqresult2df(res_macrophage_IFNg_vs_naive, FDR = 0.01)$SYMBOL)
#' # if called directly withín R...
#' # enrichr_output_macrophage <- enrichr(degenes, dbs)
#' # or alternatively, if downloaded from the website in tabular format
#' enrichr_output_file <- system.file("extdata", 
#'                                    "enrichr_tblexport_IFNg_vs_naive.txt", 
#'                                    package = "GeneTonic")
#' res_from_enrichr <- shake_enrichrResult(enrichr_output_file = enrichr_output_file)
#' # res_from_enrichr2 <- shake_enrichrResult(
#' #   enrichr_output = enrichr_output_macrophage[["GO_Biological_Process_2018"]])
shake_enrichrResult <- function(enrichr_output_file,
                                enrichr_output = NULL) {
  if (is.null(enrichr_output)) {
    if(!file.exists(enrichr_output_file))
      stop("File not found")
    enrichr_output <- read.delim(enrichr_output_file, header = TRUE, sep = "\t")
  }
  
  if (!is.null(enrichr_output)) {
    # if still a list, might need to select the appropriate element
    if (is(enrichr_output, "list"))
      stop("Expecting a data.frame object. Maybe you are providing the list", 
           " containing it? You could do so by selecting the appropriate element",
           " of the list")
  }
  
  exp_colnames <- c("Term", "Overlap", "P.value", "Adjusted.P.value",
                    "Old.P.value", "Old.Adjusted.P.value", "Odds.Ratio",
                    "Combined.Score", "Genes")
  if (!all(colnames(enrichr_output) %in% exp_colnames))
    stop("I could not find some of the usual column names from the Enrichr output")
  
  message("Found ", nrow(enrichr_output), " gene sets in the file output from Enrichr of which ", sum(enrichr_output$P.value <= 0.05), " are significant (p-value <= 0.05).")
  message("Converting for usage in GeneTonic...")
  
  # TODO: split up id and term - or just keep em the same?!
  # this does work for go term as they encode it...
  mydf <- data.frame(
    gs_id = gsub("\\)", "", gsub("^.* \\(", "", enrichr_output$Term)),
    gs_description = gsub(" \\(GO.*$", "", enrichr_output$Term),
    gs_pvalue = enrichr_output$P.value,
    gs_genes = gsub(";", ",", enrichr_output$Genes),
    gs_de_count = as.numeric(
      unlist(lapply(strsplit(enrichr_output$Overlap, "/"), function(arg) arg[[1]]))),
    gs_bg_count = as.numeric(
      unlist(lapply(strsplit(enrichr_output$Overlap, "/"), function(arg) arg[[2]]))),
    gs_adj_pvalue = enrichr_output$Adjusted.P.value,
    stringsAsFactors = FALSE
  )
  
  rownames(mydf) <- mydf$gs_id
  
  return(mydf)
}


#' Convert the output of g:Profiler
#'
#' Convert the output of g:Profiler for straightforward use in [GeneTonic()]
#' 
#' @param gprofiler_output_file The location of the text file output, as exported from
#' g:Profiler 
#' @param gprofiler_output A data.frame with the output of `gost()` in `gprofiler2`. 
#' Usually it is one of the members of the list returned by the initial call to `gost`.
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#' 
#' @family shakers
#'
#' @examples
#' # degenes <- (deseqresult2df(res_macrophage_IFNg_vs_naive, FDR = 0.01)$SYMBOL)
#' # if called directly withín R...
#' # enrichr_output_macrophage <- enrichr(degenes, dbs)
#' # or alternatively, if downloaded from the website in tabular format
#' gprofiler_output_file <- system.file(
#'   "extdata", 
#'   "gProfiler_hsapiens_5-25-2020_tblexport_IFNg_vs_naive.csv", 
#'   package = "GeneTonic")
#' res_from_gprofiler <- shake_gprofilerResult(gprofiler_output_file = gprofiler_output_file)
#' 
#' data(gostres_macrophage, package = "GeneTonic")
#' res_from_gprofiler_2 <- shake_gprofilerResult(
#'   gprofiler_output = gostres_macrophage$result
#' )
shake_gprofilerResult <- function(gprofiler_output_file,
                                  gprofiler_output = NULL) {
  if (is.null(gprofiler_output)) {
    # expecting text input
    if(!file.exists(gprofiler_output_file))
      stop("File not found")
    gprofiler_output <- read.delim(gprofiler_output_file, header = TRUE, sep = ",")
    
    exp_colnames_textual <- c("source", "term_name", "term_id", "adjusted_p_value", 
                              "negative_log10_of_adjusted_p_value", "term_size", 
                              "query_size", "intersection_size", "effective_domain_size",
                              "intersections")
    
    if (!all(colnames(gprofiler_output) %in% exp_colnames_textual))
      stop("I could not find some of the usual column names from the g:Profiler output.", 
           " A possible reason could be that you did not specify `evcodes = TRUE`?",
           " This is required to fill in all the required fields of `res_enrich`")
    
    message("Found ", nrow(gprofiler_output), " gene sets in the file output from Enrichr of which ", sum(gprofiler_output$adjusted_p_value <= 0.05), " are significant (p-value <= 0.05).")
    message("Converting for usage in GeneTonic...")
    
    mydf <- data.frame(
      gs_id = gprofiler_output$term_id,
      gs_description = gprofiler_output$term_name,
      gs_pvalue = gprofiler_output$adjusted_p_value,
      gs_genes = gprofiler_output$intersections,
      gs_de_count = gprofiler_output$intersection_size,
      gs_bg_count = gprofiler_output$term_size,
      gs_adj_pvalue = gprofiler_output$adjusted_p_value,
      stringsAsFactors = FALSE
    )
    
  } else {
    # using directly the output from the call from gprofiler2
    # if still a list, might need to select the appropriate element
    if (is(gprofiler_output, "list"))
      stop("Expecting a data.frame object. Maybe you are providing the list", 
           " containing it? You could do so by selecting the appropriate element",
           " of the list")
    
    exp_colnames_rcall <- c("query", "significant", "p_value", "term_size", "query_size",
                            "intersection_size", "precision", "recall",
                            "term_id", "source", "term_name", "effective_domain_size",
                            "source_order", "parents", "evidence_codes", "intersection")
    
    if (!all(colnames(gprofiler_output) %in% exp_colnames_rcall))
      stop("I could not find some of the usual column names from the g:Profiler output.", 
           " A possible reason could be that you did not specify `evcodes = TRUE`?",
           " This is required to fill in all the required fields of `res_enrich`")
    
    message("Found ", nrow(gprofiler_output), " gene sets in the file output from Enrichr of which ", sum(gprofiler_output$p_value <= 0.05), " are significant (p-value <= 0.05).")
    message("Converting for usage in GeneTonic...")
    
    mydf <- data.frame(
      gs_id = gprofiler_output$term_id,
      gs_description = gprofiler_output$term_name,
      gs_pvalue = gprofiler_output$p_value,
      gs_genes = gprofiler_output$intersection,
      gs_de_count = gprofiler_output$intersection_size,
      gs_bg_count = gprofiler_output$term_size,
      gs_adj_pvalue = gprofiler_output$p_value,
      gs_ontology = gprofiler_output$source,
      stringsAsFactors = FALSE
    )
  }
  
  rownames(mydf) <- mydf$gs_id
  
  return(mydf)
}



#' Convert the output of fgsea
#'
#' Convert the output of fgsea for straightforward use in [GeneTonic()]
#' 
#' @param fgsea_output A data.frame with the output of `fgsea()` in `fgsea`. 
#'
#' @return A `data.frame` compatible for use in [GeneTonic()] as `res_enrich`
#' @export
#' 
#' @family shakers
#'
#' @examples
#' data(fgseaRes, package = "GeneTonic")
#' res_from_fgsea <- shake_fgseaResult(fgseaRes)
shake_fgseaResult <- function(fgsea_output) {
  if (!is(fgsea_output, "data.frame")) {
    stop("fgsea output should be a data.frame!")
  }
  exp_colnames <- c("pathway", "pval", "padj", "ES", "NES",
                    "nMoreExtreme", "size", "leadingEdge")
  if (!all(colnames(fgsea_output) %in% exp_colnames))
    stop("I could not find some of the usual column names from the fgsea output.", 
         " Maybe you performed additional processing/filtering steps?")
    
  if (!is(fgsea_output$leadingEdge, "list")) {
    stop("Expecting 'leadingEdge' column to be a list")
  }
    
  message("Found ", nrow(fgsea_output), " gene sets in the file output from Enrichr of which ", sum(fgsea_output$padj <= 0.05), " are significant (p-value <= 0.05).")
  message("Converting for usage in GeneTonic...")
  
  message(
    "Using the content of the 'leadingEdge' column to generate the 'gs_genes' for GeneTonic...",
    " If you have that information available directly, please adjust the content accordingly.",
    "\n\nUsing the set of the leadingEdge size to compute the 'gs_de_count'"
  )

  message("\n\nfgsea is commonly returning no identifier for the gene sets used.",
          " Please consider editing the 'gs_id' field manually according to the gene set you",
          " provided")
  
  mydf <- data.frame(
    gs_id = fgsea_output$pathway,
    gs_description = fgsea_output$pathway,
    gs_pvalue = fgsea_output$pval,
    gs_genes = vapply(fgsea_output$leadingEdge, 
                      function (arg) paste(arg, collapse = ","), character(1)),
    gs_de_count = lengths(fgsea_output$leadingEdge),
    gs_bg_count = fgsea_output$size,
    gs_NES = fgsea_output$NES,
    gs_adj_pvalue = fgsea_output$padj,
    stringsAsFactors = FALSE
  )
  
  rownames(mydf) <- mydf$gs_id
  
  # consider re-sorting by p-value?
  
  
  return(mydf)
}

Any scripts or data that you put into this service are public.

GeneTonic documentation built on Nov. 8, 2020, 5:27 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

GeneTonic
Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

R/gs_shaker.R
In GeneTonic: Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

Defines functions shake_fgseaResult shake_gprofilerResult shake_enrichrResult shake_davidResult shake_topGOtableResult shake_enrichResult

Documented in shake_davidResult shake_enrichResult shake_enrichrResult shake_fgseaResult shake_gprofilerResult shake_topGOtableResult

Try the GeneTonic package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GeneTonic Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

R/gs_shaker.R In GeneTonic: Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

Defines functions shake_fgseaResult shake_gprofilerResult shake_enrichrResult shake_davidResult shake_topGOtableResult shake_enrichResult

Documented in shake_davidResult shake_enrichResult shake_enrichrResult shake_fgseaResult shake_gprofilerResult shake_topGOtableResult

Try the GeneTonic package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GeneTonic
Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis

R/gs_shaker.R
In GeneTonic: Enjoy Analyzing And Integrating The Results From Differential Expression Analysis And Functional Enrichment Analysis