R/ora.R

Defines functions ora

Documented in ora

#' Classical Over Representation Analysis with FOBI
#'
#' @description This function performs a traditional over representation analysis based on hypergeometric test: classes are treated as sets of individual metabolites and all metabolites are treated as equally informative. This function uses Food-Biomarker Ontology knowledge as biological information.
#'
#' @param metaboliteList A vector of FOBI metabolite identifiers that define the selected list of metabolites.
#' @param metaboliteUniverse A vector of FOBI metabolite identifiers that define the universe of possible metabolites (all metabolites analyzed in the study).
#' @param subOntology A character string specifying one of the two FOBI sub-ontologies: "food", or "biomarker".
#' @param pvalCutoff A numeric value indicating a p-value cutoff for raw p-values generated by hypergeometric test.
#' @param fobi FOBI table obtained with `parse_fobi()`. If this value is set to NULL, the last version of FOBI will be downloaded from GitHub.
#'
#' @export
#'
#' @return A tibble with ORA results.
#' @references G. Korotkevich, V. Sukhov, A. Sergushichev. Fast gene set enrichment analysis. bioRxiv (2019), doi:10.1101/060012
#' @references Pol Castellano-Escuder, Raúl González-Domínguez, David S Wishart, Cristina Andrés-Lacueva, Alex Sánchez-Pla, FOBI: an ontology to represent food intake data and associate it with metabolomic data, Database, Volume 2020, 2020, baaa033, https://doi.org/10.1093/databa/baaa033.
#' @author Pol Castellano-Escuder
#'
#' @examples
#' 
#' metaboliteUniverse <- c(fobitools::idmap$FOBI[1:200], fobitools::idmap$FOBI[400:450])
#' metaboliteList <- c(fobitools::idmap$FOBI[1:50], fobitools::idmap$FOBI[70:80])
#' 
#' # Food enrichment analysis
#' fobitools::ora(metaboliteList = metaboliteList, 
#'                metaboliteUniverse = metaboliteUniverse, 
#'                pvalCutoff = 1)
#' 
#' # Chemical class enrichment analysis
#' fobitools::ora(metaboliteList = metaboliteList, 
#'                metaboliteUniverse = metaboliteUniverse, 
#'                subOntology = "biomarker", 
#'                pvalCutoff = 1)
#' 
#' @importFrom magrittr %>%
#' @importFrom tidyr unnest
#' @importFrom dplyr select rename filter mutate_all arrange desc as_tibble
#' @importFrom fgsea fora
ora <- function(metaboliteList,
                metaboliteUniverse,
                subOntology = "food",
                pvalCutoff = 0.01,
                fobi = fobitools::fobi){

  if (is.null(metaboliteList)) {
    stop("metaboliteList must be a vector with selected metabolites")
  }
  if (!is.vector(metaboliteList)) {
    stop("metaboliteList must be a vector with selected metabolites")
  }
  if (is.null(metaboliteUniverse)) {
    stop("metaboliteUniverse must be a vector with the universe of possible metabolites")
  }
  if (!is.vector(metaboliteUniverse)) {
    stop("metaboliteUniverse must be a vector with the universe of possible metabolites")
  }
  if (!(subOntology %in% c("food", "biomarker"))) {
    stop("Incorrect value for subOntology argument. Options are 'food' and 'biomarker'")
  }
  if (pvalCutoff < 0) {
    stop("pvalCutoff cannot be less than zero")
  }

  ##

  if (is.null(fobi)){
    fobi <- parse_fobi()
  }
  
  ##
  
  n_metaboliteUniverse <- length(unique(metaboliteUniverse[!is.na(metaboliteUniverse)]))
  metaboliteUniverse <- unique(metaboliteUniverse[!is.na(metaboliteUniverse)])
    
  ##

  ptw_foods <- fobi %>%
    dplyr::filter(!BiomarkerOf == "NULL") %>%
    dplyr::select(id_BiomarkerOf, BiomarkerOf, name, FOBI) %>%
    tidyr::unnest(cols = c(id_BiomarkerOf, BiomarkerOf, name, FOBI)) %>%
    dplyr::mutate_all(as.factor)
  
  ptw_chemicals <- fobi %>%
    dplyr::filter(!BiomarkerOf == "NULL") %>%
    dplyr::select(is_a_code, is_a_name, name, FOBI) %>%
    tidyr::unnest(cols = c(is_a_code, is_a_name, name, FOBI)) %>%
    dplyr::mutate_all(as.factor) %>%
    dplyr::filter(!duplicated(.))

  ##

  n_food_metabolites <- ptw_foods %>%
    dplyr::filter(FOBI %in% metaboliteUniverse) %>%
    dplyr::filter(duplicated(id_BiomarkerOf))
  
  n_biomarker_metabolites <- ptw_chemicals %>%
    dplyr::filter(FOBI %in% metaboliteUniverse) %>%
    dplyr::filter(duplicated(is_a_code)) 

  ##
  
  if (subOntology == "food") {
    if(nrow(n_food_metabolites) > 1){
      ptw <- unstack(ptw_foods[, c(4,2)])
    }
    else {
      stop("At least two FOBI food classes must be represented by more than one compound in the metaboliteUniverse!")
    }
  }
  
  else if (subOntology == "biomarker") {
    if(nrow(n_biomarker_metabolites) > 1){
      ptw <- unstack(ptw_chemicals[, c(4,2)])
    }
    else {
      stop("At least two FOBI chemical classes must be represented by more than one compound in the metaboliteUniverse!")
    }
  }
  
  ##

  metaboliteList <- unique(metaboliteList[!is.na(metaboliteList)])
  n_metaboliteList <- length(metaboliteList)
  
  ## ORA
  
  res_ora <- fgsea::fora(pathways = ptw, genes = metaboliteList, universe = metaboliteUniverse, minSize = 1, maxSize = Inf)
  
  result <- res_ora %>%
    dplyr::rename(className = pathway, classSize = size, overlapMetabolites = overlapGenes) %>%
    dplyr::select(className, classSize, overlap, pval, padj, overlapMetabolites) %>%
    dplyr::arrange(-dplyr::desc(pval)) %>%
    dplyr::filter(pval <= pvalCutoff) %>%
    dplyr::as_tibble()

  message(paste0(crayon::blue("metaboliteUniverse size: ", n_metaboliteUniverse), 
                 "\n",
                 crayon::blue("metaboliteList size: ", n_metaboliteList)))
  
  return(result)
  
}
pcastellanoescuder/FOBIEnrichR documentation built on Feb. 20, 2024, 11:14 p.m.