# R/AnnotateMarkerGenes.R
#
# Defines functions: AnnotateMarkerGenes
#
# Documented in: AnnotateMarkerGenes

#' Annotate differentially expressed genes using \code{biomaRt}.
#'
#' @name AnnotateMarkerGenes
#' @author Jack Leary
#' @description This function uses the \code{biomaRt} package to fetch a user-defined set of attributes for every gene in a dataframe of differentially expressed genes, and joins those attributes onto that dataframe. Intended to be run directly after \code{\link{FindSubpopulationMarkers}}.
#' @importFrom biomaRt useMart getBM
#' @param marker.genes The dataframe of marker genes generated by \code{\link{FindSubpopulationMarkers}}. Must contain a column called \code{gene} containing either HGNC or MGI symbols (depending on species). Defaults to NULL.
#' @param species The species of the cells being analyzed. Defaults to "human", but also supports "mouse".
#' @param desired.annos The character vector of BioMart attribute names you'd like to retrieve for each gene. Defaults to NULL.
#' @return A data.frame containing the rows of \code{marker.genes} in their original order, with the requested annotations added as extra columns. Genes for which the query returns nothing get \code{NA} annotations; a gene for which the query returns several records appears once per record.
#' @seealso \code{\link[biomaRt]{listAttributes}}
#' @seealso \code{\link[biomaRt]{listDatasets}}
#' @export
#' @examples
#' \dontrun{
#' AnnotateMarkerGenes(marker.genes = de_results,
#'                     species = "human",
#'                     desired.annos = c("ensembl_gene_id", "gene_biotype"))
#' }

AnnotateMarkerGenes <- function(marker.genes = NULL,
                                species = "human",
                                desired.annos = NULL) {
  # check inputs -- everything is validated before the first (slow, networked) biomaRt call
  if (is.null(marker.genes)) { stop("Please supply a dataframe containing marker genes.") }
  if (is.null(desired.annos)) { stop("Please supply a vector of annotations you'd like to generate.") }
  if (!is.data.frame(marker.genes) || !("gene" %in% colnames(marker.genes))) {
    stop("marker.genes must be a dataframe with a column called 'gene'.")
  }
  if (!is.character(desired.annos) || length(desired.annos) == 0L) {
    stop("desired.annos must be a non-empty character vector of attribute names.")
  }
  if (!is.character(species) || length(species) != 1L || !(species %in% c("human", "mouse"))) {
    stop("species must be one of 'human' or 'mouse'.")
  }
  # pick the Ensembl dataset and the matching gene-symbol attribute for the species
  if (species == "human") {
    dataset <- "hsapiens_gene_ensembl"
    symbol.attr <- "hgnc_symbol"
  } else {
    dataset <- "mmusculus_gene_ensembl"
    symbol.attr <- "mgi_symbol"
  }
  # create mart
  mart <- biomaRt::useMart("ensembl", dataset = dataset)
  # retrieve annotations -- a single query over the unique symbols covers every cluster
  genes <- unique(as.character(marker.genes[["gene"]]))
  genes <- genes[!is.na(genes)]
  # the symbol attribute is always requested so results can be matched back to `gene`;
  # unique() avoids asking for it twice when the user also listed it
  annos <- biomaRt::getBM(attributes = unique(c(symbol.attr, desired.annos)),
                          mart = mart,
                          filters = symbol.attr,
                          values = genes)
  # prepare result dataframe
  # merge() does not keep the row order of its first argument, so tag each row
  # with its position, join, then restore the original order
  order.col <- "..AnnotateMarkerGenes.row.."
  marker.genes[[order.col]] <- seq_len(nrow(marker.genes))
  anno_genes <- merge(marker.genes,
                      annos,
                      by.x = "gene",
                      by.y = symbol.attr,
                      all.x = TRUE,
                      sort = FALSE)
  anno_genes <- anno_genes[order(anno_genes[[order.col]]), , drop = FALSE]
  anno_genes[[order.col]] <- NULL
  rownames(anno_genes) <- NULL
  return(anno_genes)
}
# jr-leary7/SCISSORS documentation built on April 20, 2023, 8:21 p.m.