#' getIDs
#'
#' Mapping gene identifiers.
#' An easy-to-use and robust wrapper around AnnotationDbi's mapIds function.
#'
#' The organism annotation package is looked up in the table returned by
#' `mappingDBs()`, loaded with `requireNamespace()` (it is not attached to the
#' search path), and queried with `AnnotationDbi::mapIds()`.
#'
#' `from` and `to` are upper-cased and matched against the columns of the
#' annotation database. An exact column name is used as-is; otherwise the
#' value is treated as a regular expression, and if it matches several columns
#' the first match is used and a warning is issued.
#'
#' @param identifiers - input gene identifiers. When `from` resolves to
#' `"MGI"`, identifiers must be given as `MGI:ID`; they are rewritten to the
#' `MGI:MGI:ID` form before the lookup, and the returned names use that
#' rewritten form.
#'
#' @param from - input identifier type, one of (case insensitive):
#' `c("ACCNUM", "ALIAS", "ENSEMBL", "ENSEMBLPROT",
#' "ENSEMBLTRANS", "ENTREZID", "ENZYME", "EVIDENCE", "EVIDENCEALL", "GENENAME",
#' "GO", "GOALL", "IPI", "MGI", "ONTOLOGY", "ONTOLOGYALL", "PATH",
#' "PFAM", "PMID", "PROSITE", "REFSEQ", "SYMBOL", "UNIGENE", "UNIPROT")`
#'
#' @param to - output identifier type, see `from`.
#'
#' @param species - organism identifier for input genes (case insensitive).
#' Ignored when `taxid` is supplied.
#'
#' @param taxid - organism identifier compared against the first field of each
#' `mappingDBs()` entry (presumably an NCBI taxonomy ID). Takes precedence
#' over `species`.
#'
#' @param quiet - if `FALSE`, report how many identifiers could not be mapped.
#'
#' @param multiVals - passed to `AnnotationDbi::mapIds()`; controls what
#' happens when one input maps to several outputs. If the result is a list it
#' is flattened, and each value is named by the identifier it came from.
#'
#' @param ... - currently unused.
#'
#' @return a vector of output gene identifiers named by the (possibly
#' MGI-reformatted) input identifiers; unmapped inputs are `NA`.
#'
#' @author Tyler W Bradshaw, \email{twesleyb10@gmail.com}
#'
#' @references none
#'
#' @keywords none
#'
#' @importFrom AnnotationDbi mapIds
#'
#' @export getIDs
#'
#' @examples
#' \dontrun{
#' mygenes <- c("Actb", "Gapdh")
#' getIDs(mygenes, from = "symbol", to = "entrez", species = "mouse")
#' }
getIDs <- function(identifiers, from, to, species = NULL, taxid = NULL,
                   quiet = TRUE, multiVals = "first", ...) {
  # Fail fast when no organism was specified.
  if (is.null(taxid) && is.null(species)) {
    stop("Please provide a species or taxid for gene identifiers.")
  }

  # Check input identifiers.
  if (anyNA(identifiers)) {
    message("Warning: missing values (NA) detected in input identifiers.")
  }

  # Load annotation database info and pick the organism-specific entry.
  # taxid takes precedence over species when both are supplied.
  annotationDBs <- mappingDBs()
  if (!is.null(taxid)) {
    orgDB <- unlist(annotationDBs[sapply(annotationDBs, "[", 1) == taxid])
  } else {
    orgDB <- unlist(
      annotationDBs[sapply(annotationDBs, "[", 3) == tolower(species)]
    )
  }
  if (length(orgDB) == 0) {
    stop(
      "No annotation database found for ",
      if (!is.null(taxid)) paste0("taxid '", taxid, "'")
      else paste0("species '", species, "'"),
      ".",
      call. = FALSE
    )
  }
  # unlist() yields names like "<entry>.<field>"; keep only the field part.
  names(orgDB) <- sapply(strsplit(names(orgDB), "\\."), "[", 2)

  # Load the mapping database without attaching it or evaluating built code.
  db_name <- orgDB[["database"]]
  db_available <- suppressPackageStartupMessages(
    requireNamespace(db_name, quietly = TRUE)
  )
  if (!db_available) {
    stop(
      "Annotation package '", db_name, "' is required but is not installed.",
      call. = FALSE
    )
  }
  # The annotation object is exported under the same name as its package.
  osDB <- getExportedValue(db_name, db_name)
  db_columns <- AnnotationDbi::columns(osDB)

  # Resolve a user-supplied identifier type to exactly one database column.
  resolve_column <- function(query, arg) {
    pattern <- toupper(query)
    hits <- which(db_columns == pattern)
    if (length(hits) == 0) {
      # No exact column name: fall back to regular-expression matching.
      hits <- grep(pattern, db_columns)
    }
    if (length(hits) == 0) {
      stop(
        "Input argument '", arg, "' (\"", query, "\") does not match any of: ",
        paste(db_columns, collapse = ", "),
        call. = FALSE
      )
    }
    if (length(hits) > 1) {
      warning(
        "Input argument '", arg, "' matches multiple keys: ",
        paste(db_columns[hits], collapse = ", "), "\n",
        paste("Using: ", db_columns[hits[1]]),
        call. = FALSE
      )
      hits <- hits[1]
    }
    db_columns[hits]
  }
  to_column <- resolve_column(to, "to")
  from_column <- resolve_column(from, "from")

  # The database stores MGI keys as "MGI:MGI:<id>"; normalise the input.
  if (from_column == "MGI") {
    if (!any(grepl("MGI:", identifiers))) {
      stop("Please provide MGI identifiers as MGI:ID")
    }
    identifiers <- paste0(
      "MGI:MGI:",
      sub("^.*MGI:", "", as.character(identifiers))
    )
  }

  # Map gene identifiers.
  suppressMessages({
    output <- AnnotationDbi::mapIds(osDB,
      keys = as.character(identifiers),
      column = to_column,
      keytype = from_column,
      multiVals = multiVals
    )
  })

  # Flatten list output, keeping every value labelled with its source key.
  output_names <- identifiers
  if (is.list(output)) {
    output[vapply(output, is.null, logical(1))] <- NA
    values_per_key <- lengths(output)
    if (length(values_per_key) == length(identifiers)) {
      output_names <- rep(identifiers, values_per_key)
    }
    output <- unlist(output)
  }

  # Report identifiers that could not be mapped.
  n_unmapped <- sum(is.na(output))
  if (!isTRUE(quiet) && n_unmapped > 0) {
    organism <- if (is.null(species)) paste("taxid", taxid) else species
    message(paste0(
      "Warning: Unable to map ", n_unmapped, " ", organism, " ",
      from, "(s)", " to ", to, " identifiers!"
    ))
  }

  names(output) <- output_names
  output
}
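# NOTE: the two lines below are embed-snippet text carried over from a website;
# they are not part of the R source and are kept only as comments.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.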