#' Get gene symbols from Ensembl gene names
#'
#' Queries the selected mart once for the unique gene ids and maps the
#' returned symbols back onto the input, so the output has one row per
#' element of \code{geneNames} (duplicates included).
#' @param geneNames a character vector of Ensembl gene ids
#' @param dataset default human: hsapiens_gene_ensembl,
#' for zebrafish drerio_gene_ensembl, for yeast scerevisiae_gene_ensembl
#' @param biomart default "ensembl", passed on to \code{useEnsembl}
#' @importFrom biomaRt getBM
#' @importFrom biomaRt useEnsembl
#' @return a data.table of geneNames and symbols (2 columns), symbol is NA
#' for gene ids the mart returned no row for
getAllORFGeneSymbols <- function(geneNames, dataset = "hsapiens_gene_ensembl",
                                 biomart = "ensembl"){
  # Honour the caller's choice of mart instead of a hard-coded "ensembl"
  ensembl <- useEnsembl(biomart = biomart, dataset = dataset)
  # chmatch only accepts character input, so work on a character copy
  ids <- as.character(geneNames)
  # NOTE(review): 'hgnc_symbol' looks human specific; other datasets may
  # need a different attribute - confirm before relying on non-human output
  geneHits <- getBM(attributes = c('ensembl_gene_id', 'hgnc_symbol'),
                    filters = 'ensembl_gene_id', values = unique(ids),
                    mart = ensembl)
  # Map each input id to the first result row carrying that id
  hitRow <- data.table::chmatch(ids, as.character(geneHits$ensembl_gene_id))
  return(data.table::data.table(geneNames = geneNames,
                                symbol = geneHits$hgnc_symbol[hitRow]))
}
#' Get GO terms
#'
#' Queries \code{biomartr::getGO} once for the unique gene ids and maps the
#' returned GO-slim descriptions back onto the input by gene id.
#' @param geneNames ensembl gene names
#' @param organism scientific name
#' @importFrom biomartr getGO
#' @return a character vector of GO-slim descriptions, one per element of
#' geneNames (the first description returned for that gene, NA if none)
getORFsGoTerms <- function(geneNames, organism){
  ids <- as.character(geneNames)
  Go <- biomartr::getGO(organism = organism,
                        genes = unique(ids),
                        filters = "ensembl_gene_id")
  # The result is not guaranteed to hold exactly one row per queried gene in
  # query order, so look descriptions up by gene id instead of by position.
  # NOTE(review): assumes the id column is named after the filter; falls
  # back to the first column otherwise - confirm against biomartr output.
  idColumn <- if ("ensembl_gene_id" %in% names(Go)) {
    "ensembl_gene_id"
  } else {
    names(Go)[1]
  }
  # match() picks the first row for each id; unmatched ids give NA
  hitRow <- match(ids, as.character(Go[[idColumn]]))
  return(Go$goslim_goa_description[hitRow])
}
#' Infer the biomart dataset belonging to an organism
#'
#' Lists the datasets of the given mart and keeps those whose dataset name
#' matches any space-separated word of \code{organism} (case-insensitive).
#' Stops unless exactly one dataset matches.
#' @inheritParams checkAndInitPipe
#' @importFrom biomaRt listDatasets
#' @importFrom biomaRt useEnsembl
#' @return a character with dataset used, invisible NULL if biomart is NULL
getBiomartFromOrganism <- function(organism, biomart="ensembl") {
  # No mart requested, so there is nothing to look up
  if (is.null(biomart)) return(invisible(NULL))

  mart <- useEnsembl(biomart = biomart)
  datasets <- listDatasets(mart)

  # Build an alternation of the words in the organism name;
  # p() is a helper defined elsewhere in the package
  nameParts <- unlist(strsplit(organism, " "))
  hits <- grep(pattern = p(nameParts, collapse = "|"),
               x = datasets$dataset, value = FALSE, ignore.case = TRUE)
  candidates <- datasets[hits, ][, 1:2]
  nCandidates <- nrow(candidates)

  if (nCandidates == 0) {
    message(p("Did not find biomart candidate for organism", organism))
    message("Set the 'dataset' argument in checkAndInitPipe(dataset = ) from this list:")
    print(datasets)
    stop()
  }
  if (nCandidates > 1) {
    message(p("Found multiple biomart candidates for organism", organism))
    message("Possibilities were")
    print(candidates)
    message("Set the 'dataset' argument in checkAndInitPipe(dataset = ) to correct choice this list:")
    print(datasets)
    stop()
  }

  # Exactly one candidate is left at this point
  chosen <- candidates$dataset
  message(p("Using biomart dataset: ", chosen))
  return(chosen)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.