#' getOrthologs
#'
#' This function uses the \pkg{biomaRt} package to get ortholog information for supplied human genes
#'
#' @param con A \code{SQLiteConnection} object
#' @param genes A vector of ENSEMBL gene ids
#' @param species The ENSEMBL species id eg mmusculus for mouse
#' @param alt_mart A Mart object created using biomaRt::useMart. This should specify \code{hsapiens_gene_ensembl}
#' as the dataset.
#' @return a data frame
#' @export
#' @examples
#' \dontrun{
#' ex_con <- setupSQLite()
#' ex_genes <- c('ENSG00000074800', 'ENSG00000127616')
#'
#' #get mouse orthologs
#' getOrthologs(ex_con, ex_genes, species='mmusculus')
#'
#' #get fly orthologs
#' getOrthologs(ex_con, ex_genes, species='dmelanogaster')
#'
#' #view the tables in the database
#' DBI::dbListTables(ex_con)
#'
#' #larger set of genes
#' all_genes <- getAllHumanGenes(ex_con)
#' set.seed(20120806)
#' ex_genes2 <- sample(all_genes$gene_id, 2000)
#' ortho_df <- getOrthologs(ex_con, ex_genes2, species='mmusculus')
#'
#' #Specify which Mart to use
#' my_mart <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="sep2015.archive.ensembl.org")
#' getOrthologs(ex_con, ex_genes, species='mmusculus', alt_mart=my_mart)
#' getOrthologs(ex_con, ex_genes, species='dmelanogaster', alt_mart=my_mart)
#' }
getOrthologs <- function(con, genes, species='mmusculus', alt_mart=NULL) {
message(sprintf("Get the %s orthologs for %s human genes from biomart", species, length(genes)))
#get the data from ensembl - use alt_mart if supplied
if(class(alt_mart) == 'Mart' && alt_mart@dataset == 'hsapiens_gene_ensembl') {
message("Using user provided mart details")
ensembl <- alt_mart
} else {
message("Using default mart details")
ensembl <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",dataset="hsapiens_gene_ensembl", host="mar2015.archive.ensembl.org")
}
#display info on mart
message(sprintf("Biomart: %s\nHost: %s\nDataset: %s\n", ensembl@biomart, ensembl@host, ensembl@dataset))
#check that the species is valid
if (!(sprintf('%s_homolog_ensembl_gene', species) %in% biomaRt::listAttributes(ensembl)$name)) {
stop(sprintf('%s not a valid ENSEMBL species id'))
}
# list of gene-chunks, each of chunk 500 genes
gene_chunks <- split(genes, ceiling(seq_along(genes)/500))
message(paste("splitting data into ",length(gene_chunks),"chunks of up to 500 genes"))
#internal function to get the data from biomart
queryMart <- function(query_genes) {
message('fetching from mart ...')
biomaRt::getBM ( attributes = c('ensembl_gene_id',
sprintf('%s_homolog_ensembl_gene', species),
sprintf('%s_homolog_orthology_type', species),
sprintf('%s_homolog_perc_id', species),
sprintf('%s_homolog_perc_id_r1', species)),
filters = 'ensembl_gene_id',
values = query_genes,
mart = ensembl,
bmHeader=F)
}
#get the data from ensembl
martdata <- lapply(gene_chunks, queryMart) %>% dplyr::bind_rows() %>% as.data.frame()
message('Done, now writing to database')
DBI::dbWriteTable(con, sprintf("%s_martdata", species), martdata, overwrite=T)
message('Finished')
return(martdata)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.