# R/ortholog_convert.R

# load("data/GRCh38_tx2gene.rda")
# load("data/GRCm38_tx2gene.rda")
# attrs <- c("ensembl_gene_id", "external_gene_name",  "entrezgene_id")
# mart.hs <- useMart("ENSEMBL_MART_ENSEMBL", "hsapiens_gene_ensembl",
#                    host="asia.ensembl.org", ssl.verifypeer=F)
# mart.mm <- useMart("ENSEMBL_MART_ENSEMBL", "mmusculus_gene_ensembl",
#                    host="asia.ensembl.org", ssl.verifypeer=F)
# hs_mm <- biomaRt::getLDS(attributes=attrs, filters="ensembl_gene_id",
#                          values=unique(GRCh38_tx2gene$GENEID), mart=mart.hs,
#                          attributesL=attrs, martL=mart.mm)
# colnames(hs_mm) <- c("ensembl_gene_id.hs", "external_gene_name.hs", "entrezgene_id.hs",
#                      "ensembl_gene_id.mm", "external_gene_name.mm", "entrezgene_id.mm")
# mm_hs <- biomaRt::getLDS(attributes=attrs, filters="ensembl_gene_id",
#                          values=unique(GRCm38_tx2gene$GENEID), mart=mart.mm,
#                          attributesL=attrs, martL=mart.hs)
# colnames(mm_hs) <- c( "ensembl_gene_id.mm", "external_gene_name.mm", "entrezgene_id.mm",
#                       "ensembl_gene_id.hs", "external_gene_name.hs", "entrezgene_id.hs")
# mm_hs <- dplyr::select(mm_hs, 4:6, 1:3)
# ortholog <- dplyr::bind_rows(hs_mm, mm_hs)
# ortholog <- dplyr::distinct(ortholog, ensembl_gene_id.hs, ensembl_gene_id.mm,
#                             external_gene_name.hs, external_gene_name.mm,
#                             entrezgene_id.hs, entrezgene_id.mm, .keep_all=T) %>%
#   as_tibble()
#
# usethis::use_data(ortholog, internal=F, overwrite=T)
#
# writexl::write_xlsx(ortholog, path="~/Desktop/ortholog.ensembl.95.xlsx")


#' Convert gene identifiers between human and mouse orthologs
#'
#' Looks up `genes` in an ortholog table whose columns are named
#' `<type>.<species>` (for example `external_gene_name.mm`) and returns the
#' matching rows, restricted to the query column and the requested identifier
#' columns of the other species.
#'
#' @param genes A vector of gene identifiers to convert. It is coerced to
#'   character; `NA` and empty strings are dropped before the lookup so they
#'   cannot match rows whose identifier is missing.
#' @param from Species of `genes`; it must be `"hs"` or `"mm"`. The result is
#'   reported for the other species.
#' @param fromType Identifier type of `genes`. One of `"ensembl_gene_id"`,
#'   `"external_gene_name"` or `"entrezgene_id"`. The shorthands `"ensembl"`,
#'   `"symbol"`, `"entrezid"` and `"entrezgene"`, and the legacy spelling
#'   `"ensembl_gene_name"`, are also accepted.
#' @param toType Identifier type(s) to retrieve for the other species. If
#'   `NULL`, it is the same as `fromType`; otherwise one or more of the values
#'   accepted by `fromType`.
#' @param dataSource If `NULL`, the `ortholog` dataset of this package is
#'   used. Otherwise a table that `as_tibble()` can coerce and that contains
#'   the `<type>.<species>` columns needed for the query.
#'
#' @return A tibble with the `fromType` column of `from` followed by the
#'   requested `toType` column(s) of the other species, one row per matching
#'   row of the ortholog table. A warning is raised when some of `genes` have
#'   no match.
#' @export
#'
#' @examples
#' genes <- c("Gucy1a1", "Pten", "Zyx", "Zscan5b", "Dada")
#' ortholog_symbol_convert(genes, "mm", "symbol",
#'                         toType = c("ensembl_gene_id", "symbol", "entrezgene"))
ortholog_symbol_convert <- function(genes, from, fromType, toType = NULL,
                                    dataSource = NULL) {

  # Every accepted spelling, mapped to the column-name stem used by the
  # ortholog table. "ensembl_gene_name" is the spelling older versions of this
  # function accepted for gene symbols.
  type_alias <- c(
    ensembl_gene_id    = "ensembl_gene_id",
    ensembl            = "ensembl_gene_id",
    external_gene_name = "external_gene_name",
    ensembl_gene_name  = "external_gene_name",
    symbol             = "external_gene_name",
    entrezgene_id      = "entrezgene_id",
    entrezgene         = "entrezgene_id",
    entrezid           = "entrezgene_id"
  )
  type_hint <- paste(names(type_alias), collapse = ", ")

  if (length(from) != 1L || !(from %in% c("hs", "mm"))) {
    stop("Incorrect from, it should be hs or mm", call. = FALSE)
  }
  from <- as.character(from)

  if (length(fromType) != 1L || !(fromType %in% names(type_alias))) {
    stop("Incorrect fromType, it should be one of ", type_hint, call. = FALSE)
  }
  fromType <- as.character(fromType)

  if (is.null(toType)) {
    toType <- fromType
  } else if (length(toType) == 0L || !all(toType %in% names(type_alias))) {
    stop("Incorrect toType, it should be one or more of ", type_hint,
         call. = FALSE)
  }
  toType <- as.character(toType)

  if (is.null(dataSource)) {
    data <- AnnotationHub::ortholog
  } else {
    data <- as_tibble(dataSource)
  }

  # Map a requested type to a column of `data`. The spelling given by the
  # caller wins when such a column exists (keeps custom tables working);
  # otherwise the canonical stem is used.
  resolve_column <- function(type, species) {
    stems <- unique(c(type, type_alias[[type]]))
    candidates <- paste(stems, species, sep = ".")
    present <- candidates[candidates %in% names(data)]
    if (length(present) == 0L) {
      stop("Column ", candidates[length(candidates)],
           " can't be found in ortholog data", call. = FALSE)
    }
    present[1L]
  }

  # genes query from column
  from_column <- resolve_column(fromType, from)

  # genes query to column(s), in the other species
  to <- if (from == "hs") "mm" else "hs"
  to_column <- unique(vapply(toType, resolve_column, character(1L),
                             species = to, USE.NAMES = FALSE))

  # query; `%in%` treats NA as matching NA, so missing/empty queries are
  # removed to avoid returning every row that lacks an identifier
  genes <- as.character(genes)
  genes <- unique(genes[!is.na(genes) & nzchar(genes)])

  index_found <- which(data[[from_column]] %in% genes)
  result <- data[index_found, c(from_column, to_column), drop = FALSE]

  # not found genes
  notfound <- setdiff(genes, as.character(result[[from_column]]))

  if (length(notfound) > 0L) {
    shown <- utils::head(notfound, 10L)
    warning(
      "Part of genes can't be found in ortholog (", length(notfound), "): ",
      paste(shown, collapse = ", "),
      if (length(notfound) > length(shown)) ", ..." else "",
      "; there may be no ortholog between species or the spelling is incorrect",
      call. = FALSE
    )
  }

  as_tibble(result)
}
# soulong/AnnotationHub documentation built on July 6, 2019, 3:17 a.m.