# Document setup: echo code in the knitted output and keep strings as
# character vectors when data frames are built.
knitr::opts_chunk$set(echo = TRUE)
# Spell out FALSE: the shorthand `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
options(stringsAsFactors = FALSE)
# NOTE(review): this removes every object from the calling workspace when the
# chunk is run interactively -- confirm that is intended before keeping it.
rm(list = ls())

convgene

This package is designed to conveniently convert genes between human and mouse. I hope it will be valuable for researchers and bioinformaticians analysing human/mouse data.

functions

Some functions. Here I get a string of genes from the literature: Cdh5 Ptprc Unc5b CD14 CD15 CD33. I want to check their expression in my dataset. First, I need to split the string into a vector of gene symbols.
Second, I may need to check the official gene symbol for some of them.
Third, I want to check their homologous genes in human.

I also want to get all the commonly used gene IDs for my selected genes.
Or even export an HTML file for external searching in a web browser.

So I wrote some functions to meet this routine need in my work.
str_to_gene(): split a string into a vector of genes; alias_to_official(): map aliases to official symbols or IDs; gene_to_homo(): get homologous genes for the input genes; all_to_html(): export the gene tables into a local searchable HTML file.

library(magrittr)
library(tidyverse)

# Build a collapsed human/mouse homology table from a homology report file.
#
# Arguments:
#   HOM_AllOrganism  path to the tab-separated report. It must contain the
#                    columns `HomoloGene ID`, `Common Organism Name` and
#                    `Symbol`.
#
# Side effects: writes "human_mouse.HomoloGeneID.collapsed.Rdata" and
#   "human_mouse.HomoloGeneID.collapsed.xls" to the working directory and
#   prints the count of homology groups per prefix.
#
# Returns (invisibly): a tibble with columns homoloID, homoloSymbol, gene,
#   organism.
HomologyGene <- function(HOM_AllOrganism = "data/HOM_AllOrganism.rpt") {
  # read homoloGene table (from the path argument, not a hard-coded file name)
  orgs <- read.table(
    file = HOM_AllOrganism,
    header = TRUE, sep = "\t",
    check.names = FALSE,
    quote = "", comment.char = "",
    stringsAsFactors = FALSE)

  # keep human and mouse rows only, with short column names
  hm <- orgs %>%
    filter(`Common Organism Name` %in% c("human", "mouse, laboratory")) %>%
    dplyr::select(`HomoloGene ID`, `Common Organism Name`, Symbol) %>%
    set_colnames(c("homoloID", "organism", "gene")) %>%
    mutate(organism = str_replace(organism, "mouse, laboratory", "mouse")) %>%
    unique()

  # list-matrix (homoloID x organism) of gene symbols, and the per-cell counts
  hm_bid <- tapply(hm$gene, list(hm$homoloID, hm$organism), c)
  hm_len <- as.data.frame(apply(hm_bid, c(1, 2), function(x) length(unlist(x))))

  hm_len <- hm_len %>%
    rownames_to_column() %>%
    mutate(
      # first human symbol of the group, or "" when the group has no human gene
      symid = vapply(
        hm_bid[, "human"],
        function(x) {
          first_symbol <- unlist(x)[1]
          if (is.null(first_symbol)) "" else first_symbol
        },
        character(1)),
      # prefix marks which organism(s) have more than one gene in the group
      hm_prefix = case_when(
        human == 1 & mouse == 1 ~ "",
        human == 1 & mouse > 1  ~ "Mm",
        human > 1 & mouse == 1  ~ "Hs",
        human > 1 & mouse > 1   ~ "MmHs",
        TRUE                    ~ as.character(NA)),
      homoloSymbol = case_when(
        human == 1 & mouse == 1 ~ symid,
        human == 1 & mouse > 1  ~ as.character(str_glue("Mm({symid})")),
        human > 1 & mouse == 1  ~ as.character(str_glue("Hs({symid})")),
        human > 1 & mouse > 1   ~ as.character(str_glue("MmHs({symid})")),
        TRUE                    ~ as.character(NA))
    ) %>%
    column_to_rownames()

  # print() is needed for the table to show up from inside a function
  print(table(hm_len$hm_prefix))
  # Hs    Mm  MmHs
  # 16468    60   212    10

  # attach the collapsed symbol to every gene row via its homology group ID
  hm <- hm %>%
    as_tibble() %>%
    mutate(homoloSymbol = hm_len[as.character(homoloID), "homoloSymbol"]) %>%
    dplyr::select(homoloID, homoloSymbol, gene, organism)

  save(hm, file = "human_mouse.HomoloGeneID.collapsed.Rdata")
  WriteXLS::WriteXLS(hm, "human_mouse.HomoloGeneID.collapsed.xls")

  invisible(hm)
}

alias_to_official()

library(org.Hs.eg.db)
#library(org.Mm.eg.db)
# list the key/column types the human annotation database offers
columns(org.Hs.eg.db)

# Define the query genes before their first use below.
genes <- c("CD14", "CDH5")
# map aliases (upper-cased) to official symbols
clusterProfiler::bitr(toupper(genes), fromType = "ALIAS", toType = "SYMBOL", OrgDb = org.Hs.eg.db)
# Namespace select() explicitly: dplyr also exports select(), and which one a
# bare call reaches depends on package attach order.
AnnotationDbi::select(org.Hs.eg.db, keys = genes, columns = c("SYMBOL", "ALIAS"), keytype = "SYMBOL")

insertVector

# vector insertion
#
# Insert the elements of `value` into `x`, each one directly after the
# position given by the matching element of `index`.
#
# Arguments:
#   x      vector to insert into.
#   index  positions of x after which to insert. 0 inserts at the beginning of
#          x; a negative index counts from the end (-1 inserts after the last
#          element). Positions must be unique once negatives are resolved.
#   value  values to insert, matched to `index` by position. Surplus values
#          are dropped; missing ones are filled with NA.
#
# Returns: x with the values inserted (the pieces are combined with unlist()).
#   An index larger than length(x) raises a warning and x is padded with NA up
#   to that index. An empty `index` returns x unchanged.
insertVector <- function(x, index, value) {
  # Nothing to insert: return x as is. Without this guard index[length(index)]
  # is zero-length and the `if` conditions below fail with
  # "argument is of length zero".
  if (length(index) == 0L) {
    return(x)
  }
  if (anyNA(index)) stop("index contains NA")
  if (any(duplicated(index))) stop("index is duplicated")

  # resolve negative indices relative to the end of x
  is_negative <- index < 0
  index[is_negative] <- length(x) + 1 + index[is_negative]
  if (any(duplicated(index))) stop("index is duplicated, beware of negative index")
  if (any(index < 0)) stop("beware of negative index")

  # pair values with their indices first, then sort both by index
  value <- value[seq_along(index)]
  index_ord <- order(index)
  index <- index[index_ord]
  value <- value[index_ord]

  last_index <- index[length(index)]
  if (last_index > length(x)) {
    warning("max index is larger than length of x, NA will be introduced")
    x <- c(x, rep(NA, last_index - length(x)))
  }

  # each piece = the stretch of x since the previous index, then the new value
  ret <- unlist(lapply(seq_along(index), function(i) {
    if (index[i] == 0) return(value[[i]])
    start_ind <- if (i == 1) 1 else index[[i - 1]] + 1
    c(x[start_ind:index[[i]]], value[[i]])
  }))

  # append whatever is left of x after the last insertion point
  if (last_index < length(x)) {
    ret <- c(ret, x[(last_index + 1):length(x)])
  }

  ret
}


zzwch/convgene documentation built on July 11, 2021, 9:41 a.m.