Mapping homologous gene symbols

This vignette shows how to map homologous gene symbols from Macaca fascicularis to the human genome.

all_times <- list()  # store the time for each chunk
knitr::knit_hooks$set(time_it = local({
  now <- NULL
  function(before, options) {
    if (before) {
      now <<- Sys.time()
    } else {
      res <- difftime(Sys.time(), now, units = "secs")
      all_times[[options$label]] <<- res
    }
  }
}))
knitr::opts_chunk$set(
  tidy = TRUE,
  tidy.opts = list(width.cutoff = 95),
  message = FALSE,
  warning = FALSE,
  time_it = TRUE
)
orthologs <- readRDS(file = "fls/orthologs.rds")

Load the essential packages

require(biomaRt)
require(tidyverse)
require(SignacX)

After mapping the reads to the Macaca fasccicularis genome, we load the genes, which were generated from the output of the cellranger pipeline from 10X Genomics.

Load Macaca fascicularis genes

features.tsv <- read.delim("fls/features.tsv.gz", header=FALSE, stringsAsFactors=FALSE)
head(features.tsv)

Map homologous genes

# get human and cyno gene symbols
human.R95 <- useMart(host='jan2019.archive.ensembl.org', 
                   biomart='ENSEMBL_MART_ENSEMBL', 
                   dataset='hsapiens_gene_ensembl')
cyno.R95 <- useMart(host='jan2019.archive.ensembl.org', 
                   biomart='ENSEMBL_MART_ENSEMBL', 
                   dataset='mfascicularis_gene_ensembl')

# values = listeENSID: list of cynomolgus ensembl IDs to be retrieved. 
listeENSID = features.tsv$V1
orthologs <- getLDS(attributes = c("ensembl_gene_id","external_gene_name"), 
                  filters = "ensembl_gene_id", 
                  values = listeENSID, 
                  mart = cyno.R95, 
                  attributesL = c("hgnc_symbol","ensembl_gene_id"), 
                  martL = human.R95)
orthologs <- as_tibble(orthologs)
colnames(orthologs) <- c("GeneID","cynoSymbol","HumanSymbol","HumanGeneID")

# keep only 1:1 orthologs
one2one <- orthologs %>% 
group_by(GeneID) %>% 
summarise(n()) %>% 
filter(`n()`<=1) %>%
dplyr::select(`GeneID`) %>%
pull()
orthologs <- orthologs %>% filter(GeneID %in% one2one)

# replace empty HumanSymbol (where there isn't a gene name for a homologous gene) with NA
orthologs <- orthologs %>%
mutate(HumanSymbol=replace(HumanSymbol,HumanSymbol=="", NA))
orthologs <- orthologs %>%
mutate(cynoSymbol=replace(cynoSymbol,cynoSymbol=="", NA))

idx = match(listeENSID, orthologs$GeneID)
xx = orthologs$HumanSymbol[idx]
logik = !is.na(orthologs$HumanSymbol[idx]) # sum(logik) returns 17,365 homologous genes
xx = xx[logik]
orthologs = orthologs[!is.na(orthologs$HumanSymbol),]
# note: several of these genes are not unique mappers; we will aggregate them later or make them unique. To aggregate, where E is the sparse expression matrix with rownames set to xx:
# E = Matrix.utils::aggregate.Matrix(E, row.names(E))

Now we have mapped homologous gene symbols across species:

head(orthologs)

After mapping homologous genes, Signac can be used to classify the cell types.

write.csv(x = t(as.data.frame(all_times)), file = "fls/tutorial_times_Crabeating_vignette.csv")
saveRDS(orthologs, file = "fls/orthologs.rds")

Session Info

sessionInfo()



Try the SignacX package in your browser

Any scripts or data that you put into this service are public.

SignacX documentation built on Nov. 18, 2021, 5:07 p.m.