# R/fetch_GeneNames.R

###
#
# R script for fetching gene names and annotations
# TDL
#
###

# Bioconductor dependencies.
# Each package below is attached directly when it is already installed;
# only a missing package triggers an installation, so no remote code is
# fetched or executed when everything is already in place.

# Install a single Bioconductor package without updating other packages.
#
# pkg: name of the Bioconductor package (character scalar).
#
# BiocManager::install() is used when BiocManager is installed, since it
# supersedes biocLite(). The legacy biocLite.R script is only sourced as a
# fallback for older setups that do not have BiocManager.
install_bioc_package <- function(pkg) {
  if (requireNamespace("BiocManager", quietly = TRUE)) {
    BiocManager::install(pkg, update = FALSE, ask = FALSE)
  } else {
    source("https://bioconductor.org/biocLite.R")
    biocLite(pkg, suppressUpdates = TRUE)
  }
}

# BioConductor 'annotate' package for all sorts of conversions and genetic
# location info, etc
# Annotation for microarrays
# For Entrez <-> Hugo Gene Symbol mapping
# http://bioconductor.org/packages/release/bioc/html/annotate.html
if (!requireNamespace("annotate", quietly = TRUE)) {
  install_bioc_package("annotate")
}
library(annotate)

# Genome wide annotation for Human
# For Entrez <-> Hugo Gene Symbol mapping (database)
# https://bioconductor.org/packages/release/data/annotation/html/org.Hs.eg.db.html
if (!requireNamespace("org.Hs.eg.db", quietly = TRUE)) {
  install_bioc_package("org.Hs.eg.db")
}
library(org.Hs.eg.db)

# biomaRt (provides useEnsembl() and getBM() used further down)
if (!requireNamespace("biomaRt", quietly = TRUE)) {
  install_bioc_package("biomaRt")
}
library(biomaRt)

# Fetch gene names
# Connect to the Ensembl BioMart, human gene dataset (requires network access).
ensembl <- useEnsembl(
  biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl"
)
# Retrieve gene/transcript identifiers, HGNC symbols and genomic coordinates.
genes <- getBM(
  attributes = c(
    'ensembl_gene_id',
    'ensembl_transcript_id',
    'hgnc_symbol',
    'chromosome_name',
    'start_position',
    'end_position'
  ),
  mart = ensembl
)
# Distinct HGNC gene symbols found in the query result.
# NOTE(review): entries without an HGNC symbol presumably come back as empty
# strings and would then be part of this vector -- confirm before relying on it.
genenames <- unique(genes[["hgnc_symbol"]])


# Alternative approach using 'GenomicFeatures'
# GenomicFeatures is not installed automatically here; the script aborts
# with an explicit message when the package cannot be attached.
if (!require(GenomicFeatures)) {
  stop("Require 'GenomicFeatures' for GenomicRanges")
}
# Build a transcript database from the UCSC hg38 'refGene' table (requires
# network access). The argument is spelled out as `tablename` instead of
# relying on partial matching of `table`.
hg38_genes <- makeTxDbFromUCSC(genome = "hg38", tablename = "refGene")
# Extract gene-level ranges from the transcript database.
# `genes(...)` is a function call, so R looks up a function named `genes` and
# skips the `genes` data frame created by the biomaRt query earlier in this
# script.
refseq_genes <- genes(hg38_genes)
# Syksy/curatedTools documentation built on May 27, 2019, 9:55 a.m.