# workflows/210127_get_biomart_genes.R

library(biomaRt)
# Show which BioMart databases are available (auto-printed when run at top
# level).
listMarts()


## generate biomart reference for basepair positions
# Connect to the Ensembl mart for the human gene dataset, pinned to the
# GRCh37 assembly.
ensembl <- useEnsembl(
  biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl",
  GRCh = 37
)

# Fetch gene ID, transcript ID, HGNC symbol, chromosome name and start/end
# positions for every entry returned by the mart.
all_genes <- getBM(
  attributes = c(
    "ensembl_gene_id", "ensembl_transcript_id", "hgnc_symbol",
    "chromosome_name", "start_position", "end_position"
  ),
  mart = ensembl
)

# Keep only rows on chromosomes 1-22, X, XY or Y; any other chromosome_name
# value is dropped.
# NOTE(review): "XY" is carried over from the original filter -- confirm
# whether the mart ever reports that chromosome name.
keep_chromosomes <- c(as.character(1:22), "X", "XY", "Y")
all_genes <- all_genes[all_genes$chromosome_name %in% keep_chromosomes, ]

# Drop rows that are exact duplicates across all columns.
all_genes <- all_genes[!duplicated(all_genes), ]

# Write the table as CSV without quoting and without row names.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
write.csv(
  all_genes,
  "~/Desktop/hsapiens_gene_ensembl_grch37_chr1_24_gene_positions.csv",
  quote = FALSE,
  row.names = FALSE
)

#hsapiens_gene_ensembl_grch37_chr1_24 = all_genes

#save(hsapiens_gene_ensembl_grch37_chr1_24, file = 'data/hsapiens_gene_ensembl_grch37_chr1_24.rda', compress = 'xz')

## function for sorting through this data
# frhl/our documentation built on Feb. 5, 2021, 7:30 p.m.