# R/getAllHumanGenes.R

#' getAllHumanGenes
#'
#' This function uses the \pkg{ensembldb} package to extract the protein-coding
#' human genes from an \code{EnsDb} annotation database, keeps those whose
#' sequence name is one of \code{c(1:23, 'X', 'Y')}, and writes the result to
#' the table \code{human_genes} on the supplied database connection.
#'
#' @param con A \code{SQLiteConnection} object. Any existing table named
#'   \code{human_genes} on this connection is overwritten.
#' @param EnsDb An \code{\link[ensembldb]{EnsDb-class}} object (or an object
#'   inheriting from it).  Defaults to EnsDb.Hsapiens.v79.
#' @return A data frame with one row per protein-coding gene, ordered by
#'   \code{seqnames} (a factor with levels \code{c(1:23, 'X', 'Y')}) and then
#'   by \code{start}, restricted to the first eight columns of the gene
#'   annotation.  The same data frame is written to the \code{human_genes}
#'   table of \code{con}.
#' @export
#' @importFrom magrittr %>%
#' @examples
#' \dontrun{
#' #standard usage
#' library(ensembldb)
#' library(EnsDb.Hsapiens.v79)
#'
#' ex_con <- setupSQLite()
#' all_genes <- getAllHumanGenes(ex_con, EnsDb=EnsDb.Hsapiens.v79)
#'
#' #using annotation hub to make an EnsDb object
#' library(AnnotationHub)
#' ah <- AnnotationHub()
#'
#' ## query all available files from Ensembl release 82 for
#' ## Homo sapiens
#' query(ah, c("Homo", "release-82"))
#'
#' ## get the gtf file
#' Gtf <- ah[["AH50308"]]
#'
#' ## create a EnsDb database file from the Gtf
#' DbFile <- ensDbFromGRanges(Gtf, path=tempdir(), organism="Homo_sapiens", version=82, genomeVersion = 'GRCh38')
#' ## Make an EnsDb object from the database file
#' Edb <- EnsDb(DbFile)
#'
#' # use the newly created db file
#' all_genes <- getAllHumanGenes(ex_con, EnsDb=Edb)
#'
#' #or from a GTF file
#' ## ftp://ftp.ensembl.org/pub/release-82/gtf/homo_sapiens/
#' gtffile <- "Homo_sapiens.GRCh38.82.gtf.gz"
#' ## generate the SQLite database file
#' DbFile2 <- ensDbFromGtf(gtf=gtffile, verbose=TRUE)
#' Edb2 <- EnsDb(DbFile2)
#' all_genes <- getAllHumanGenes(ex_con, EnsDb=Edb2)
#'
#' }
#'
#'
getAllHumanGenes <- function(con, EnsDb=EnsDb.Hsapiens.v79::EnsDb.Hsapiens.v79) {

    # inherits() follows class inheritance, so subclasses of EnsDb are accepted
    # as well; an exact comparison against class() would reject them.
    stopifnot(inherits(EnsDb, 'EnsDb'))

    # Sequence names to keep, in the order used for the factor levels below.
    # NOTE(review): human autosomes are presumably named 1-22 in Ensembl, so
    # '23' would match nothing and only add an empty factor level. It is kept
    # so the levels (and their integer codes) stay as callers currently see
    # them - confirm before removing.
    chromosomes <- c(1:23, 'X', 'Y')

    all_genes <- ensembldb::genes(EnsDb,
                                  filter=list(ensembldb::GenebiotypeFilter('protein_coding'))) %>%
        GenomicRanges::as.data.frame() %>%
        dplyr::filter(seqnames %in% chromosomes) %>%
        # explicit levels make arrange() sort chromosomes as 1, 2, ..., X, Y
        # rather than alphabetically
        dplyr::mutate(seqnames = factor(seqnames, levels=chromosomes)) %>%
        dplyr::arrange(seqnames, start) %>%
        # positional selection: keeps the first eight columns of the data frame
        dplyr::select(1:8)

    # Replace any existing 'human_genes' table with the freshly built one.
    # TRUE is spelled out because T is an ordinary variable that can be reassigned.
    DBI::dbWriteTable(con, 'human_genes', all_genes, overwrite=TRUE)

    all_genes
}
# chapmandu2/CollateralVulnerability2016 documentation built on May 13, 2019, 3:27 p.m.