R/txdb.R

Defines functions getGeneAnnotMap getAnnotGrl.gencode getTxDb.gencode

Documented in getAnnotGrl.gencode getGeneAnnotMap getTxDb.gencode

#' getTxDb.gencode
#' 
#' Create a \code{TxDb} object by downloading the corresponding GTF file from Gencode
#' @param name	gencode identifier. Currently supported are: "gencode.v27", "gencode.v19", "gencode.vM16", "gencode.vM1"
#' @return \code{TxDb} object
#' @author Fabian Mueller
#' @export 
getTxDb.gencode <- function(name){
	require(GenomicFeatures)
	fileTab <- data.frame(
		name=c("gencode.v44", "gencode.v29", "gencode.v27", "gencode.v21", "gencode.v19", "gencode.vM21", "gencode.vM16", "gencode.vM1"),
		version=c("44", "29", "27", "21", "19", "M21", "M16", "M1"),
		fileURL=c(
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/gencode.v44.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_29/gencode.v29.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_27/gencode.v27.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_21/gencode.v21.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M21/gencode.vM21.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M16/gencode.vM16.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M1/gencode.vM1.annotation.gtf.gz"
		),
		organism = c("Homo sapiens", "Homo sapiens", "Homo sapiens", "Homo sapiens", "Homo sapiens", "Mus musculus", "Mus musculus", "Mus musculus"),

		genomeAss=c("hg38", "hg38", "hg38", "hg38", "hg19", "mm10", "mm10", "mm9"),
		stringsAsFactors = FALSE
	)
	rownames(fileTab) <- fileTab$name
	if (!is.element(name, fileTab$name)){
		logger.error(c("Gencode name not supported:", name, "(must be one of", paste(fileTab$name, collapse=", "), ")"))
	}
	chrInfo <- getSeqlengths4assembly(fileTab[name, "genomeAss"])
	chrInfo <- Seqinfo(names(chrInfo), chrInfo, genome=fileTab[name, "genomeAss"])
	txdb <- makeTxDbFromGFF(fileTab[name, "fileURL"], format="gtf", dataSource=paste("Gencode version", fileTab[name, "version"]), circ_seqs=character(), organism=fileTab[name, "organism"], chrominfo=chrInfo)
	return(txdb)
}

#' getAnnotGrl.gencode
#' 
#' Create a \code{GRangesList} with element annotation by downloading the corresponding GTF file from Gencode
#' @param name	gencode identifier. Currently supported are: "gencode.v27", "gencode.v19", "gencode.vM16", "gencode.vM1"
#' @return \code{GRangesList} object with annotated elements for each element type (genes, transcripts, exons, ...)
#' @author Fabian Mueller
#' @export 
getAnnotGrl.gencode <- function(name){
	require(GenomicFeatures)
	require(rtracklayer)
	fileTab <- data.frame(
		name=c("gencode.v44", "gencode.v29", "gencode.v27", "gencode.v21", "gencode.v19", "gencode.vM21", "gencode.vM16", "gencode.vM1"),
		version=c("44", "29", "27", "21", "19", "M21", "M16", "M1"),
		fileURL=c(
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/gencode.v44.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_29/gencode.v29.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_27/gencode.v27.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_21/gencode.v21.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M21/gencode.vM21.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M16/gencode.vM16.annotation.gtf.gz",
			"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M1/gencode.vM1.annotation.gtf.gz"
		),
		organism = c("Homo sapiens", "Homo sapiens", "Homo sapiens", "Homo sapiens", "Homo sapiens", "Mus musculus", "Mus musculus", "Mus musculus"),

		genomeAss=c("hg38", "hg38", "hg38", "hg38", "hg19", "mm10", "mm10", "mm9"),
		stringsAsFactors = FALSE
	)
	rownames(fileTab) <- fileTab$name
	if (!is.element(name, fileTab$name)){
		logger.error(c("Gencode name not supported:", name, "(must be one of", paste(fileTab$name, collapse=", "), ")"))
	}
	gr <- import.gff(fileTab[name, "fileURL"])
	gr <- setGenomeProps(gr, fileTab[name, "genomeAss"])
	gr <- sortGr(gr)
	grl <- split(gr, elementMetadata(gr)[,"type"])
	return(grl)
}


#' getGeneAnnotMap
#' 
#' Get a mapping (e.g. of identifiers) and automatically select the correct \code{AnnotationDbi} database for a given
#' assembly
#' @param assembly	character string specifying the assembly
#' @param from      the column name that will be used as the key for the resulting map
#' @param to        the column name that will be used as the result for the resulting map
#' @param multiMap  character string specifying what to do if multiple mappings are found for a key
#'                  by default (\code{multiMap="paste"}) the results will be pasted into a single character string (separated by ';').
#'                  Other options include \code{'first'} for just returning the first value or 'list' for returning a list of all values
#' @return a named vector (or list depending on how the \code{multiMap} argument is chosen) providing a mapping
#' @author Fabian Mueller
#' @export
getGeneAnnotMap <- function(assembly, from="ENSEMBL", to="SYMBOL", multiMap="paste"){
	aa <- NULL
	if (is.element(assembly, c("hg38", "hg19"))){
		require(org.Hs.eg.db)
		aa <- org.Hs.eg.db::org.Hs.eg.db
	} else if (is.element(assembly, c("mm9", "mm10"))){
		require(org.Mm.eg.db)
		aa <- org.Mm.eg.db::org.Mm.eg.db
	} else {
		stop(paste0("Unknown assembly:", assembly))
	}
	kk <- keys(aa, keytype=from)
	if (multiMap=="paste"){
		res <- mapIds(aa, keys=kk, column=to, keytype=from, multiVals="list")
		res <- sapply(res, FUN=function(x){paste(x, collapse=";")})
	} else {
		res <- mapIds(aa, keys=kk, column=to, keytype=from, multiVals=multiMap)
	}
	return(res)
}
demuellae/muRtools documentation built on Sept. 8, 2023, 4:32 p.m.