R/annotations.R

Defines functions rnb.infinium.control.targets rnb.get.chromosomes rnb.get.mapping rnb.region.types rnb.remove.annotation rnb.load.annotation.from.db rnb.load.annotation rnb.export.all.annotation rnb.export.annotation rnb.save.annotation rnb.set.annotation.and.cpg.stats rnb.set.annotation rnb.get.annotation rnb.get.assemblies rnb.annotation2data.frame rnb.annotation.size rnb.update.annotation.infos unload.annotations load.annotations data.frame2GRanges append.cpg.stats match.chrom.names get.cpg.stats rnb.load.bed rnb.chromosome.lengths rnb.sort.regions rnb.fix.strand

Documented in data.frame2GRanges get.cpg.stats rnb.annotation2data.frame rnb.annotation.size rnb.export.all.annotation rnb.export.annotation rnb.get.annotation rnb.get.assemblies rnb.get.chromosomes rnb.get.mapping rnb.infinium.control.targets rnb.load.annotation rnb.load.annotation.from.db rnb.region.types rnb.remove.annotation rnb.save.annotation rnb.set.annotation rnb.set.annotation.and.cpg.stats

########################################################################################################################
## annotations.R
## created: 2012-08-16
## creator: Yassen Assenov
## ---------------------------------------------------------------------------------------------------------------------
## Collection of helper constants and functions related to the management of probe and region annotations.
########################################################################################################################

#' RnBeads Annotation Tables
#'
#' RnBeads uses sets of annotation tables and mappings (from regions to sites) for each of the supported genomes. The
#' structures for one assembly are stored in a separate dedicated annotation package. The following annotation packages
#' are available in Bioconductor:
#' \describe{
#'   \item{\pkg{RnBeads.hg19}}{for \code{"hg19"}}
#'   \item{\pkg{RnBeads.mm10}}{for \code{"mm10"}}
#'   \item{\pkg{RnBeads.mm9}}{for \code{"mm9"}}
#'   \item{\pkg{RnBeads.rn5}}{for \code{"rn5"}}
#' }
#'
#' @details
#' An assembly-specific scaffold is automatically loaded upon initialization of its annotation, that is, by the first
#' valid call to any of the following functions: \code{\link{rnb.get.chromosomes}},
#' \code{\link{rnb.get.annotation}}, \code{\link{rnb.set.annotation}}, \code{\link{rnb.get.mapping}},
#' \code{\link{rnb.annotation.size}}. Adding an annotation amounts to attaching its table(s) and mapping structures to
#' the scaffold.
#'
#' @docType data
#' @keywords datasets
#' @name RnBeads.data
#' @format \code{list} of four elements - \code{"regions"}, \code{"sites"}, \code{"controls"} and \code{"mappings"}.
#'         These elements are described below.
#'         \describe{
#'           \item{\code{"regions"}}{\code{list} of \code{NULL}s; the names of the elements correspond to the built-in
#'                region annotation tables. Once the default annotations are loaded, the attribute \code{"builtin"} is
#'                a \code{logical} vector storing, for each region annotation, whether it is the default (built-in) or
#'                custom.}
#'           \item{\code{"sites"}}{\code{list} of \code{NULL}s; the names of the elements correspond to the site and
#'                probe annotation tables.}
#'           \item{\code{"controls"}}{\code{list} of \code{NULL}s; the names of the elements correspond to the control
#'                probe annotation tables. The attribute \code{"sites"} is a \code{character} vector pointing to the
#'                site annotation that encompasses the respective control probes.}
#'           \item{\code{"mappings"}}{\code{list} of \code{NULL}s; the names of the elements correspond to the built-in
#'                region annotation tables.}
#'         }
#' @author Yassen Assenov
NULL

## G L O B A L S #######################################################################################################

## Environment to contain all probe, site and region annotation tables.
##
##  hg19
##      $GENOME                 character
##      $CHROMOSOMES            character
##      $regions
##          $tiling             GRangesList
##          $genes              GRangesList
##          $promoters          GRangesList
##          $cpgislands         GRangesList
##      $sites
##          $CpG                GRangesList
##          $probes450          GRangesList
##      $controls
##          $controls450        data.frame
##      $mappings
##          $tilinig
##              $CpG            list of IRanges
##              $probes450      list of IRanges
##          $genes
##              $CpG            list of IRanges
##              $probes450      list of IRanges
##          $promoters
##              $CpG            list of IRanges
##              $probes450      list of IRanges
##          $cpgislands
##              $CpG            list of IRanges
##              $probes450      list of IRanges
##      $lengths                int[ <chromosomes> , <annotations> ]
.rnb.annotations <- new.env()

## Control probe types
EPIC.CONTROL.TARGETS <- c(
	"bisulfite conversion I" = "BISULFITE CONVERSION I",
	"bisulfite conversion II" = "BISULFITE CONVERSION II",
	"extension" = "EXTENSION",
	"hybridization" = "HYBRIDIZATION",
	"negative control" = "NEGATIVE",
	"non-polymorphic" = "NON-POLYMORPHIC",
	"norm A" = "NORM_A",
	"norm C" = "NORM_C",
	"norm G" = "NORM_G",
	"norm T" = "NORM_T",
	"restoration" = "RESTORATION",
	"specificity I" = "SPECIFICITY I",
	"specificity II" = "SPECIFICITY II",
	"staining" = "STAINING",
	"target removal" = "TARGET REMOVAL")

HM450.CONTROL.TARGETS <- c(
	"bisulfite conversion I" = "BISULFITE CONVERSION I",
	"bisulfite conversion II" = "BISULFITE CONVERSION II",
	"extension" = "EXTENSION",
	"hybridization" = "HYBRIDIZATION",
	"negative control" = "NEGATIVE",
	"non-polymorphic" = "NON-POLYMORPHIC",
	"norm A" = "NORM_A",
	"norm C" = "NORM_C",
	"norm G" = "NORM_G",
	"norm T" = "NORM_T",
	"specificity I" = "SPECIFICITY I",
	"specificity II" = "SPECIFICITY II",
	"staining" = "STAINING",
	"target removal" = "TARGET REMOVAL")

HM27.CONTROL.TARGETS<-c(
	"bisulfite conversion" = "Bisulfite conversion",
	"extension" = "Extension",
	"hybridization" = "Hybridization",
	"negative control" = "Negative",
	"SNP" = "Genotyping",
	"non-polymorphic" = "Non-Polymorphic",
	"norm Grn" = "Normalization-Green",
	"norm Red" = "Normalization-Red",
	"specificity" = "Specificity",
	"staining" = "Staining",
	"target removal" = "Target Removal",
	"pACYC174" = "pACYC174",
	"pUC19" = "pUC19",
	"phiX174" = "phiX174")

## Sample-independent control probe types (subset of CONTROL.TARGETS)
CONTROL.TARGETS.SAMPLE.INDEPENDENT <- c("STAINING", "HYBRIDIZATION", "TARGET REMOVAL", "EXTENSION")

## Genotyping probes on the 27k microarray
HM27.CY3.SNP.PROBES<-c(
		"rs798149",
		"rs2959823",
		"rs2235751",
		"rs2125573",
		"rs2804694")

HM27.CY5.SNP.PROBES<-c(
		"rs1941955",
		"rs845016",
		"rs866884",
		"rs739259",
		"rs1416770",
		"rs1019916",
		"rs2521373",
		"rs10457834",
		"rs6546473",
		"rs5931272",
		"rs264581")

## F U N C T I O N S ###################################################################################################

#' rnb.fix.strand
#'
#' Converts, if necessary, the provided strand information into a factor vector with levels \code{"+"}, \code{"-"} and
#' \code{"*"}.
#'
#' @param values Vector of strand information to be converted.
#' @return The (possibly modified) strand information as a factor vector.
#'
#' @author Yassen Assenov
#' @noRd
rnb.fix.strand <- function(values) {
	if (is.factor(values) && setequal(levels(values), c("+", "-", "*"))) {
		values[is.na(values)] <- "*"
	} else {
		values <- as.character(values)
		values[is.na(values)] <- "*"
		i.positive <- values %in% c("+", "1", "+1", "F", "f", "TOP")
		i.negative <- values %in% c("-", "-1", "R", "r", "BOT")
		values[i.positive] <- "+"
		values[i.negative] <- "-"
		values[!(i.positive | i.negative)] <- "*"
		values <- factor(values, levels = c("+", "-", "*"))
	}
	return(values)
}

########################################################################################################################

#' rnb.sort.regions
#'
#' Sorts the given regions based on start and end position.
#'
#' @param x Genomic regions as an object of type \code{\link{GRanges}} or \code{\link{GRangesList}}.
#' @return Set of the same regions as x, sorted based on start and end positions.
#'
#' @author Yassen Assenov
#' @noRd
rnb.sort.regions <- function(x) {
	if (inherits(x, "GRanges")) {
		return(x[order(start(x), end(x), as.integer(strand(x))), ])
	}
	if (inherits(x, "GRangesList")) {
		return(endoapply(x, function(y) { y[order(start(y), end(y), as.integer(strand(y))), ] }))
	}
	stop("invalid value for x")
}

########################################################################################################################

#' Get chromosome sizes
#' 
#' Gets the lengths of the supported chromosomes for the given genome assembly.
#' 
#' @param assembly   Genome assembly of interest. Must be one of \code{\link{rnb.get.assemblies}}.
#' @param file.chrom Optionally, the name of the text file to save the chromosome lengths to. If this file exists, it
#'                   will be overwritten.
#' @return Invisibly, a named \code{integer} vector listing the lengths, in base pairs, of all supported chromosomes.
#' @author Yassen Assenov
#' @noRd
rnb.chromosome.lengths <- function(assembly, file.chrom = NULL) {
	a.table <- NULL
	if (exists(assembly, .rnb.annotations, inherits = FALSE)) {
		a.table <- get(assembly, .rnb.annotations, inherits = FALSE)[["sites"]]
		if (length(a.table) == 0) {
			a.table <- NULL
		} else {
			a.table <- a.table[[1]]
		}
	}
	if (is.null(a.table)) {
		a.table <- rnb.get.annotation(assembly = assembly)
	}
	c.lengths <- seqlengths(a.table)
	if (!is.null(file.chrom)) {
		write(paste0(names(c.lengths), "\t", c.lengths), file.chrom, sep = "\t")
	}
	invisible(c.lengths)
}

########################################################################################################################

#' rnb.load.bed
#'
#' Loads a BED file into a \code{data.frame} with fixed column names. The file contents is validated for structure
#' (at least 3 columns), as well as for integer values in columns 2 and 3.
#'
#' @param fname BED file to load.
#' @return \code{data.frame} with at least 3 and at most 6 columns. The column names are: \code{"Chromosome"},
#'         \code{"Start"}, \code{"End"}, \code{"id"}, \code{"score"} and \code{"strand"}. Columns after the sixth one,
#'         if present, are dropped.
#'
#' @author Yassen Assenov
#' @noRd
rnb.load.bed <- function(fname) {
	BED.COLUMNS <- c("Chromosome", "Start", "End", "id", "score", "strand")
	tbl <- tryCatch(suppressWarnings(read.delim(fname, header = FALSE, quote = "", comment.char = "#",
				stringsAsFactors = FALSE, na.strings = "")), error = function(e) { e })
	if (inherits(tbl, "error")) {
		if (grepl("cannot open", tbl, fixed = TRUE)) {
			stop("cannot open file")
		}
		stop("invalid file format")
	}
	if (ncol(tbl) < 3) {
		stop("invalid file format; expected at least 3 columns")
	}
	if (!(is.integer(tbl[[2]]) && is.integer(tbl[[3]]))) {
		stop("invalid file format; expected start and end positions in columns 2 and 3, respectively")
	}
	if (length(BED.COLUMNS) < ncol(tbl)) {
		tbl <- tbl[, 1:length(BED.COLUMNS)]
	}
	colnames(tbl) <- BED.COLUMNS[1:ncol(tbl)]
	tbl[[1]] <- as.character(tbl[[1]])
	if (4 <= ncol(tbl)) {
		tbl[[4]] <- as.character(tbl[[4]])
		if (anyDuplicated(tbl[[4]]) == 0) {
			rownames(tbl) <- tbl[[4]]
			tbl <- tbl[, -4]
		}
	}
	return(tbl)
}

########################################################################################################################

#' get.cpg.stats
#'
#' Computes CpG-related statistics for the specified regions.
#'
#' @param chrom.sequence Chromosome sequence, usually obtained from the assembly's genome definition. This must be an
#'                       object of type \code{MaskedDNAString}.
#' @param starts         \code{integer} vector of start positions for the regions of interest.
#' @param ends           \code{integer} vector of end positions for the regions of interest.
#' @return Table of statistics for the regions in the form of a \code{matrix} with the following columns:
#'         \code{"CpG"} and \code{"GC"}. The columns contain the number of CpG dinucleoties and the number of C and G
#'         bases in each region.
#'
#' @author Yassen Assenov
#' @export
get.cpg.stats <- function(chrom.sequence, starts, ends) {
	if (!(inherits(chrom.sequence, "MaskedDNAString") | inherits(chrom.sequence, "DNAString"))) {
		stop("invalid value for chrom.sequence")
	}
	if (!(is.integer(starts) && !any(is.na(starts)))) {
		stop("invalid value for starts")
	}
	if (!(is.integer(ends) && !any(is.na(ends)))) {
		stop("invalid value for ends")
	}
	chrom.regions <- suppressWarnings(Views(chrom.sequence, start = starts, end = ends))
	cbind(
		"CpG" = dinucleotideFrequency(chrom.regions)[, "CG"],
		"GC" = as.integer(rowSums(letterFrequency(chrom.regions, c("C", "G")))))
}

########################################################################################################################
#' match.chrom.names
#'
#' invariant chromosome name matching. I.e. find the occurrences of the names in the first
#' vector in the second vector, neglecting the "chr" prefix
#'
#' @param q chromosome name query
#' @param q chromosome name reference
#' @return the names from r corresponding to the entries in q
#'
#' @author Fabian Mueller
#' @noRd
match.chrom.names <- function(q, r) {
	rr <- gsub("^chr","",r)
	qq <- gsub("^chr","",q)
	res <- r[match(qq,rr)]
	names(res) <- q
	return(res)
}

#' append.cpg.stats
#'
#' Appends additional metadata columns for CpG count and GC density to the specified regions.
#'
#' @param genome.data Genome of interest.
#' @param regionlist  Genomic regions as a list of \code{GRanges} objects (or an obect of type \code{GRangesList}),
#'                    containing one set of regions per chromosome.
#' @return The modified \code{regionlist}. Two columns are appedned to the metadata of each element in this list -
#'         \code{"CpG"} and \code{"GC"}. If the metadata already contains these columns, this function appends columns
#'         with similar names.
#'
#' @author Yassen Assenov
#' @noRd
append.cpg.stats <- function(genome.data, regionlist) {
	cpg.stats <- function(chrom) {
		chromName.gd <- match.chrom.names(chrom,seqnames(genome.data))
		stats <- get.cpg.stats(genome.data[[chromName.gd]], start(regionlist[[chrom]]), end(regionlist[[chrom]]))
		result <- regionlist[[chrom]]
		mcols(result) <- IRanges::cbind(mcols(result), DataFrame(stats))
		result
	}
	regions.enriched <- suppressWarnings(
		foreach(chrom = names(regionlist), .packages = "GenomicRanges",
			.export = c("get.cpg.stats", "genome.data", "regionlist")) %dopar% cpg.stats(chrom))
	names(regions.enriched) <- names(regionlist)
	return(GRangesList(regions.enriched))
}

########################################################################################################################

#' data.frame2GRanges
#'
#' Converts a \code{data.frame} that defines genomic regions to object of type \code{GRanges}.
#'
#' @param dframe        Table defining genomic regions.
#' @param ids           Region names (identifiers) as a \code{character} vector, or \code{NULL} if no names are present.
#' @param chrom.column  Column name or index that lists the chromosome names.
#' @param start.column  Column name or index that lists the start positions of the regions.
#' @param end.column    Column name or index that lists the end positions of the regions.
#' @param strand.column Column name or index that lists the strands on which the regions are located. Set this to
#'                      \code{NULL} if this region set is not strand-specific.
#' @param assembly      Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported
#'                      genomes.
#' @param sort.result   Should the resulting table be sorted
#' @return \code{GRanges} object encapsulating all well defined regions on supported chromosomes, contained in
#'         \code{dframe}. Columns other that the ones listed as parameters in this function are included as metadata.
#'
#' @author Yassen Assenov
#' @export
data.frame2GRanges <- function(dframe, ids = rownames(dframe), chrom.column = "Chromosome", start.column = "Start",
	end.column = "End", strand.column = NULL, assembly = "hg19", sort.result = TRUE) {
	if (is.character(chrom.column)) { chrom.column <- which(colnames(dframe) == chrom.column) }
	if (is.character(start.column)) { start.column <- which(colnames(dframe) == start.column) }
	if (is.character(end.column)) { end.column <- which(colnames(dframe) == end.column) }
	if (is.character(strand.column)) { strand.column <- which(colnames(dframe) == strand.column) }

	## Process the chromosome names
	chroms <- as.character(dframe[, chrom.column])
	chroms <- paste0("chr", sub("^chr", "", chroms))
	param.list <- list()
	param.list[["seqnames"]] <- chroms

	## Process the regions
	dframe <- dframe[!(is.na(dframe[, start.column]) | is.na(dframe[, end.column])), ]
	param.list[["ranges"]] <- IRanges(start = dframe[, start.column], end = dframe[, end.column], names = ids)

	## Process the strands
	if (!is.null(strand.column)) {
		param.list[["strand"]] <- rnb.fix.strand(dframe[, strand.column])
	}

	for (cname in colnames(dframe)[-c(chrom.column, start.column, end.column, strand.column)]) {
		param.list[[cname]] <- dframe[[cname]]
	}
	if (!is.null(assembly)) {
		CHROMOSOMES <- names(.rnb.annotations[[assembly]][['CHROMOSOMES']])
		i.valid <- (chroms %in% CHROMOSOMES)
		param.list <- lapply(param.list, function(x) { x[i.valid] })
	}
	result <- do.call(GRanges, param.list)
	if (!is.null(assembly)) {
		seqlevels(result) <- CHROMOSOMES
		genome(result) <- assembly
	}
	if (sort.result){
		result <- rnb.sort.regions(result)
	}
	return(result)
}

########################################################################################################################

#' load.annotations
#'
#' Loads the specifieid annotation tables and mappings, if they are not already loaded.
#'
#' @param assembly Genome assembly regions to load.
#' @param sites    Sites and mappings to load. This parameter is considered only if \code{assembly} is not \code{NULL}.
#' @return \code{TRUE} if the required assembly (and sites) were successfully loaded; \code{FALSE} if such annotations
#'         do not exist.
#'
#' @author Yassen Assenov
#' @noRd
load.annotations <- function(assembly = NULL, sites = NULL) {
	load.f <- function(fname, pname = paste0("RnBeads.", assembly)) {
		if (pname != "RnBeads") {
			if (!suppressPackageStartupMessages(require(pname, character.only = TRUE))) {
				return(invisible(FALSE))
			}
		}
		fp <- system.file(file.path("data", fname), package=pname)
		tryCatch(load(fp, envir = .rnb.annotations),
			error = function(e) {
				stop(paste0("Internal error in ", pname, ": loading required file '", fp, "' failed"))
			}
		)
		invisible(TRUE)
	}

	if (!is.null(assembly)) {
		updated <- character()
		if (!exists(assembly, where = .rnb.annotations, inherits = F)) {
			## Add scaffold for sites, probes and control probes
			if (!load.f(paste0(assembly, ".RData"))) {
				return(FALSE)
			}
			## Load built-in regions
			if (!load.f("regions.RData")) {
				return(FALSE)
			}
#			assign(assembly, list("sites" = list(), "controls" = list(), "mappings" = list()), .rnb.annotations)
#			vnames <- c("CHROMOSOMES", "GENOME", "regions")
#			for (vname in vnames) {
#				.rnb.annotations[[assembly]][[vname]] <- get(vname, .rnb.annotations, inherits = FALSE)
#			}
#			rm(list = vnames, pos = .rnb.annotations)
			.rnb.annotations[[assembly]][["regions"]] <- .rnb.annotations[["regions"]]
			updated <- names(.rnb.annotations[[assembly]][["regions"]])
			rm(list = "regions", pos = .rnb.annotations)

			## Create matrix of elements per chromosome
			CHROMOSOMES <- names(.rnb.annotations[[assembly]][["CHROMOSOMES"]])
			s.names <- c(names(.rnb.annotations[[assembly]][["sites"]]), updated)
			.rnb.annotations[[assembly]][["lengths"]] <-
				matrix(0L, nrow = length(CHROMOSOMES), ncol = length(s.names), dimnames = list(CHROMOSOMES, s.names))
			rm(CHROMOSOMES, s.names)
		}
		if (!is.null(sites)) {
			if (sites %in% names(.rnb.annotations[[assembly]][["controls"]])) {
				c.sites <- attr(.rnb.annotations[[assembly]][["controls"]], "sites")
				sites <- c.sites[sites]
				rm(c.sites)
			}
			if (!(sites %in% names(.rnb.annotations[[assembly]][["sites"]]))) {
				return(FALSE)
			}
			if (is.null(.rnb.annotations[[assembly]][["sites"]][[sites]])) {
				## Load the required sites
				load.f(paste0(sites, ".RData"))
				sinfo <- get("sites", .rnb.annotations, inherits = FALSE)
				rm(list = "sites", pos = .rnb.annotations)
				.rnb.annotations[[assembly]][["sites"]][[sites]] <- sinfo$sites
				for (cnames in setdiff(names(sinfo), c("sites", "mappings"))) {
					.rnb.annotations[[assembly]][["controls"]][[cnames]] <- sinfo[[cnames]]
				}
				## Assign and/or create mappings
				region.bi <- attr(.rnb.annotations[[assembly]][["regions"]], "builtin")
				for (rname in names(.rnb.annotations[[assembly]][["regions"]])) {
					if (region.bi[rname]) {
						rmapping <- sinfo$mappings[[rname]]
					} else {
						rmapping <- rnb.regions2sites(.rnb.annotations[[assembly]][["regions"]][[rname]], sinfo$sites)
					}
					.rnb.annotations[[assembly]][["mappings"]][[rname]][[sites]] <- rmapping
					rm(rmapping)
				}
				suppressWarnings(rm(sinfo, region.bi, rname))
				updated <- sites
			}
		}
		if (length(updated) != 0) {
			## Update annotations lengths for the newly loaded regions and/or sites
			genome.info <- get(assembly, .rnb.annotations, inherits = FALSE)
			for (a.name in updated) {
				.rnb.annotations[[assembly]][["lengths"]][, a.name] <- 0L
				if (a.name %in% names(genome.info$regions)) {
					a.lengths <- elementNROWS(genome.info$regions[[a.name]])
				} else { # a.name %in% names(genome.info$sites)
					a.lengths <- elementNROWS(genome.info$sites[[a.name]])
				}
				.rnb.annotations[[assembly]][["lengths"]][names(a.lengths), a.name] <- a.lengths
			}
		}
	}

	return(TRUE)
}

########################################################################################################################

#' unload.annotations
#'
#' Removes all loaded annotation tables and mappings (if such exist).
#'
#' @author Yassen Assenov
#' @noRd
unload.annotations <- function() {
	rm(list = ls(envir = .rnb.annotations, all.names = TRUE), envir = .rnb.annotations)
}

########################################################################################################################

#' rnb.update.annotation.infos
#'
#' Updates or adds supporting attributes for a new region annotation.
#'
#' @param type     One-element \code{character} vector giving the name of the freshly added/updated region annotation.
#' @param assembly Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported genomes.
#'
#' @author Yassen Assenov
#' @noRd
rnb.update.annotation.infos <- function(type, assembly) {

	## Set the annotation for built-in regions
	region.builtin <- attr(.rnb.annotations[[assembly]][["regions"]], "builtin")
	if (type %in% names(region.builtin)) {
		region.builtin[type] <- FALSE
	} else {
		region.builtin <- c(region.builtin, FALSE)
		names(region.builtin)[length(region.builtin)] <- type
	}
	attr(.rnb.annotations[[assembly]][["regions"]], "builtin") <- region.builtin

	## Set the information for number of regions per chromosome
	CHROMOSOMES <- names(.rnb.annotations[[assembly]][['CHROMOSOMES']])
	a.lengths.full <- rep.int(0L, length(CHROMOSOMES))
	names(a.lengths.full) <- CHROMOSOMES
	a.lengths <- elementNROWS(.rnb.annotations[[assembly]][["regions"]][[type]])
	a.lengths.full[names(a.lengths)] <- a.lengths
	i.type <- which(colnames(.rnb.annotations[[assembly]][["lengths"]]) == type)
	if (length(i.type) == 0) {
		lengths <- cbind(.rnb.annotations[[assembly]][["lengths"]], a.lengths.full)
		colnames(lengths)[ncol(lengths)] <- type
	} else {
		lengths <- .rnb.annotations[[assembly]][["lengths"]]
		lengths[, i.type] <- a.lengths.full
	}
	.rnb.annotations[[assembly]][["lengths"]] <- lengths
}

########################################################################################################################

#' rnb.annotation.size
#'
#' Gets the size, in number of genomic elements, of the specified annotation.
#'
#' @param type     Name of annotation. Control probe annotations are not accepted.
#' @param assembly Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported genomes.
#' @return \code{integer} vector showing the number of elements the specified annotation contains per chromosome. The
#'         names of the vector are the names of \code{\link{rnb.get.chromosomes}} for the given genome assembly.
#'         Chromosomes that are not covered by the annotation have their respective value set to \code{0} (zero).
#'
#' @examples
#' \donttest{
#' library(RnBeads.hg19)
#' rnb.annotation.size("probes450")
#' }
#' @seealso \code{\link{rnb.region.types}} for a list of supported region annotations
#'
#' @author Yassen Assenov
#' @export
rnb.annotation.size <- function(type = "CpG", assembly = "hg19") {
	if (!(is.character(type) && length(type) == 1 && (!is.na(type)))) {
		stop("invalid value for type")
	}
	if (!(is.character(assembly) && length(assembly) == 1 && (!is.na(assembly)))) {
		stop("invalid value for assembly")
	}
	assembly <- tolower(assembly)
	if (!load.annotations(assembly)) {
		stop("unsupported assembly")
	}
	load.annotations(assembly, type)
	if (!(type %in% colnames(.rnb.annotations[[assembly]][["lengths"]]))) {
		stop("unsupported annotation type")
	}
	.rnb.annotations[[assembly]][["lengths"]][, type]
}

########################################################################################################################

#' rnb.annotation2data.frame
#'
#' Transform the specified site, probe or region annotation to \code{data.frame}.
#'
#' @param annotation.table Annotation in the form of non-empty \code{GRangesList} object, as returned by
#'                         \code{\link{rnb.get.annotation}}.
#' @param add.names        Flag indicating if element names should be extracted and returned also as a column named
#'                         \code{"ID"} in the resulting \code{data.frame}. Note that element names, if present, are
#'                         set to be the row names of the table.
#' @return Annotation in the form of a single \code{data.frame}. The columns in this table include, among other,
#'         \code{"Chromosome"}, \code{"Start"} and \code{"End"}.
#'
#' @examples
#' \donttest{
#' library(RnBeads.hg19)
#' head(rnb.annotation2data.frame(rnb.get.annotation("probes450")))
#' }
#' @author Yassen Assenov
#' @export
rnb.annotation2data.frame <- function(annotation.table, add.names = TRUE) {
	if (!(inherits(annotation.table, "GRangesList") && length(annotation.table) != 0)) {
		stop("invalid value for annotation.table")
	}
	if (!parameter.is.flag(add.names)) {
		stop("invalid value for add.names")
	}

	## Convert the GRangesList object to a data.frame
	el.names <- do.call("c", lapply(annotation.table, names))
	if (is.null(el.names) || anyDuplicated(el.names) != 0) {
		result <- GenomicRanges::as.data.frame(annotation.table, row.names = NULL)
		el.names <- NULL
	} else {
		result <- GenomicRanges::as.data.frame(annotation.table)
		rownames(result) <- el.names
	}
	if (ncol(result) > 2 && identical(colnames(result)[1:2], c("group", "group_name"))) {
		result <- result[, c(-1, -2), drop = FALSE]
	}

	## Adjust column and row names
	c.names <- colnames(mcols(annotation.table[[1]]))
	c.names <- c(colnames(result)[1:(ncol(result) - length(c.names))], c.names)
	c.names[c.names == "CGI.Relation"] <- "CGI Relation"
	c.names[c.names == "strand"] <- "Strand"
	c.names[c.names == "start"] <- "Start"
	c.names[c.names == "end"] <- "End"
	c.names[c.names == "seqnames"] <- "Chromosome"
	c.names[c.names == "element"] <- "ID"
	colnames(result) <- c.names
	if (add.names && (!is.null(el.names))) {
		result[["ID"]] <- el.names
		i <- which(c.names == "width")
	} else {
		i <- which(c.names %in% c("ID", "width"))
	}
	if (length(i) != 0) {
		result <- result[, -i]
	}
	return(result)
}

########################################################################################################################

#' rnb.get.assemblies
#'
#' Gets the supported genome assemblies.
#'
#' @return All supported genome assemblies in the form of a \code{character} vector. These are \code{"hg19"},
#'         \code{"mm10"}, \code{"mm9"} and \code{"rn5"}.
#'
#' @examples
#' \donttest{
#' "hg19" %in% rnb.get.assemblies()
#' }
#' @author Yassen Assenov
#' @export
rnb.get.assemblies <- function() {
	package.regex <- "^RnBeads\\.(.+)$"
	pnames <- grep(package.regex, rownames(installed.packages(noCache = TRUE, fields = character())), value = TRUE)
	sort(sub(package.regex, "\\1", pnames))
}

########################################################################################################################

#' rnb.get.annotation
#'
#' Extracts the requested annotation for the given genome.
#'
#' @param type     Name of annotation.
#' @param assembly Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported genomes.
#' @return Probe, site or region annotation table. If the specified type refers to control probes, the returned value is
#'         a \code{data.frame} listing all respective control probes. Otherwise, this function returns an object of type
#'         \code{\link{GRangesList}} - a list of consistent \code{\link{GRanges}} objects, one per chromosome.
#'
#' @details
#' When the returned value is of type \code{GRangesList}, it defines the genomic positions of the requested sites,
#' probes or regions. Identifiers, if present, can be obtained using the \code{names} method. Strand information is also
#' included when applicable. Any additional annotation is stored as metadata in the respective \code{GRanges} objects.
#'
#' @seealso \code{\link{rnb.set.annotation}} for adding annotation;
#'   \code{\link{rnb.region.types}} for all loaded region types in a genome assembly
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' rnb.get.annotation("promoters")
#' }
rnb.get.annotation <- function(type = "CpG", assembly = "hg19") {
	if (!(is.character(type) && length(type) == 1 && (!is.na(type)))) {
		stop("invalid value for type")
	}
	if (!(is.character(assembly) && length(assembly) == 1 && (!is.na(assembly)))) {
		stop("invalid value for assembly")
	}
	assembly <- tolower(assembly)
	if (!load.annotations(assembly)) {
		stop("unsupported assembly")
	}
	load.annotations(assembly, type)
	if (type %in% names(.rnb.annotations[[assembly]][["controls"]])) {
		result <- .rnb.annotations[[assembly]][["controls"]][[type]]
	} else if (type %in% names(.rnb.annotations[[assembly]][["sites"]])) {
		result <- .rnb.annotations[[assembly]][["sites"]][[type]]
	} else if (type %in% names(.rnb.annotations[[assembly]][["regions"]])) {
		result <- .rnb.annotations[[assembly]][["regions"]][[type]]
	} else {
		stop("unsupported annotation type")
	}
	return(result)
}

########################################################################################################################

#' rnb.set.annotation
#'
#' Adds or replaces a region annotation table.
#'
#' @param type        One-element \code{character} vector giving the name of the annotation. If this region type is
#'                    already available, it will be overwritten for the current session. The type cannot be one of
#'                    \code{"CpG"}, \code{"probes450"} or \code{"controls450"}, because these names are reserved for the
#'                    annotation tables of CpG dinucleotides, and Infinium methylation and control probes, respectively.
#' @param regions     BED file defining regions (see \emph{Details}). Alternatively, the value of this parameter can be
#'                    a table of genomic regions in the form of a \code{\link{data.frame}}, containing at least the
#'                    following three columns - \code{"Chromosome"}, \code{"Start"} and \code{"End"} (notice the upper
#'                    case). The \code{"chromosome"} column must be a \code{character} or \code{factor} vector that
#'                    lists chromosome names. The \code{"start"} and \code{"end"} columns are expected to contain
#'                    genomic positions as \code{integer}s. The row names of this \code{data.frame} are used as region
#'                    identifiers.
#' @param description Optional; short description in the form of a non-empty \code{character} vector. The elements in
#'                    this vector are concatenated without a separator to form the description of the annotation.
#' @param assembly    Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported
#'                    genomes.
#' @return Invisibly, \code{TRUE} if an existing annotation was replaced and \code{FALSE} otherwise.
#'
#' @details
#' In case the parameter \code{regions} specifies an existing BED file, regions are loaded from this file. The number of
#' columns defined must be at least 3. Columns after the sixth one, if present, are dropped. The columns are given the
#' following names: \code{"chromosome"}, \code{"start"}, \code{"end"}, \code{"id"}, \code{"score"} and \code{"strand"}.
#'
#' The annotation tables in \pkg{RnBeads} focus on chromosomes \code{"chr1"}, \code{"chr2"}, ..., \code{"chr22"},
#' \code{"chrX"} and \code{"chrY"}. Regions on other chromosomes are ignored. This function also recognizes the
#' convention of chromosome names such as \code{"1"}, adopted, for example, by \href{http://www.ensembl.org/}{Ensembl}.
#' Apart from this, the region definition table is not examined in details by this function; therefore, regions located
#' on unsupported chromosomes or having invalid (e.g. negative) genomic coordinates are simply not mapped to any sites
#' or probes.
#'
#' @examples
#' \donttest{
#' my.regions <- data.frame(
#'     chromosome = c("chr1", "chr1"),
#'     start = c(49242278L, 49242372L),
#'     end = c(49242590L, 49242810L),
#'     rownames = c("BEND5E1", "CpG:38"))
#' txt <- "First exon of the BEND5 gene and an overlapping CpG island."
#' rnb.set.annotation("my regions", my.regions, txt)
#' }
#' @seealso \code{\link{rnb.get.annotation}} for extracting annotation;
#'   \code{\link{rnb.region.types}} for all loaded region types in a genome assembly
#' @author Yassen Assenov
#' @export
rnb.set.annotation <- function(type, regions, description = NULL, assembly = "hg19") {
	if (!(is.character(type) && length(type) == 1 && (!is.na(type)))) {
		stop("invalid value for type")
	}
	if (!(is.character(assembly) && length(assembly) == 1 && (!is.na(assembly)))) {
		stop("invalid value for assembly")
	}
	if (!is.data.frame(regions)) {
		if (!(is.character(regions) && length(regions) == 1 && (!is.na(regions)))) {
			stop("invalid value for regions")
		}
		if (!file.exists(regions)) {
			stop("invalid value for regions; file does not exist")
		}
		regions <- rnb.load.bed(regions)
	}
	if (!(ncol(regions) >= 3 && all(c("Chromosome", "Start", "End") %in% colnames(regions)) &&
			(is.character(regions[["Chromosome"]]) || is.factor(regions[["Chromosome"]])) &&
			is.integer(regions[["Start"]]) && is.integer(regions[["End"]]))) {
		stop("invalid format for regions")
	}
	if (!is.null(description)) {
		if (!(is.character(description) && length(description) != 0)) {
			stop("invalid value for description")
		}
		description <- paste(description, collapse = "")
	}
	assembly <- tolower(assembly)
	if (!load.annotations(assembly)) {
		stop("unsupported assembly")
	}
	s.names <- c(names(.rnb.annotations[[assembly]][["controls"]]), names(.rnb.annotations[[assembly]][["sites"]]))
	if (type %in% s.names) {
		stop("invalid value for type; specified name is not allowed")
	}

	## Construct list of region annotation tables with sorted coordinates, one per chromosome
	strand.column <- which(colnames(regions) == "Strand")
	if (length(strand.column) == 0) {
		strand.column <- NULL
	}
	regions <- data.frame2GRanges(regions, strand.column = strand.column, assembly = assembly)
	regions <- GenomicRanges::split(regions, seqnames(regions))
	attr(regions, "description") <- description

	## Create mappings from the regions to all defined sites
	sites <- .rnb.annotations[[assembly]][["sites"]]
	sites <- sites[!sapply(sites, is.null)]
	mappings <- lapply(sites, function(x) { rnb.regions2sites(regions, sites = x) })

	## Add the region annotation table to the environment
	annotation.exists <- isTRUE(type %in% names(.rnb.annotations[[assembly]][["regions"]]))
	.rnb.annotations[[assembly]][["regions"]][[type]] <- regions
	## Add mappings to the environment
	.rnb.annotations[[assembly]][["mappings"]][[type]] <- mappings
	## Set annotation attributes and sizes (lengths)
	rnb.update.annotation.infos(type, assembly)
	return(invisible(annotation.exists))
}

########################################################################################################################

#' rnb.set.annotation.and.cpg.stats
#'
#' wrapper for \code{\link{rnb.set.annotation}} to accept the region format as output by \code{annotation(rnb.set)}.
#' Additionally, CpG statistics are added to the annotation.
#' @param regions a data.frame handled similarly as by \code{\link{rnb.set.annotation}} with the exception that
#'                 the genomic location columns should be specified using upper case first letters
#' @param type,description,assembly Parameters handled exactly as in \code{\link{rnb.set.annotation}}
#' @return Invisibly, \code{TRUE} if an existing annotation was replaced and \code{FALSE} otherwise.
#'
#' @seealso \code{\link{rnb.set.annotation}}
#' @author Fabian Mueller
#' @export
rnb.set.annotation.and.cpg.stats <- function(type, regions, description = NULL, assembly = "hg19"){
	## FIXME: Incorporate this function as a parameter in rnb.set.annotation
	GENOME <- .rnb.annotations[[assembly]][['GENOME']]
	rnb.require(GENOME)
	genome.data <- get(GENOME)
	regs.gr <- data.frame2GRanges(regions, chrom.column = "Chromosome", start.column = "Start",
		end.column = "End", strand.column = "Strand", assembly = assembly)
	regs.grl <- GenomicRanges::split(regs.gr, seqnames(regs.gr))

	regs.grl <- append.cpg.stats(genome.data, regs.grl)
	regs.gr <- BiocGenerics::unlist(regs.grl,use.names=FALSE)

	regs.df <- GenomicRanges::as.data.frame(regs.gr)
	colnames(regs.df)[colnames(regs.df)=="seqnames"] <- "chromosome"
	regs.df$width <- NULL

	rnb.set.annotation(type=type, regions=regs.df, description=description, assembly = assembly)
}

########################################################################################################################

#' rnb.save.annotation
#'
#' Saves the specified region annotation table and its accompanying data structures to a binary file.
#'
#' @param fname    One-element \code{character} vector giving the name of the file to contain the annotation data. If
#'                 this file already exists, it will be overwritten.
#' @param type     One-element \code{character} vector giving the name of the region annotation.
#' @param assembly Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported genomes.
#' @return \code{TRUE}, invisibly.
#'
#' @details
#' This function is used in combination with \code{\link{rnb.load.annotation}} to enable fast reloading of custom region
#' annotations. If can also be used to save a build-in region annotation (e.g. before overwriting it) but not site or
#' control probe annotations.
#'
#' @seealso \code{\link{rnb.load.annotation}} for loading a saved annotation
#' @author Yassen Assenov
#' @export
rnb.save.annotation <- function(fname, type, assembly = "hg19") {
	if (!(is.character(fname) && length(fname) == 1 && (!is.na(fname)))) {
		stop("invalid value for fname")
	}
	if (!(is.character(type) && length(type) == 1 && (!is.na(type)))) {
		stop("invalid value for type")
	}
	if (!(is.character(assembly) && length(assembly) == 1 && (!is.na(assembly)))) {
		stop("invalid value for assembly")
	}
	assembly <- tolower(assembly)
	if (!load.annotations(assembly)) {
		stop("unsupported assembly")
	}
	if (!(type %in% names(.rnb.annotations[[assembly]][["regions"]]))) {
		stop("unsupported annotation type")
	}

	regions <- .rnb.annotations[[assembly]][["regions"]][[type]]
	mappings <- .rnb.annotations[[assembly]][["mappings"]][[type]]
	tryCatch(save(assembly, regions, mappings, file = fname, compression_level = 9L),
		error = function(er) { stop(paste("unable to save objects to", fname)) })
	return(invisible(TRUE))
}

########################################################################################################################

#' rnb.export.annotation
#'
#' Export the annotation to a defined format (currently only bed is supported
#'
#' @param fname    One-element \code{character} vector giving the name of the file to contain the annotation data. If
#'                 this file already exists, it will be overwritten.
#' @param type     One-element \code{character} vector giving the name of the region annotation.
#' @param assembly Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported genomes.
#' @param format   Output format. currently only \code{"bed"} is supported.
#' @return \code{TRUE}, invisibly.
#'
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' rnb.export.annotation(tempfile(pattern="promoters",fileext=".bed"),"promoters")
#' }
rnb.export.annotation <- function(fname, type, assembly = "hg19", format = "bed") {
	if (!(is.character(fname) && length(fname) == 1 && (!is.na(fname)))) {
		stop("invalid value for fname")
	}
	if (!(is.character(type) && length(type) == 1 && (!is.na(type)))) {
		stop("invalid value for type")
	}
	if (!(is.character(format) && length(format) == 1 && (!is.na(format)) && format=="bed")) {
		stop("invalid value for format")
	}
	assembly <- tolower(assembly)
	if (!load.annotations(assembly)) {
		stop("unsupported assembly")
	}
	if (!(type %in% c("CpG",names(.rnb.annotations[[assembly]][["regions"]])))) {
		stop("unsupported annotation type")
	}
	aa <- rnb.get.annotation(type,assembly)
	names(aa) <- NULL
	aa <- BiocGenerics::unlist(aa)
	names(aa) <- NULL
	aa.df <- GenomicRanges::as.data.frame(aa)
	colnames(aa.df)[colnames(aa.df)=="seqnames"] <- "chrom"
	aa.df$width <- NULL
	ids <- rownames(aa.df)
	if (length(ids)!=nrow(aa.df)){
		ids <- 1:nrow(aa.df)
	}
	if (format=="bed"){
		tab.2.export <- data.frame(chrom=aa.df$chrom,start=aa.df$start,end=aa.df$end,strand=aa.df$strand,id=ids)
		write.table(format(tab.2.export,scientific=FALSE),file=fname,
				quote=FALSE,sep="\t",row.names=FALSE,col.names=TRUE,na=".")
	}
	return(invisible(TRUE))
}

########################################################################################################################

#' rnb.export.all.annotation
#'
#' Wrapper for exporting all annotation sets
#'
#' @param out.dir  The directory to write the files to
#' @param types    One-element \code{character} vector giving the name of the region annotation.
#' @param assembly Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported genomes.
#' @param format   output format. currently only \code{"bed"} is supported.
#' @return \code{TRUE}, invisibly.
#'
#' @author Fabian Mueller
#' @export
#' @examples
#' \donttest{
#' logger.start(fname=NA)
#' rnb.export.all.annotation(tempdir(),c("genes","promoters"))
#' }
rnb.export.all.annotation <- function(out.dir, types=c("CpG",rnb.region.types(assembly)), assembly = "hg19", format = "bed") {
	if (!(is.character(out.dir) && length(out.dir) == 1 && (!is.na(out.dir)))) {
		stop("invalid value for fname")
	}
	if (!(is.character(types) && (!any(is.na(types))))) {
		stop("invalid value for type")
	}
	if (!(is.character(format) && length(format) == 1 && (!is.na(format)) && format=="bed")) {
		stop("invalid value for format")
	}
	for (tt in types){
		if (logger.isinitialized()) {
			logger.start(c("Exporting Region Type",tt))
		}
		rnb.export.annotation(file.path(out.dir,paste(assembly,tt,format,sep=".")), tt, assembly = assembly, format = "bed")
		if (logger.isinitialized()) {
			logger.completed()
		}
	}
	return(invisible(TRUE))
}

########################################################################################################################

#' rnb.load.annotation
#'
#' Loads a previously saved custom region annotation from a binary (RData) file.
#'
#' @param fname One-element \code{character} vector giving the name of the file that contains the annotation data.
#' @param type  One-element \code{character} vector giving the name of the region annotation. If this annotation
#'              is already available, it will be overwritten for the current session.
#' @return Invisibly, \code{TRUE} if the annotation was loaded successfully; an error message if the objects in the
#'         given file do not encode an annotation.
#'
#' @details
#' If the region annotation cannot be loaded from the specified location, this function exits with an error message in
#' the form \code{"unable to load object from ..."}. This could happen, for example, when \code{fname} does not refer to
#' a valid RData file, or the file cannot be accessed due to security restrictions.
#'
#' If the file is loaded in the current session, but no annotation was added, the function returns invisibly one of the
#' following short failure messages:
#' \describe{
#'   \item{\code{"invalid format"}}{The RData file does not store exactly the following three objects - \code{assembly},
#'        \code{regions}, and \code{mapping}, or they are not of the expected type.}
#'   \item{\code{"unsupported assembly"}}{The specified assembly is unknown.}
#'   \item{\code{"invalid format of regions"}}{The specified region annotation table is invalid.}
#'   \item{\code{"invalid format of mappings"}}{The specified region mapping tables are invalid.}
#' }
#'
#' @seealso \code{\link{rnb.save.annotation}} for saving annotation to a binary file; \code{\link{rnb.set.annotation}}
#'          for loading an annotation from a BED file.
#' @author Yassen Assenov
#' @export
rnb.load.annotation <- function(fname, type) {
	if (!(is.character(fname) && length(fname) == 1 && (!is.na(fname)))) {
		stop("invalid value for fname")
	}
	if (!(is.character(type) && length(type) == 1 && (!is.na(type)))) {
		stop("invalid value for type")
	}
	result <- tryCatch(load(fname), error = function(err) { return(NULL) })
	if (is.null(result)) {
		stop(paste("unable to load object from", fname))
	}
	if (!setequal(c("assembly", "regions", "mappings"), result)) {
		return(invisible("invalid format"))
	}
	if (!(is.character(assembly) && length(assembly) == 1 && (!is.na(assembly)))) {
		return(invisible("invalid format"))
	}
	if (!load.annotations(assembly)) {
		return(invisible("unsupported assembly"))
	}
	## TODO: validate regions and validate mappings

	## Compare currently loaded sites with the ones avaialable in the mappings
	site.names.all <- names(.rnb.annotations[[assembly]][["sites"]])
	site.names.mapped <- names(mappings)
	if (!all(site.names.mapped %in% site.names.all)) {
		return(invisible("invalid format of mappings"))
	}
	## Load all unloaded sites
	## TODO: Would it be better to simply ignore the unloaded sites?
	sites.loaded <- .rnb.annotations[[assembly]][["sites"]]
	sites.loaded <- sites.loaded[!sapply(sites.loaded, is.null)]
	site.names.loaded <- names(sites.loaded)
	for (sname in setdiff(site.names.mapped, site.names.loaded)) {
		load.annotations(assembly, sname)
	}
	## Construct additional mappings if necessary
	for (sname in setdiff(site.names.loaded, site.names.mapped)) {
		mappings[[sname]] <- rnb.regions2sites(regions, sites = sites.loaded[[sname]])
	}
	## Put the objects in the .rnb.annotation environment
	sites.all <- .rnb.annotations[[assembly]][["sites"]]
	site.names.loaded <- names(sites.all[!sapply(sites.all, is.null)])
	.rnb.annotations[[assembly]][["regions"]][[type]] <- regions
	.rnb.annotations[[assembly]][["mappings"]][[type]] <- mappings[site.names.loaded]
	rnb.update.annotation.infos(type, assembly)
	invisible(TRUE)
}

########################################################################################################################

#' rnb.load.annotation.from.db
#'
#' Loads a previously region annotation from the RnBeads resource database
#'
#' @param types  One-element \code{character} vector giving the name of the region annotation. If this annotation
#'              is already available, it will be overwritten for the current session.
#' @param assembly Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported genomes.
#' @return Invisibly, \code{TRUE} if the annotation was loaded successfully; an error message if the objects in the
#'         given file do not encode an annotation.
#'
#' @details
#' This function checks whether a region annotation is present in the RnBeads resources,
#' downloads the corresponding annotation file(s) from the and then runs \code{\link{rnb.load.annotation}}
#' to import the annotation.
#' 
#' @examples
#' \donttest{
#' rnb.region.types() 
#' rnb.load.annotation.from.db(c("tiling1kb", "dynamicMethZiller2013"))
#' rnb.region.types()
#' }
#' 
#' @seealso \code{\link{rnb.load.annotation}} for loading annotation from a binary file
#' @author Fabian Mueller
#' @export
rnb.load.annotation.from.db <- function(types, assembly="hg19") {
	db.url <- "http://rnbeads.mpi-inf.mpg.de/publication/data/regiondb"
	if (!(is.character(types) && length(types) > 0 && (!any(is.na(types))))) {
		stop("invalid value for types")
	}
	if (!(is.character(assembly) && length(assembly) == 1 && (!is.na(assembly)))) {
		stop("invalid value for assembly")
	}
	for (tt in types){
		fname <- paste0("annotation_", assembly, "_", tt, ".RData")
		tmpFn <- tempfile(fileext=".RData")
		success <- tryCatch(
			download.file(paste(db.url, fname, sep="/"), destfile=tmpFn, mode = "wb"),
			error = function(err) { return(-1) }
		)
		if (success != 0){
			logger.error(c("Failed to download region set annotation:", tt))
		}
		rnb.load.annotation(tmpFn, tt)
		unlink(tmpFn, recursive=FALSE) # remove temp files
	}
	invisible(TRUE)
}

########################################################################################################################

#' rnb.remove.annotation
#'
#' Deletes a region annotation table. Use this function with caution; its operation cannot be undone.
#'
#' @param type     One-element \code{character} vector giving the name of the region annotation.
#' @param assembly Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported genomes.
#' @return Invisibly, \code{TRUE} if the annotation has been successfully deleted, or \code{FALSE} if the specified
#'         region type is not supported.
#'
#' @examples
#' \donttest{
#' t.regions <- rnb.get.annotation("tiling")
#' rnb.remove.annotation("tiling")
#' }
#'
#' @seealso \code{\link{rnb.get.annotation}}, \code{\link{rnb.region.types}}
#' @author Fabian Mueller
#' @export
rnb.remove.annotation <- function(type, assembly = "hg19") {
	if (!(is.character(type) && length(type) == 1 && (!is.na(type)))) {
		stop("invalid value for type")
	}
	if (!(is.character(assembly) && length(assembly) == 1 && (!is.na(assembly)))) {
		stop("invalid value for assembly")
	}
	assembly <- tolower(assembly)
	if (!load.annotations(assembly)) {
		stop("unsupported assembly")
	}
	if (!(type %in% names(.rnb.annotations[[assembly]][["regions"]]))) {
		warning("unsupported annotation type")
		return(invisible(FALSE))
	}
	.rnb.annotations[[assembly]][["regions"]][[type]]  <- NULL
	.rnb.annotations[[assembly]][["mappings"]][[type]] <- NULL
	i.type <- which(colnames(.rnb.annotations[[assembly]][["lengths"]]) == type)
	.rnb.annotations[[assembly]][["lengths"]] <- .rnb.annotations[[assembly]][["lengths"]][, -i.type]
	invisible(TRUE)
}

########################################################################################################################

#' rnb.region.types
#'
#' Gets the supported region annotations for a given genome assembly.
#'
#' @param assembly Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported genomes.
#' @return Region types supported by \pkg{RnBeads} in the form of a \code{character} vector. The built-in ones are
#'         \code{"cpgislands"}, \code{"genes"}, \code{"promoters"} and \code{"tiling"}. The names of all custom region
#'         definitions are also included in the returned vector.
#'
#' @examples
#' \donttest{
#' "promoters" %in% rnb.region.types() # TRUE
#' }
#'
#' @seealso \code{\link{rnb.get.annotation}}, \code{\link{rnb.set.annotation}}
#' @author Yassen Assenov
#' @export
rnb.region.types <- function(assembly = "hg19") {
	if (!(is.character(assembly) && length(assembly) == 1 && (!is.na(assembly)))) {
		stop("invalid value for assembly")
	}
	assembly <- tolower(assembly)
	if (!load.annotations(assembly)) {
		stop("unsupported assembly")
	}
	names(.rnb.annotations[[assembly]][["regions"]])
}

########################################################################################################################

#' rnb.get.mapping
#'
#' Gets the mapping information used for a region type. These are structures used to map regions to the genomic loci (or
#' Infinium probes) that target them.
#'
#' @param region.type Region type. The built-in types are \code{"cpgislands"}, \code{"genes"}, \code{"promoters"} and
#'                    \code{"tiling"}.
#' @param target.type Target type for sites.
#' @param assembly    Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported
#'                    genomes.
#' @return \code{list} of mapping structures, one per chromosome. Every mapping structure is an object of type
#'         \code{\link{IRanges}} and stores the range of indices of all sites contained in the respective region.
#'         Regions that do not contain sites are left out of the mapping.
#' @examples
#' \donttest{
#' promoters2probes <- rnb.get.mapping("promoters", "probes450")
#' promoters2probes[["chr21"]]
#' }
#' @author Yassen Assenov
#' @export
rnb.get.mapping <- function(region.type, target.type, assembly = "hg19") {
	if (!(is.character(region.type) && length(region.type) == 1 && (!is.na(region.type)))) {
		stop("invalid value for region.type")
	}
	if (!(is.character(target.type) && length(target.type) == 1 && (!is.na(target.type)))) {
		stop("invalid value for target.type")
	}
	if (!(is.character(assembly) && length(assembly) == 1 && (!is.na(assembly)))) {
		stop("invalid value for assembly")
	}
	assembly <- tolower(assembly)
	if (!load.annotations(assembly)) {
		stop("unsupported assembly")
	}
	if (!load.annotations(assembly, target.type)) {
		stop("unsupported target.type")
	}
	if (!(target.type %in% names(.rnb.annotations[[assembly]][["sites"]]))) {
		stop("unsupported target.type")
	}
	if (is.null(.rnb.annotations[[assembly]][["regions"]][[region.type]])) {
		stop("unsupported region.type")
	}
	.rnb.annotations[[assembly]][["mappings"]][[region.type]][[target.type]]
}

########################################################################################################################

#' rnb.get.chromosomes
#'
#' Gets the chromosome names supported for the specified assembly.
#'
#' @param assembly Genome assembly of interest. See \code{\link{rnb.get.assemblies}} for the list of supported genomes.
#' @return \code{character} vector of supported chromosomes for the specified genome assembly. The elements of the
#'         vector follow the \href{http://www.ensembl.org/}{Ensembl} convention (\code{"1"}, \code{"2"}, ...), and the
#'         names of this vector - the convention of the \href{http://genome.ucsc.edu/}{UCSC Genome Browser}
#'         (\code{"chr1"}, \code{"chr2"}, ...).
#'
#' @examples
#' \donttest{
#' "chrX" %in% names(rnb.get.chromosomes())
#' }
#' @author Pavlo Lutsik
#' @export
rnb.get.chromosomes <- function(assembly = "hg19") {
	if (!(is.character(assembly) && length(assembly) == 1 && (!is.na(assembly)))) {
		stop("invalid value for assembly")
	}
	assembly <- tolower(assembly)
	if (!load.annotations(assembly)) {
		stop("unsupported assembly")
	}
	return(.rnb.annotations[[assembly]][['CHROMOSOMES']])
}

########################################################################################################################

#' rnb.infinium.control.targets
#'
#' Extracts all control probe types in the HumanMethylation450 assay.
#'
#' @param target  A singleton of type \code{character}, specifying the microarray platform.
#' 				 \code{"probesEPIC"},\code{"probes450"} and \code{"probes27"} correspond to MethylationEPIC, 
#' 				 HumanMethylation450, and HumanMethylation27 microarrays respectively.
#'
#' @return \code{character} vector of control targets.
#'
#' @examples
#' \donttest{
#' "NEGATIVE" %in% rnb.infinium.control.targets()
#' }
#' @author Pavlo Lutsik
#' @export
rnb.infinium.control.targets <- function(target="probes450") {
	if(target=="probesEPIC"){
		return(EPIC.CONTROL.TARGETS)
	}else if(target=="probes450"){
		return(HM450.CONTROL.TARGETS)
	}else if(target=="probes27"){
		return(HM27.CONTROL.TARGETS)
	}else{
		stop("Unsupported platform")
	}
}

Try the RnBeads package in your browser

Any scripts or data that you put into this service are public.

RnBeads documentation built on March 3, 2021, 2 a.m.