### =========================================================================
### Vector objects
### -------------------------------------------------------------------------
###
### The Vector virtual class is a general container for storing a finite
### sequence i.e. an ordered finite collection of elements.
###
setClassUnion("DataTable_OR_NULL", c("DataTable", "NULL"))
setClass("Vector",
contains="Annotated",
representation(
"VIRTUAL",
elementMetadata="DataTable_OR_NULL"
)
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### parallelSlotNames()
###
### For internal use only.
###
### Must return the names of all the slots in Vector object 'x' that are
### "parallel" to 'x'. Slot 'foo' is considered to be "parallel" to 'x' if:
### (a) 'x@foo' is NULL or an object for which NROW() is equal to
### 'length(x)', and
### (b) the i-th element in 'x@foo' describes some component of the i-th
### element in 'x'.
### For example, the "start", "width", "NAMES", and "elementMetadata" slots
### of an IRanges object are parallel to the object. Note that the "NAMES"
### and "elementMetadata" slots can be set to NULL.
### The *first" slot name returned by parallelSlotNames() is used to get the
### length of 'x'.
###
setGeneric("parallelSlotNames",
function(x) standardGeneric("parallelSlotNames")
)
setMethod("parallelSlotNames", "Vector", function(x) "elementMetadata")
### Methods for Vector subclasses only need to specify the parallel slots they
### add to their parent class. See Hits-class.R file for an example.
### parallelVectorNames() is for internal use only.
setGeneric("parallelVectorNames",
function(x) standardGeneric("parallelVectorNames"))
setMethod("parallelVectorNames", "ANY",
function(x) setdiff(colnames(as.data.frame(new(class(x)))), "value"))
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Getters.
###
setMethod("length", "Vector",
function(x) NROW(slot(x, parallelSlotNames(x)[[1L]]))
)
setMethod("lengths", "Vector",
function(x, use.names=TRUE)
{
if (!isTRUEorFALSE(use.names))
stop("'use.names' must be TRUE or FALSE")
ans <- elementNROWS(x) # This is wrong! See ?Vector for the details.
if (!use.names)
names(ans) <- NULL
ans
}
)
setMethod("NROW", "Vector", function(x) length(x))
setMethod("ROWNAMES", "Vector", function(x) names(x))
### 3 accessors for the same slot: elementMetadata(), mcols(), and values().
### mcols() is the recommended one, use of elementMetadata() or values() is
### discouraged.
setGeneric("elementMetadata",
function(x, use.names=FALSE, ...) standardGeneric("elementMetadata")
)
setMethod("elementMetadata", "Vector",
function(x, use.names=FALSE, ...)
{
if (!isTRUEorFALSE(use.names))
stop("'use.names' must be TRUE or FALSE")
ans <- x@elementMetadata
if (use.names && !is.null(ans))
rownames(ans) <- names(x)
ans
}
)
setGeneric("mcols",
function(x, use.names=FALSE, ...) standardGeneric("mcols")
)
setMethod("mcols", "Vector",
function(x, use.names=FALSE, ...)
elementMetadata(x, use.names=use.names, ...)
)
setGeneric("values", function(x, ...) standardGeneric("values"))
setMethod("values", "Vector", function(x, ...) elementMetadata(x, ...))
setMethod("anyNA", "Vector", function(x, recursive=FALSE) any(is.na(x)))
setMethod("is.na", "Vector", function(x) rep(FALSE, length(x)))
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Validity.
###
.valid.Vector.length <- function(x)
{
x_len <- length(x)
if (!isSingleNumber(x_len) || x_len < 0L)
return("'length(x)' must be a single non-negative number")
if (!is.null(attributes(x_len)))
return("'length(x)' must be a single integer with no attributes")
NULL
}
.valid.Vector.parallelSlots <- function(x)
{
x_len <- length(x)
x_pslotnames <- parallelSlotNames(x)
if (!is.character(x_pslotnames)
|| anyMissing(x_pslotnames)
|| anyDuplicated(x_pslotnames)) {
msg <- c("'parallelSlotNames(x)' must be a character vector ",
"with no NAs and no duplicates")
return(paste(msg, collapse=""))
}
if (x_pslotnames[[length(x_pslotnames)]] != "elementMetadata") {
msg <- c("last string in 'parallelSlotNames(x)' ",
"must be \"elementMetadata\"")
return(paste(msg, collapse=""))
}
for (slotname in x_pslotnames) {
tmp <- slot(x, slotname)
if (!(is.null(tmp) || NROW(tmp) == x_len)) {
if (slotname == "elementMetadata") {
what <- "mcols(x)"
} else {
what <- paste0("x@", slotname)
}
msg <- c("'", what, "' is not parallel to 'x'")
return(paste(msg, collapse=""))
}
}
NULL
}
.valid.Vector.names <- function(x)
{
x_names <- names(x)
if (is.null(x_names))
return(NULL)
if (!is.character(x_names) || !is.null(attributes(x_names))) {
msg <- c("'names(x)' must be NULL or a character vector ",
"with no attributes")
return(paste(msg, collapse=""))
}
if (length(x_names) != length(x))
return("'names(x)' must be NULL or have the length of 'x'")
NULL
}
.valid.Vector.mcols <- function(x)
{
x_mcols <- mcols(x)
if (!is(x_mcols, "DataTable_OR_NULL"))
return("'mcols(x)' must be a DataTable object or NULL")
if (is.null(x_mcols))
return(NULL)
## 'x_mcols' is a DataTable object.
x_mcols_rownames <- rownames(x_mcols)
if (is.null(x_mcols_rownames))
return(NULL)
if (!identical(x_mcols_rownames, names(x)))
{
msg <- c("the rownames of DataTable 'mcols(x)' ",
"must match the names of 'x'")
return(paste(msg, collapse=""))
}
NULL
}
.valid.Vector <- function(x)
{
c(.valid.Vector.length(x),
.valid.Vector.parallelSlots(x),
.valid.Vector.names(x),
.valid.Vector.mcols(x))
}
setValidity2("Vector", .valid.Vector)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Coercion.
###
setMethod("as.logical", "Vector",
function(x) as.vector(x, mode="logical")
)
setMethod("as.integer", "Vector",
function(x) as.vector(x, mode="integer")
)
setMethod("as.numeric", "Vector",
function(x) as.vector(x, mode="numeric")
)
### Even though as.double() is a generic function (as reported by
### 'getGeneric("as.double")', it seems impossible to define methods for this
### generic. Trying to do so like in the code below actually creates an
### "as.numeric" method.
#setMethod("as.double", "Vector",
# function(x) as.vector(x, mode="double")
#)
setMethod("as.complex", "Vector",
function(x) as.vector(x, mode="complex")
)
setMethod("as.character", "Vector",
function(x) as.vector(x, mode="character")
)
setMethod("as.raw", "Vector",
function(x) as.vector(x, mode="raw")
)
setAs("Vector", "vector", function(from) as.vector(from))
setAs("Vector", "logical", function(from) as.logical(from))
setAs("Vector", "integer", function(from) as.integer(from))
setAs("Vector", "numeric", function(from) as.numeric(from))
setAs("Vector", "complex", function(from) as.complex(from))
setAs("Vector", "character", function(from) as.character(from))
setAs("Vector", "raw", function(from) as.raw(from))
setAs("Vector", "factor", function(from) as.factor(from))
setAs("Vector", "data.frame", function(from) as.data.frame(from))
### S3/S4 combo for as.data.frame.Vector
as.data.frame.Vector <- function(x, row.names=NULL, optional=FALSE, ...) {
as.data.frame(x, row.names=NULL, optional=optional, ...)
}
setMethod("as.data.frame", "Vector",
function(x, row.names=NULL, optional=FALSE, ...)
{
x <- as.vector(x)
as.data.frame(x, row.names=row.names, optional=optional, ...)
})
as.matrix.Vector <- function(x, ...) {
as.matrix(x)
}
setMethod("as.matrix", "Vector", function(x) {
as.matrix(as.vector(x))
})
classNamespace <- function(x) {
pkg <- packageSlot(class(x))
pvnEnv <- .GlobalEnv
if (!is.null(pkg)) {
pvnEnv <- getNamespace(pkg)
}
pvnEnv
}
makeFixedColumnEnv <- function(x, parent, tform = identity) {
env <- new.env(parent=parent)
pvnEnv <- classNamespace(x)
lapply(c("names", parallelVectorNames(x)), function(nm) {
accessor <- get(nm, pvnEnv, mode="function")
makeActiveBinding(nm, function() {
val <- tform(accessor(x))
rm(list=nm, envir=env)
assign(nm, val, env)
val
}, env)
})
env
}
setMethod("as.env", "Vector", function(x, enclos, tform = identity) {
addSelfRef(x, makeFixedColumnEnv(x, as.env(mcols(x), enclos, tform), tform))
})
as.list.Vector <- function(x, ...) as.list(x, ...)
setMethod("as.list", "Vector", function(x, ...) as.list(as(x, "List"), ...))
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Setters.
###
setGeneric("elementMetadata<-",
function(x, ..., value) standardGeneric("elementMetadata<-"))
### NOT exported but used in packages IRanges, GenomicRanges,
### SummarizedExperiment, GenomicAlignments, and maybe more...
### 3x faster than new("DataFrame", nrows=nrow).
### 500x faster than DataFrame(matrix(nrow=nrow, ncol=0L)).
make_zero_col_DataFrame <- function(nrow)
new2("DataFrame", nrows=nrow, check=FALSE)
.normalize_mcols_replacement_value <- function(value, x)
{
x_slots <- getSlots(class(x))
## Should never happen because 'x' should always be a Vector object so
## should always have the 'elementMetadata' slot.
if (!("elementMetadata" %in% names(x_slots)))
stop(wmsg("trying to set metadata columns on an object that does ",
"not support them (i.e. with no 'elementMetadata' slot)"))
mcols_class <- x_slots[["elementMetadata"]]
if (is.null(value)) {
if (is(NULL, mcols_class))
return(NULL)
value <- make_zero_col_DataFrame(length(x))
}
value <- as(value, mcols_class, strict=TRUE)
## From here 'value' is guaranteed to be a DataTable object.
if (!is.null(rownames(value)))
rownames(value) <- NULL
V_recycle(value, x, x_what="value", skeleton_what="x")
}
setReplaceMethod("elementMetadata", "Vector",
function(x, ..., value)
{
value <- .normalize_mcols_replacement_value(value, x)
BiocGenerics:::replaceSlots(x, elementMetadata=value, check=FALSE)
}
)
setGeneric("mcols<-", function(x, ..., value) standardGeneric("mcols<-"))
setReplaceMethod("mcols", "Vector",
function(x, ..., value) `elementMetadata<-`(x, ..., value=value)
)
setGeneric("values<-", function(x, ..., value) standardGeneric("values<-"))
setReplaceMethod("values", "Vector",
function(x, value) {
elementMetadata(x) <- value
x
})
setGeneric("rename", function(x, ...) standardGeneric("rename"))
.renameVector <- function(x, ...) {
newNames <- c(...)
if (!is.character(newNames) || any(is.na(newNames))) {
stop("arguments in '...' must be character and not NA")
}
badOldNames <- setdiff(names(newNames), names(x))
if (length(badOldNames))
stop("Some 'from' names in value not found on 'x': ",
paste(badOldNames, collapse = ", "))
names(x)[match(names(newNames), names(x))] <- newNames
x
}
setMethod("rename", "vector", .renameVector)
setMethod("rename", "Vector", .renameVector)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Subsetting.
###
### The "[" and "[<-" methods for Vector objects are just delegating to
### extractROWS() and replaceROWS() for performing the real work. Most of
### the times, the author of a Vector subclass only needs to implement an
### "extractROWS" and "replaceROWS" method for his/her objects.
###
setMethod("[", "Vector", subset_along_ROWS)
### We provide a default "extractROWS" method for Vector objects that only
### subsets the individual parallel slots. That should be enough for most
### Vector derivatives that have parallelSlotNames() properly set.
setMethod("extractROWS", "Vector",
function(x, i)
{
i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE)
x_pslotnames <- parallelSlotNames(x)
ans_pslots <- lapply(setNames(x_pslotnames, x_pslotnames),
function(slotname)
extractROWS(slot(x, slotname), i))
## Does NOT validate the object before returning it, because, most of
## the times, this is not needed. There are exceptions though. See
## for example the "extractROWS" method for Hits objects.
do.call(BiocGenerics:::replaceSlots,
c(list(x), ans_pslots, list(check=FALSE)))
}
)
setReplaceMethod("[", "Vector",
function(x, i, j, ..., value)
{
if (!missing(j) || length(list(...)) > 0L)
stop("invalid subsetting")
i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE)
li <- length(i)
if (li == 0L) {
## Surprisingly, in that case, `[<-` on standard vectors does not
## even look at 'value'. So neither do we...
return(x)
}
lv <- NROW(value)
if (lv == 0L)
stop("replacement has length zero")
value <- normalizeSingleBracketReplacementValue(value, x)
if (li != lv) {
if (li %% lv != 0L)
warning("number of values supplied is not a sub-multiple ",
"of the number of values to be replaced")
value <- extractROWS(value, rep(seq_len(lv), length.out=li))
}
replaceROWS(x, i, value)
}
)
### Work on any Vector object for which c() and extractROWS() work.
### Assume 'value' is compatible with 'x'.
setMethod("replaceROWS", "Vector",
function(x, i, value)
{
i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE)
## --<1>-- Concatenate 'x' and 'value' with c() -----
## We assume that c() works on objects of class 'class(x)' and that it
## does the right thing i.e. that it returns an object of the same
## class as 'x' and of length 'length(x) + length(value)'.
ans <- c(x, value)
## --<2>-- Subset 'c(x, value)' with extractROWS() -----
idx <- replaceROWS(seq_along(x), i, seq_along(value) + length(x))
## Because of how we constructed it, 'idx' is guaranteed to be a valid
## subscript to use in 'extractROWS(ans, idx)'. By wrapping it inside a
## NativeNSBS object, extractROWS() won't waste time checking it or
## trying to normalize it.
idx <- NativeNSBS(idx, length(ans), TRUE, FALSE)
## We assume that extractROWS() works on an object of class 'class(x)'.
ans <- extractROWS(ans, idx)
## --<3>-- Restore the original decoration -----
metadata(ans) <- metadata(x)
names(ans) <- names(x)
## However, we want the replaced elements in 'x' to get their
## metadata columns from 'value' so we do not restore the original
## metadata columns. See this thread on bioc-devel:
## https://stat.ethz.ch/pipermail/bioc-devel/2015-November/008319.html
#mcols(ans) <- mcols(x)
ans
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Convenience wrappers for common subsetting operations.
###
### S3/S4 combo for window.Vector
window.Vector <- function(x, ...) window(x, ...)
Vector_window <- function(x, start=NA, end=NA, width=NA)
{
i <- RangeNSBS(x, start=start, end=end, width=width)
extractROWS(x, i)
}
setMethod("window", "Vector", Vector_window)
### S3/S4 combo for head.Vector
head.Vector <- function(x, ...) head(x, ...)
setMethod("head", "Vector", head_along_ROWS)
## S3/S4 combo for tail.Vector
tail.Vector <- function(x, ...) tail(x, ...)
setMethod("tail", "Vector", tail_along_ROWS)
setMethod("rep.int", "Vector", rep.int_along_ROWS)
## NOT exported.
revROWS <- function(x) extractROWS(x, rev(seq_len(NROW(x))))
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Combining.
###
### Somewhat painful that we do not always have a DataFrame in elementMetadata
ensureMcols <- function(x) {
mc <- mcols(x)
if (is.null(mc)) {
mc <- make_zero_col_DataFrame(length(x))
}
mc
}
rbind_mcols <- function(x, ...)
{
args <- c(if (!missing(x)) list(x), list(...))
mcols_list <- lapply(args, mcols)
if (length(mcols_list) == 1L)
return(mcols_list[[1L]])
mcols_is_null <- sapply(mcols_list, is.null)
if (all(mcols_is_null))
return(NULL)
mcols_list[mcols_is_null] <- lapply(
args[mcols_is_null],
function(arg) make_zero_col_DataFrame(length(arg))
)
colnames_list <- lapply(mcols_list, colnames)
allCols <- unique(unlist(colnames_list, use.names=FALSE))
fillCols <- function(df) {
if (nrow(df))
df[setdiff(allCols, colnames(df))] <- DataFrame(NA)
df
}
do.call(rbind, lapply(mcols_list, fillCols))
}
rbindRowOfNAsToMetadatacols <- function(x) {
x_mcols <- mcols(x)
if (!is.null(x_mcols))
mcols(x)[nrow(x_mcols)+1L,] <- NA
x
}
### FIXME: This method doesn't work properly on DataTable objects if 'after'
### is >= 1 and < length(x).
setMethod("append", c("Vector", "Vector"),
function(x, values, after=length(x))
{
if (!isSingleNumber(after))
stop("'after' must be a single number")
x_len <- length(x)
if (after == 0L)
c(values, x)
else if (after >= x_len)
c(x, values)
else
c(head(x, n=after), values, tail(x, n=-after))
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Evaluating.
###
setMethod("eval", c("expression", "Vector"),
function(expr, envir, enclos = parent.frame())
eval(expr, as.env(envir, enclos))
)
setMethod("eval", c("language", "Vector"),
function(expr, envir, enclos = parent.frame())
eval(expr, as.env(envir, enclos))
)
setMethod("with", "Vector",
function(data, expr, ...)
{
safeEval(substitute(expr), data, parent.frame(), ...)
})
setReplaceMethod("column", "Vector", function(x, name, value) {
if (name %in% parallelVectorNames(x)) {
setter <- get(paste0(name, "<-"), classNamespace(x), mode="function")
setter(x, value=value)
} else {
mcols(x)[[name]] <- value
x
}
})
transform.Vector <- transformColumns
setMethod("transform", "Vector", transform.Vector)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Utilities.
###
setGeneric("expand.grid", signature="...")
BiocGenerics:::apply_hotfix73465(getGeneric("expand.grid"))
setMethod("expand.grid", "Vector",
function(..., KEEP.OUT.ATTRS = TRUE, stringsAsFactors = TRUE) {
args <- list(...)
inds <- lapply(args, seq_along)
grid <- do.call(expand.grid,
c(inds,
KEEP.OUT.ATTRS=KEEP.OUT.ATTRS,
stringsAsFactors=stringsAsFactors))
names(args) <- names(grid)
ans <- DataFrame(mapply(`[`, args, grid, SIMPLIFY=FALSE),
check.names=FALSE)
metadata(ans)$out.attrs <- attr(grid, "out.attrs")
ans
})
### FIXME: tapply method still in IRanges
setMethod("by", "Vector",
function(data, INDICES, FUN, ..., simplify = TRUE)
{
if (!is.list(INDICES)) {
INDICES <- setNames(list(INDICES),
deparse(substitute(INDICES))[1L])
}
FUNx <- function(i) FUN(extractROWS(data, i), ...)
structure(tapply(seq_len(NROW(data)), INDICES, FUNx,
simplify = simplify),
call = match.call(), class = "by")
})
diff.Vector <- function(x, ...) diff(x, ...)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.