#### c ####
#' Combine unitted elements into a unitted vector
#'
#' This function takes one or more unitted elements and combines them into a
#' single unitted vector. To enforce units integrity, the function requires that
#' all elements passed to the S3 or S4 versions of c.unitted() must have the
#' same units, at least when recursive=FALSE. When recursive=TRUE, elements may
#' include lists or data.frames, but the base elements (vectors, columns, etc.)
#' of those list elements must still be unitted.
#'
#' The S4 method for c<<unitted>> will be found first (before other c() methods)
#' whenever the first argument is unitted.
#'
#' @name unitted_c
#' @family unitted object manipulation
#' @return A unitted vector
NULL
#' @rdname unitted_c
#' @inheritParams base::c
#' @param x the first argument passed to \code{c}
#' @export
setMethod(
"c", "unitted",
function(x, ..., recursive=FALSE) {
if(missing(x)) return(NULL)
listarg <- c(list(x),list(...))
# decide on the type of join to do
ctype <- if(any(sapply(listarg, function(elem) isTRUE(is(elem, "unitted_list"))))) { "unitted_list"
} else if(any(sapply(listarg, function(elem) isTRUE(is.list(elem))))) { "list or data.frame"
} else { "atomic" }
if(ctype == "atomic") {
# If all elements are atomic, (1) check to make sure they all have the
# same units, (2) combine them using the usual c(), and (3) assign the
# resulting atomic thing the single common set of units.
newunits <- unique(get_units(listarg, recursive=TRUE))
if(length(newunits) != 1) {
stop("every element must have the same units")
}
vlist <- lapply(listarg, deunitted)
unitted(do.call("c", vlist), newunits)
# If the elements include a list (including data.frame), (1) convert each
# element to a list using as.list, (2) join the lists, and (3) don't
# assign units; any useful units will be attached to the individual
# elements of the resulting list.
# If the elements include a data.frame, (1) convert the dataframe to a
# list, (2) convert any other elements to lists using as.list
# (push.units=TRUE), (3) combine into a single list, and (4) don't assign
# units; any useful units will be attached to the individual elements of
# the resulting list.
} else if(ctype == "list or data.frame") {
# push.units is ignored by most as.list calls and is TRUE by default for unitted vectors& lists
vlist <- lapply(listarg, function(elem) as.list(elem) )
do.call("c", vlist)
# If the elements include a unitted_list, then call as.list with
# push.units=TRUE and do the list join with the requirement that all lists
# have the same units. Return a single unitted_list.
} else if(ctype == "unitted_list") {
newunits <- unique(get_units(listarg, recursive=TRUE))
if(length(newunits) != 1) {
stop("every element must have the same high-level units")
}
vlist <- lapply(listarg, deunitted, partial=TRUE)
unitted(do.call("c", vlist), newunits)
}
}
)
#' If the first argument is not unitted but later arguments are, then the
#' primitive c method will still be called, and unitted objects will be coerced
#' to their S3 data parts. So if you want to maintain units through a call to
#' \code{c()} where the first argument is not unitted, then you should call
#' \code{c.unitted} directly.
#'
#' To avoid repeating code, the S3 function simply calls the S4 method.
#'
#' @rdname unitted_c
#' @export
c.unitted <- function(..., recursive=FALSE) {
getMethod("c","unitted")(..., recursive=recursive)
}
#### .unitted_bind ####
# Main implementation of rbind and cbind for unitted objects
#
# The most reliable way to call these is with rbind.unitted() and
# cbind.unitted(). Although rbind() and cbind() often work correctly for
# combinations of two or more unitted objects of the same class (e.g., two
# unitted_data.frames or two unitted_characters), the base rbind and cbind
# methods often fail to redirect to their unitted versions when there are type
# mismatches, even minor ones, among the objects to be bound.
#
# Known issues:
#
# cbind/rbind(unitteddataframe, dataframe) or cbind/rbind(dataframe,
# unitteddataframe) does not get routed to these unitted methods; your options
# in this case are to call c/rbind(unitteddataframe, u(dataframe)) or to call
# c/rbind.unitted(unitteddataframe, dataframe).
#
# The unitted cbind and rbind do not implement deparse.level=2 -- this would
# require too much duplication of the base rbind and cbind code for not enough
# reward.
.unitted_bind <- function(..., fun.name=c("cbind","rbind")) {
# Our basic strategy is to create local, deunitted copies of each element to be
# bound. It helps for these copies to have the same names as the originals (for
# deparse.level = 1 in particular); if the originals were given as expressions,
# however, we need to rename them to a local variable name so that we can store
# their deunitted values. The argument TAG names (as opposed to the variable
# names) should be kept as in the original call, again to permit rbind(...,
# deparse.level=1) to work its magic. Further, arguments that were orignally
# expressions should again be passed to rbind as expressions, except that these
# new expressions should evaluate to deunitted objects.
# Get a list of the arguments in ..., names and values
evaluated_args <- list(...)
# Figure out where the original rbind calling frame is relative to this one;
# it depends on whether we arrived here via regular rbind (calling_frame=-2)
# or by a direct call to rbind.unitted[_something] (calling_frame=-1). Assume
# the user didn't call .unitted_bind directly, because they shouldn't.
sys_calls <- as.character(sys.calls())
calling_frame <- ifelse(isTRUE(paste0(fun.name,"(deparse.level, ...)") %in% sys_calls), -2, -1)
# Find, clean, and check deparse.level. rbind and cbind don't pass on
# deparse.level to any specialized bind functions, including our unitted ones.
# So we have to look back through the call stack. This works if the
# deparse.level was passed to rbind or cbind as a value. But if it was passed
# as a symbol, we're out of luck and need to give a warning and make an
# assumption.
deparse.level <- sys.call(calling_frame)[["deparse.level"]]
if(is.null(deparse.level)) {
# deparse.level=1 is the default to rbind/cbind, though it's hidden from us
# by their funny function dispatch
deparse.level <- 1L
} else if(is.symbol(deparse.level)) {
warning("deparse.level passed to unitted ",fun.name," as an expression cannot be read; assuming deparse.level=1")
deparse.level <- 1L
} else {
deparse.level <- as.integer(deparse.level)
}
stopifnot(0 <= deparse.level, deparse.level <= 2)
if(deparse.level == 2) {
warning("deparse.level=2 has no unitted implementation; defaulting to deparse.level=1")
deparse.level <- 1L
}
# Inspect the arguments passed to the rbind/cbind call in the calling_frame.
# Often, args_to_bind will contain the original expressions used to define the
# arguments, in which case we can pass these on to rbind or cbind, where they
# may be used to name the rows or columns (depending on deparse.level).
# However, users could possibly call rbind() from within another function,
# passing along arguments using "...". In this case, I believe we can't do
# better than deparse.level=0, because I don't know how to reliably identify
# the expressions passed into the first "...".
if(deparse.level > 0L) {
args_to_bind <- as.list(sys.call(calling_frame)[-1L])
# We're in trouble (re: deparsing) if and only if there's a '...' in args_to_bind
if(any(sapply(args_to_bind,
function(arg) {
if(is.symbol(arg)) arg == as.symbol("...") else FALSE
}))) {
warning("arguments could not be deparsed; forcing deparse.level to 0")
deparse.level <- 0L
}
}
# We should have reduced the deparse.level possibilities to 1L or 0L by now
stopifnot(deparse.level %in% c(0L, 1L))
# Come up with a list of the elements (in one form or another, depending on
# deparse.level) to be bound together. Consider any argument not named "deparse.level".
if(deparse.level == 0L) {
# (We're checking deparse.level again because it might have changed just above.)
args_to_bind <- evaluated_args
}
if(!is.null(names(args_to_bind))) {
args_to_bind <- args_to_bind[which(names(args_to_bind) != "deparse.level")]
}
# Return NULL if it's just that easy
if(length(args_to_bind) == 0) return(NULL)
# Now assign each argument a variable name in which the deunitted copy will be
# stored, and a symbol or expression with which to call it in our upcoming
# call to non-unitted rbind/cbind.
arg_names <- character(length(args_to_bind))
if(deparse.level == 0L) {
# Create a placeholder symbol in which to store the deunitted copy of each arg
for(i in seq_along(args_to_bind)) {
arg_names[i] <- paste0("XX",i,"XX")
args_to_bind[[i]] <- substitute((a),list(a=as.symbol(arg_names[i]))) # wrap the name in () so it's called as an expression
}
} else {
# Replace any non-symbols (i.e., expressions) with placeholder symbols, like we did for deparse.level==0 just above
for(i in seq_along(args_to_bind)) {
arg <- args_to_bind[[i]]
if(is.symbol(arg)) {
arg_names[i] <- as.character(arg)
# Check to make sure we won't be overwriting another argument; this
# check is a touch conservative, but conflicts should still be rare
if(grepl("XX(.*)XX", arg_names[i])) {
stop("sorry - symbols passed to unitted ",fun.name," can't have the pattern XX*XX")
}
# args_to_bind[[i]] <- arg # already done
} else {
arg_names[i] <- paste0("XX",i,"XX") # this could be a problem if one of the args is already XX##
args_to_bind[[i]] <- substitute((a),list(a=as.symbol(arg_names[i]))) # wrap the name in () so it's called as an expression
}
}
}
# Evaluate each argument, collect its units, and put a deunitted version in
# an empty environment called v_frame
newunits <- list()
v_frame <- new.env()
for(i in seq_along(args_to_bind)) {
newunits[[i]] <- get_unitbundles(evaluated_args[[i]])
assign(arg_names[i], deunitted(evaluated_args[[i]]), pos=v_frame)
}
if(fun.name=="cbind" & any(sapply(evaluated_args, is.data.frame))) {
newunits <- unlist(newunits, recursive=FALSE)
} else {
# In most cases - rbind, or cbind for non-data.frames - if there's more than
# one unit, we shouldn't be binding.
newunits <- unique(newunits)
if(length(newunits) > 1) {
# Compare the remaining (unique) newunits to see if they're equivalent
# according to ==.unitbundle. In particular, unitbundle("") == NA
error_message <- "Every element must have the same units"
for(nu1 in seq_len(length(newunits)-1)) {
for(nu2 in (nu1+1):length(newunits)) {
if(is.list(newunits[[nu1]])) { # usually data.frames
if(length(newunits[[nu1]]) != length(newunits[[nu2]])) {
stop(error_message)
} else {
for(nu_elem in seq_along(newunits[[nu1]])) {
if(newunits[[nu1]][[nu_elem]] != newunits[[nu2]][[nu_elem]]) {
stop(error_message)
}
}
}
} else {
if(newunits[[nu1]] != newunits[[nu2]]) {
stop(error_message)
}
}
}
}
}
newunits <- newunits[[1]]
}
# Add deparse.level to the list of arguments to be send back to rbind/cbind
assign("deparse.level", deparse.level, pos=v_frame)
args_to_bind <- c(args_to_bind, list(deparse.level=substitute(deparse.level)))
# Evaluate rbind or cbind in the context of the deunitted copies in v_frame
bound <- do.call(fun.name, args_to_bind, envir=v_frame)
unitted(bound, newunits)
}
#### rbind ####
#' Bind unitted objects by row or column
#'
#' Combines unitted objects as if they were not unitted, but enforces unit
#' consistency across the objects to be combined.
#'
#' \code{deparse.level} is an integer controlling the construction of labels, as in
#' the default rbind and cbind methods. Only deparse.level = 0 and 1 are
#' available for unitted rbind/cbind calls.
#'
#' @name unitted_bind
#' @aliases unitted_rbind rbind bind
#' @rdname unitted_bind
#' @export
#' @family unitted object manipulation
#'
#' @param ... unitted vectors, matrices, or data.frames
rbind.unitted <- function(...) {
.unitted_bind(..., fun.name="rbind")
}
# The specific unitted_xxx functions get discovered by rbind(), while the
# generic rbind.unitted never does. So we need an rbind function for every
# unitted subclass. The rbind.unitted function defined above is also useful,
# however, for when someone wants to explicitly specify a unitted type of rbind.
for(subclass in names(getClass("unitted")@subclasses)) {
assign(paste0("rbind.",subclass), function(...) {
.unitted_bind(..., fun.name="rbind")
})
}
#### cbind ####
#' @aliases unitted_cbind cbind
#' @rdname unitted_bind
#' @export
cbind.unitted <- function(...) {
.unitted_bind(..., fun.name="cbind")
}
# Same logic as for rbind methods.
for(subclass in names(getClass("unitted")@subclasses)) {
assign(paste0("cbind.",subclass), function(...) {
.unitted_bind(..., fun.name="cbind")
})
}
#### merge ####
setGeneric("merge")
#' Merge unitted data.frames by one or more common columns
#'
#' Merges unitted data.frames, ensuring units compatibility among the common
#' columns
#'
#' @name unitted_merge
#'
#' @seealso \code{base::\link{merge}}
#' @family unitted object manipulation
NULL
#' @rdname unitted_merge
#' @param x first unitted object to merge
#' @param y second unitted object to merge
#' @param ... other arguments passed to \code{c}
#' @importFrom utils capture.output
#' @export
setMethod(
"merge",
c(x="unitted_data.frame", y="unitted_data.frame"),
function(x, y, ...) {
# Do the basic merge without units; any errors will be thrown now before we get into units checks
merged_xy <- merge(v(x), v(y), ...)
# Now try to merge units
xu <- do.call(data.frame, c(as.list(get_units(x)), list(stringsAsFactors=FALSE)))
yu <- do.call(data.frame, c(as.list(get_units(y)), list(stringsAsFactors=FALSE)))
dots <- list(...)
if(length(dots) > 0) {
dots <- dots[which(!(names(dots) %in% c("all","all.x","all.y")))]
}
merged_units <- do.call(merge, c(list(xu, yu, all=T),dots))
# Check on merged units
if(nrow(merged_units) != 1) {
message("Attempting to merge with these conflicting units:")
message(paste(capture.output(print(merged_units)),collapse="\n"))
stop("Units conflict in merge")
}
if(!all(names(merged_units) == names(merged_xy))) {
message("Names of merged data.frame:")
message(paste(names(merged_xy),collapse=" "))
message("Names of merged units:")
message(paste(names(merged_units),collapse=" "))
stop("Couldn't reconcile names of merged data.frames and merged units")
}
# If the units look good, send them out with the merged data
unitted(merged_xy, unlist(merged_units[1,]))
}
)
#' @rdname unitted_merge
#' @export
setMethod(
"merge",
c(x="unitted", y="ANY"),
function(x, y, ...) {
stop("merge for unitted, ANY not yet implemented")
}
)
#' @rdname unitted_merge
#' @export
setMethod(
"merge",
c(x="ANY", y="unitted"),
function(x, y, ...) {
stop("merge for ANY, unitted not yet implemented")
}
)
#### rep ####
#' Replicate elements, maintaining the original units
#'
#' Wrapper for non-unitted rep() methods.
#'
#' @name unitted_rep
#' @rdname unitted_rep
#' @param x object to replicate
#' @param ... other objects passed to \code{rep}
#' @export
#' @family unitted object manipulation
rep.unitted <- function(x, ...) {
unitted(rep(deunitted(x), ...), get_unitbundles(x))
}
#' @rdname unitted_rep
#' @export
rep.unitted_data.frame <- function(x, ...) {
rep(deunitted(x, partial=TRUE), ...)
# less efficient, I think:
# mapply(
# function(elem, units) unitted(elem, units),
# rep(deunitted(x), ...),
# rep(get_unitbundles(x), ...),
# SIMPLIFY=FALSE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.