#' @include 01-parse.R 02-unitbundle.R
NULL
## Unitted object construction
#### Superclass Definition ####
#' A fusion of data and units
#'
#' A unitted object is a data object - a vector, data.frame, array, etc. - and
#' its associated units. These units will be propagated through addition,
#' multiplication, and many other common operations on data. Unitted objects are
#' created by calling \code{\link{unitted}()} or \code{\link{u}()}, and the data
#' can be retrieved by calling \code{\link{deunitted}()} or \code{\link{v}()}.
#' Units can be extracted as character strings with \code{\link{get_units}()}.
#' Most of the time, you'll find that unitted objects can do everything that
#' their un-unitted data could do, but with smart units to make your analyses
#' more readable and less vulnerable to typos or omissions.
#'
#' Every unitted object contains a .Data 'slot', though this is not formally
#' recognized as such in the documentation.
#'
#' @name unitted-class
#' @rdname unitted-class
#' @exportClass unitted
#'
#' @slot units The units. Depending on the .Data type, the units slot may
#' contain a single units bundle (e.g., "kg ha^-1"), a matrix or array of
#' bundles, or a list of bundles. Any of these may be retrieved with
#' \code{\link{get_units}}.
#'
#' @seealso \code{\link{unitted}} for class and subclass construction,
#' \code{\link{deunitted}} for recovering the data from the unitted object,
#' and \code{\link{unitted_Ops}} for smart units behaviors.
#'
#' @examples
#' # unitted object creation
#' miles_vec <- unitted(1:5,"mi")
#' hours_vec <- unitted(7,"hr")
#'
#' # unitted object manipulation
#' miles_vec / hours_vec
#' miles_vec^2
#' hours_vec + hours_vec
#' hours_vec == hours_vec
setClass("unitted", slots=c(units="ANY"))
#### Generic Constructor ####
#' Attach units to data
#'
#' A \code{unitted} object is the fusion of data and units. The \code{unitted()}
#' function, or its alias \code{u()}, constructs an object inheriting from the
#' \code{\linkS4class{unitted}} class.
#'
#' The short constructor name (\code{u}) makes it easy to write clean code that
#' enforces units consistency throughout your data analysis. The underlying
#' constructor function is called \code{unitted()} but aliased to \code{u()}.
#' Similarly, units can be removed from data using the \code{\link{deunitted}()}
#' function or its alias \code{\link{v}()}.
#'
#' @name u
#' @rdname unitted
#' @export u
#'
#' @param object A vector, data.frame, array, matrix, list, or S4 object
#' containing data
#' @param units A specification of the units to attach to \code{object}. The
#' class and form of \code{units} depends on the class of \code{object}; see
#' Details
#' @param ... Further arguments passed to unitbundle() for each unitted vector
#' or element created
u <- function(object, units=NA, ...) {
unitted(object, units, ...)
}
#' @name unitted
#' @rdname unitted
#' @exportMethod unitted
setGeneric(
"unitted",
function(object, units=NA, ...) {
standardGeneric("unitted")
}
)
#### Subclass Definitions ####
# These definitions must occur after the generic unitted constructor is defined.
#' Define a custom subclass of \code{unitted}
#'
#' There is already a unitted class defined for each of the basic classes
#' (numeric, logical, character, ...), many of the most common S3 data
#' structures (factor, Date, POSIXct), and both lists and data.frames. And for
#' the most part, only the numeric types will make sense as unitted types.
#' However, there may still be cases where you need yet another class for yet
#' another data type. \code{new_unitted_class} helps you define such a class.
#'
#' Your new class will have to be S4; I have found no way to circumvent this
#' requirement. However, a close S4 counterpart can be constructed for nearly
#' any S3 class. See the examples.
#'
#' Your new class should also have a .Data part to it, because only those
#' computations that apply to the .Data part will affect and be affected by the
#' units. The default behavior of unitted subclasses expects a single unitbundle
#' in the units slot, which means a single set of units per object. Data.frames
#' and lists are the notable exceptions.
#'
#' It is also important to distinguish between S3/S4 classes and S3/S4 methods.
#' Even though you must use an S4 class, you may use any combination of S3 and
#' S4 methods; all of these will be correctly dispatched on objects inheriting
#' from the unitted class, providing that you define your classes correctly and
#' that the dispatch behavior you want is within the abilities of the particular
#' dispatch system (S3 or s4).
#'
#' @param superclass.name The character name of the S4 class that will form the
#' core of the new unitted class. The new class will have the name
#' "unitted_[superclass.name]".
#' @param overwrite Logical. If TRUE, the new setClass call will be executed
#' even if the same superclass.name has already been defined during this
#' session.
#' @examples
#' # only works during package build, at least at the moment
new_unitted_class <- function(superclass.name, overwrite=FALSE) {
# manage the superclass class definition
if(superclass.name %in% c('tbl_df')) setOldClass(superclass.name) # special case
class_def <- getClassDef(superclass.name)
if(is.null(class_def)) {
stop(paste0(superclass.name," must be registered with setOldClass or defined with setClass before calling new_unitted_class()"))
}
# check whether the proposed class already exists
new_name <- paste0("unitted_",superclass.name)
if(new_name %in% names(getClass("unitted")@subclasses)) {
if(overwrite)
warning(paste(new_name,"was already a registered unitted class; calling setClass again anyway"))
else
stop(paste(new_name,"is already a registered unitted class; set overwrite=TRUE to proceed anyway"))
}
# create the class (special case: ordered needs extra info)
if(superclass.name == 'ordered') {
setClass(new_name, contains=c("unitted", superclass.name), slots=list(.Data='integer', levels='character', units='unitbundle'),
validity=function(object) TRUE)
} else {
setClass(new_name, contains=c("unitted", superclass.name))
}
# create the constructor named unitted by specific to this class
setMethod("unitted", superclass.name, function(object, units) {
new(new_name, object, units=unitbundle(units))
})
# return the new class name
new_name
}
# Class and constructor definitions for common unitted classes
#
# Don't be fooled! This unassuming code block is the heart of unitted class
# definitions.
sapply(c("character","complex","logical","numeric","raw","NULL",
"factor","Date","POSIXct","POSIXlt",
"list","tbl_df", #"data.frame",
"array","matrix","ts",
"expression","name","function"),
new_unitted_class)
# setOldClass("difftime")
# new_unitted_class("difftime")
#' A data.frame with units attached to each column
#'
#' Data frame columns can be extracted with units and cells can be modified with
#' units checking.
#'
#' @name unitted_data.frame-class
#' @rdname unitted_data.frame-class
#' @exportClass unitted_data.frame
setClass("unitted_data.frame", contains=c("unitted", "data.frame"))
#### Specific Constructors ####
#' @details \subsection{Data.frames}{
#'
#' In a call to \code{unitted(object, units, ...)} where \code{object} is a
#' non-unitted data.frame, \code{object} may contain columns that are
#' non-unitted, unitted, or a mix.
#'
#' \code{units} may be one of three possibilities: (1) a character vector of
#' unit strings or NAs, one per column of the data.frame, (2) a list of the
#' same length as \code{ncol(object)} where each element is a units
#' specification of one of the forms permitted by \code{\link{unitbundle}()},
#' or (3) either NA or missing, indicating that units should be inferred from
#' current units of the data.frame columns.
#'
#' If \code{object} contains some unitted columns, the units of those columns
#' will be preserved if and only if the corresponding element of \code{units}
#' is NA. To overwrite a previously unitted column's units with empty units,
#' specify the new units for that column with "". Any non-unitted column with
#' a corresponding NA in \code{units} is given units of "".
#'
#' Known issue: Attaching units to a data.frame with \code{unitted()} creates
#' row names for the data.frame even if they were absent before. This is a
#' known issue with no known resolution. In other respects, unitted
#' data.frames behave very much like non-unitted data.frames.
#'
#' }
#'
#' @rdname unitted
setMethod(
"unitted", "data.frame",
function(object, units, ...) {
#print("u(data.frame)")
if(length(units) != ncol(object)) {
fail <- TRUE
if(length(units) == 1) {
if(!isS4(units[[1]])) {
if(isTRUE(is.na(units[[1]]))) {
units <- rep(NA, ncol(object))
fail <- FALSE
}
}
}
if(fail) stop("Number of units must equal number of data.frame columns")
}
# Overwrite units when current column units are absent and/or units[col] is not NA
for(col in 1:ncol(object)) {
object[,col] <- list(unitted(object[,col], units[[col]]))
}
# Known bug: The following line creates row names for data.frames even if
# they were absent before. This is a property of S4 data.frames that doesn't
# make a lot of sense to me, but that I also don't see how to skirt. So for
# now, I accept that unitted data.frames may be slightly different from
# plain S3 data.frames in this respect.
return(new("unitted_data.frame", object, units=NA))
}
)
#' tbl_dfs work very similarly to data.frames
#'
#' @rdname unitted
setMethod(
"unitted", "tbl_df",
function(object, units, ...) {
#print("u(tbl_df)")
if(length(units) != ncol(object)) {
fail <- TRUE
if(length(units) == 1) {
if(!isS4(units[[1]])) {
if(isTRUE(is.na(units[[1]]))) {
units <- rep(NA, ncol(object))
fail <- FALSE
}
}
}
if(fail) stop("Number of units must equal number of data.frame columns")
}
# Overwrite units when current column units are absent and/or units[col] is not NA
for(col in 1:ncol(object)) {
object[[col]] <- unitted(object[[col]], units[[col]])
}
# Known bug: The following line creates row names for data.frames even if
# they were absent before. This is a property of S4 data.frames that doesn't
# make a lot of sense to me, but that I also don't see how to skirt. So for
# now, I accept that unitted data.frames may be slightly different from
# plain S3 data.frames in this respect.
return(new("unitted_tbl_df", object, units=NA))
}
)
#' @details \subsection{Arrays and matrices}{
#'
#' Arrays and matrices are currently only permitted to have one unit apiece.
#' This may change in the future; if this is a feature you want implemented,
#' please create an issue on GitHub with information about how you would use
#' multiple units in an array or matrix if the option were available.
#'
#' }
#'
#' @rdname unitted
setMethod(
"unitted", "array",
function(object, units, ...) {
# This method is currently identical to the simple constructor defined above.
# This one serves as a placeholder for my intention to implement multi-unit
# arrays and matrices. I am thinking about efficient ways to accomplish this.
# The simplest is to store a parallel array that contains unitbundles rather
# than numeric values. A step up would be to use factors and/or hash tables to
# store the results of arithmetic operations between unique pairs of
# unitbundles during matrix or array arithmetic, reducing the total number of
# computations. It might also be possible to allow only some of the dimensions
# of a matrix or array to be unitted, implying that other dimensions have the
# same unit all along them. But that's an advanced feature for down the road.
#print("u(array)")
#http://stackoverflow.com/questions/11857658/assignment-of-s4-r-objects-to-a-matrix-why-does-this-work
new("unitted_array", object, units=unitbundle(units))
}
)
#' @details \subsection{Lists}{
#'
#' Even non-unitted lists may contain unitted elements (or a mixture of
#' unitted and non-unitted elements). However, there are advantages to
#' defining the list itself as unitted - specifically, operations such as
#' print(), show(), or arithmetic will treat the entire list as unitted,
#' displaying and applying units as appropriate to each element.
#'
#' A unitted list, unlike a list of unitted elements, may have exactly one
#' unitbundle for the entire list.
#'
#' }
#'
#' @rdname unitted
setMethod(
"unitted", "list",
function(object, units, ...) {
#print("u(list)")
#warning("The implementation of unitted lists is currently primitive - one unit bundle per list.")
new("unitted_list", object, units=unitbundle(units))
}
)
#' @details \subsection{Already-unitted objects}{
#'
#' The \code{unitted} or \code{u} function may be used to replicate an
#' already-unitted object or replace its units with new ones. When
#' \code{object} is unitted and the \code{units} argument is \code{NA}, the
#' constructor returns the data untouched. If the \code{units} argument is
#' anything else, the call to \code{unitted()} replaces the units of
#' \code{object} with the new units.
#'
#' }
#'
#' @rdname unitted
setMethod(
"unitted", "unitted",
function(object, units, ...) {
#print("u(unitted)")
if(isTRUE(isS4(units))) {
callGeneric(deunitted(object), units, ...)
} else if(!isTRUE(is.na(units))) {
callGeneric(deunitted(object), units, ...)
} else {
return(object)
}
}
)
#### as.data.frame ####
#' Construct a unitted element of a data.frame
#'
#' \code{data.frames} are constructed by applying \code{as.data.frame()} to each
#' element. Thus, \code{as.data.frame.unitted()} is called whenever an argument
#' to \code{data.frame()} is unitted. \code{as.data.frame.unitted()} can handle
#' inputs of any subclass of \code{unitted}. Data.frame elements constructed
#' with as.data.frame.unitted continue to store their units as members of the
#' complete data.frame, although those units may not be visible until/unless the
#' data.frame is itself made unitted by a call to \code{u(mydf)}.
#'
#' @param x A unitted object
#' @param ... Other arguments passed to \code{as.data.frame()}
#' @return A unitted data.frame element
#' @export
as.data.frame.unitted <- function(x, ...) {
.unitted_as.data.frame(object=x, ...)
}
setGeneric(".unitted_as.data.frame", function(object, ...) {
standardGeneric(".unitted_as.data.frame")
})
setMethod(
".unitted_as.data.frame", "ANY",
function(object, ...) {
# Vectors (including lists unless overridden) are the simple and typical
# case. Remove the units, route to a new as.data.frame call, and then add
# units back to the resulting column of the 1-column data.frame (but not to
# the data.frame itself)
df <- do.call("as.data.frame",list(list(deunitted(object)), ...))
df[[1]] <- unitted(df[[1]], get_unitbundles(object))
names(df) <- NULL
return(df)
}
)
setMethod(
".unitted_as.data.frame", "array",
function(object, ...) {
df <- do.call("as.data.frame",list(list(deunitted(object)), ...))
for(col in 1:dim(df)[2]) {
df[[col]] <- unitted(df[[col]], get_unitbundles(object))
}
names(df) <- NULL
return(df)
}
)
setMethod(
".unitted_as.data.frame", "data.frame",
function(object, ...) {
return(object)
}
)
#' Override the default S3 for as.data.frame.tbl_df
#'
#' Exporting this function seems to make it available for dispatch but still
#' hidden. From here we could route back to the S4 dispatch system with
#' ".unitted_as.data.frame(object=x, ...)", but that seems like a waste of
#' processing time since we come here first.
#'
#' @param x The unitted_tbl_df to convert to a data.frame
#' @param ... Additional arguments passed to as.data.frame
#' @examples
#' x <- tibble::as_tibble(u(data.frame(x=u(1:3,"k"), y=u(3:5, "g"))))
#' as.data.frame(x)
#' @export
as.data.frame.unitted_tbl_df <- function(x, ...) {
return(u(as.data.frame(v(x), ...), get_unitbundles(x)))
}
#### Deconstructors ####
#' Extract data from a unitted object
#'
#' Unitted objects consist of data, units, and unitted class information. The
#' function \code{deunitted()} and its alias \code{v()} remove the units and
#' class information or, equivalently, extract the data from the unitted object.
#'
#' @name v
#' @rdname deunitted
#' @export v
#' @seealso \code{\link{u}()} and \code{\link{unitted}()} for the construction
#' of unitted objects; \code{\linkS4class{unitted}} for the definition of the
#' unitted class
#'
#' @param object A unitted object
#' @param ... Other arguments passed to \code{deunitted}
#' @return A non-unitted data object
v <- function(object, ...) {
deunitted(object, ...)
}
#' @name deunitted
#' @rdname deunitted
#' @exportMethod deunitted
setGeneric("deunitted", function(object, ...) {
standardGeneric("deunitted")
})
#' @rdname deunitted
#'
#' @details In general, \code{deunitted()} and \code{v()} simply extract the
#' data part of the unitted object.
setMethod(
"deunitted", "unitted",
function(object, ...) {
# And what if the object's class is unitted extending another S4 class?
return(S3Part(object, strictS3=TRUE))
}
)
#' @rdname deunitted
setMethod(
"deunitted", "unitted_factor",
function(object, ...) {
if('ordered' %in% object@.S3Class) {
object@.S3Class <- object@.S3Class[-which(object@.S3Class == 'ordered')]
fac <- S3Part(object, strictS3=TRUE)
ordered(fac)
} else {
return(S3Part(object, strictS3=TRUE))
}
}
)
#' @rdname deunitted
setMethod(
"deunitted", "unitted_NULL",
function(object, ...) {
return(NULL)
}
)
#' @rdname deunitted
#'
#' @details With unitted data.frames and lists, you have a choice between
#' partial and complete deunitting (specified by the \code{partial} argument).
#' Complete deunitting removes units and unitted class attributes both from
#' the container (the data.frame or list) and its elements (the columns or
#' list elements). Partial deunitting only removes the container units and
#' unitted class attributes.
#'
#' @param partial logical. Should the data.frame or list be fully deunitted, such that
#' no elements will be left with units, or partially deunitted, such that the
#' object that is returned is not itself unitted but may have unitted
#' elements?
#' @examples
#' x <- u(data.frame(x = u(1:500,"A"), y = u(runif(500),"B"), z = u(500:1,"C")))
#' v(x)
setMethod(
"deunitted", "unitted_data.frame",
function(object, partial=FALSE, ...) {
if(!partial) {
object@.Data <- lapply(object@.Data, function(col) { deunitted(col) })
}
return(S3Part(object, strictS3=TRUE))
}
)
#' @rdname deunitted
#' @import tibble
#' @examples
#' x <- tibble::as_tibble(u(data.frame(x = u(1:500,"A"), y = u(runif(500),"B"), z = u(500:1,"C"))))
#' str(v(x))
#' str(v(x, partial=TRUE))
setMethod(
"deunitted", "unitted_tbl_df",
function(object, partial=FALSE, ...) {
if(!partial) {
return(as_tibble(lapply(S3Part(object, strictS3=TRUE), function(col) { deunitted(col) })))
} else {
return(S3Part(object, strictS3=TRUE))
}
}
)
#' @rdname deunitted
setMethod(
"deunitted", "unitted_list",
function(object, partial=FALSE, ...) {
if(!partial) {
object@.Data <- lapply(object@.Data, function(col) { deunitted(col) })
}
return(S3Part(object, strictS3=TRUE))
}
)
#' @rdname deunitted
#'
#' @details Non-unitted data.frames, tbl_dfs, and lists may also be deunitted:
#' This operation always removes units from the data.frame columns or list
#' elements.
setMethod(
"deunitted", "data.frame",
function(object, ...) {
as.data.frame(lapply(object, function(col) { deunitted(col) }))
}
)
#' @import tibble
#' @rdname deunitted
setMethod(
"deunitted", "tbl_df",
function(object, ...) {
as_tibble(lapply(object, function(col) { deunitted(col) }))
}
)
#' @rdname deunitted
setMethod(
"deunitted", "list",
function(object, ...) {
lapply(object, function(elem) { deunitted(elem) })
}
)
#' @rdname deunitted
#'
#' @details If \code{object} is neither a list nor a data.frame and is not
#' unitted, it will be returned intact.
setMethod(
"deunitted", "ANY",
function(object, ...) {
object
}
)
#### Helper functions ####
# ' The internal, reasonably efficient, unsafe (little error checking) method for
# ' setting an object's units
# '
# ' \code{.set_units()} of a non-unitted class creates a new unitted object with
# ' the specified units.
# '
# ' \code{.set_units()} of a unitted vector, array, or matrix class accepts a
# ' single unitbundle to associate with that object. NA is NOT an acceptable
# ' value of new.units.
# '
# ' \code{.set_units()} of a unitted data.frame accepts a list of unitbundles (or
# ' NAs), one per data.frame column in \code{columns}. NA is NOT an acceptable
# ' value of new.units. The length of new.units is not checked.
# '
# ' @param object The object whose units should be changes
# ' @param new.units The new unit or units in their internal representation
# ' (currently a unitbundle)
# ' @param columns (data.frames only) The numeric column numbers to which
# ' new.units should be applied.
setGeneric(
".set_units",
function(object, new.units, ...) {
unitted(object, new.units)
}
)
setMethod(
".set_units", "unitted",
function(object, new.units) {
object@units <- new.units
object
}
)
setMethod(
".set_units", "unitted_data.frame",
function(object, new.units, columns) {
for(col in columns) {
if(!is.na(new.units[[col]])) {
object@.Data[,col] <- .set_units(object@.Data[,col], new.units[[col]])
}
}
object
}
)
#' The internal, reasonably efficient method for acquiring an object's units in
#' the internal representation
#'
#' get_unitbundles() of a non-unitted class returns NA.
#'
#' get_unitbundles() of a unitted vector, array, or matrix class returns the single
#' unitbundle associated with that object.
#'
#' get_unitbundles() of a data.frame or unitted_data.frame returns a named list of
#' unitbundles (or NAs), one per data.frame column.
#'
#' @param object The object whose units should be returned
#' @param ... other arguments passed to class-specific methods
#' @return A unitbundle or list of unitbundles, each representing one set of
#' units
setGeneric(
"get_unitbundles",
function(object, ...) {
NA
}
)
setMethod(
"get_unitbundles", "unitted",
function(object, ...) {
object@units
}
)
setMethod(
"get_unitbundles", "data.frame",
function(object, recursive=TRUE, ...) {
if(recursive) {
lapply(object, function(col) { get_unitbundles(col) })
} else {
NA
}
}
)
#' @importFrom stats setNames
setMethod(
"get_unitbundles", "unitted_data.frame",
function(object, recursive=TRUE, ...) {
if(recursive) {
setNames(unlist(lapply(object@.Data, get_unitbundles)), object@names)
} else {
NA
}
}
)
#' @importFrom stats setNames
setMethod(
"get_unitbundles", "unitted_tbl_df",
function(object, recursive=TRUE, ...) {
if(recursive) {
setNames(unlist(lapply(S3Part(object, strictS3=TRUE), get_unitbundles)), object@names)
} else {
NA
}
}
)
setMethod(
"get_unitbundles", "list",
function(object, recursive=TRUE, ...) {
if(recursive) {
lapply(object, function(col) { get_unitbundles(col) })
} else {
NA
}
}
)
#' @importFrom stats setNames
setMethod(
"get_unitbundles", "unitted_list",
function(object, recursive=FALSE, ...) {
if(recursive) {
# '@names' is not technically a slot for S4 lists, but I think this is an
# efficient way to access names anyway.
setNames(unlist(lapply(object@.Data, get_unitbundles)), object@names)
} else {
object@units
}
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.