#------------------------------------------------------------------------------#
# Defines the following
#
# - useful class unions
# - getDescriptors: a method that gets descriptors
# - .describe: the internal method for applying a descriptor
# - describe: applies a descriptor to a variable
globalVariables(c("view", "set"))
# Classes used internally to define methods for signatures with possibly missing
# arguments.
#' Stype classes
#'
#' S4 classes uses in defining methods.
#' @name stype_classes
#' @keywords internal
NULL
#' @rdname stype_classes
#' @export
setClassUnion("groupVar", c("character", "factor"))
#' @rdname stype_classes
#' @export
setClassUnion("stypeWeightVar", c("v_continuous", "v_continuous_nonneg",
"v_proportion"))
#' @rdname stype_classes
#' @export
setClassUnion("weightVar", c("numeric"))
#' @rdname stype_classes
#' @export
setClassUnion("categorical", c("factor", "ordered"))
#' @rdname stype_classes
#' @export
setClassUnion("Missing", c("missing", "NULL"))
#' @rdname stype_classes
#' @export
setClassUnion("maybeGroup", c("Missing", "groupVar"))
#' @rdname stype_classes
#' @export
setClassUnion("maybeWeight", c("Missing", "weightVar"))
#' @rdname stype_classes
#' @export
setClassUnion("maybeDescriptor", c("missing", "NULL", "list"))
#' @rdname stype_classes
#' @export
setClassUnion("describable", c("integer", "logical", "numeric", "factor",
"ordered", "character", "v_rcensored"))
#' @rdname stype_classes
#' @export
setClassUnion("simpleStype", c("v_count", "v_binary", "v_continuous",
"v_continuous_nonneg",
"v_proportion", "v_nominal", "v_ordered"))
#' @rdname stype_classes
#' @export
setClassUnion("complexStype", c("v_rcensored"))
#' @rdname stype_classes
#' @export
setClassUnion("v_stype", c("v_count", "v_binary", "v_continuous",
"v_continuous_nonneg",
"v_nominal", "v_ordered", "v_proportion",
"v_rcensored"))
#' @rdname stype_classes
#' @export
setClassUnion("tbl_stype", c("tbl_analysis"))
#' @rdname stype_classes
#' @export
setClassUnion("stype", c("v_count", "v_binary", "v_continuous",
"v_continuous_nonneg",
"v_nominal", "v_ordered", "v_proportion",
"v_rcensored", "tbl_analysis"))
#' Descriptors
#' @name descriptors
#' @description
#' Lists of functions to used to \code{\link{describe}} a \code{describeable}
#' vector to create a \code{\link{data_summary}} object. A \code{descriptor} is
#' a function of \code{x} (and optionally \code{g} (a grouping vector and/or
#' \code{w} a numeric weight vector)).
#'
#' \code{describable} vectors include: integer, logical, numeric, factor,
#' ordered, and v_rcensored.
#'
#' @importFrom stats IQR median sd quantile var cov weighted.mean
#' @keywords internal
NULL
#' @rdname descriptors
standardDescriptors <- list(
n = function(x, ...) length(x),
has_missing = function(x, ...) anyNA(x),
n_nonmissing = function(x, ...) sum(!is.na(x)),
n_missing = function(x, ...) sum(is.na(x)),
proportion_missing = function(x, ...) mean(is.na(x)),
is_constant = function(x, ...) {
if (anyNA(x)) {
# If any values are NA, return TRUE if all are NA
sum(is.na(x)) == length(x)
} else {
# Otherwise, check that value is constant
all(x[1] == x)
}
}
)
#' @rdname descriptors
logicalDescriptors <- list(
num_0 = function(x, ...) sum(!x, na.rm = TRUE),
num_1 = function(x, ...) sum(x, na.rm = TRUE),
proportion = function(x, ...) mean(x, na.rm = TRUE),
variance = function(x, ...) var(x, na.rm = TRUE)
)
#' @rdname descriptors
wlogicalDescriptors <- list(
weighted_proportion = function(x, w, ...) {
stats::weighted.mean(x = x, w = w, ...)
}
)
#' @rdname descriptors
categoricalDescriptors <- list(
table = function(x, ...) table(x, useNA = "always"),
ptable = function(x, ...) prop.table(table(x, useNA = "always")),
ptableNoNA = function(x, ...) prop.table(table(x, useNA = "no")),
levels = function(x, ...) levels(x)
)
#' @rdname descriptors
characterDescriptors <- list(
n_unique = function(x, ...) length(unique(x)),
max_char = function(x, ...) if(length(x) == 0) 0 else max(nchar(x)),
min_char = function(x, ...) if(length(x) == 0) 0 else min(nchar(x))
)
#' @rdname descriptors
numericDescriptors <- list(
sum = function(x, ...) sum(x, na.rm = TRUE),
mean = function(x, ...) mean(x, na.rm = TRUE),
variance = function(x, ...) var(x, na.rm = TRUE),
median = function(x, ...) median(x, na.rm = TRUE),
iqr = function(x, ...) IQR(x, na.rm = TRUE),
min = function(x, ...) {
if (length(x) == 0 || all(is.na(x))) NA_real_ else min(x, na.rm = TRUE)
},
max = function(x, ...) {
if (length(x) == 0 || all(is.na(x))) NA_real_ else max(x, na.rm = TRUE)
},
qtiles = function(x, ...) {
quantile(
x,
probs = c(0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975, 0.99),
na.rm = TRUE)
}
)
#' @rdname descriptors
wnumericDescriptors <- list(
weighted_mean = function(x, w, ...){ stats::weighted.mean(x = x, w = w, ...) }
)
#' @rdname descriptors
groupedDescriptors <- list(
smd = function(x, g, w = NULL, ...) smd::smd(x, g, w, na.rm = TRUE)
)
#' @rdname descriptors
rcensoredDescriptors <- list(
time_info = function(x, ...) get_persontime(x),
outcome_info = function(x, ...) get_outcomeinfo(x),
censor_info = function(x, ...) get_censorinfo(x),
eair = function(x, ...) eair(x)
)
#' @rdname descriptors
analysisDescriptors <- list(
n_obs = function(x, ...) nrow(x),
n_col = function(x, ...) ncol(x),
n_stypes = function(x, ...) { sum(purrr::map_lgl(x, is_stype) ) },
n_tagged = function(x, ...) { sum(purrr::map_lgl(x, is_tagged) ) }
)
#' Get person-time info for a data_summary
#' @param x a \code{\link{v_rcensored}} vector
#' @keywords internal
get_persontime <- function(x, ...){
get_from_field("time", "sum", "person_time")(x)
}
#' Get outcome info for a data_summary
#' @param x a \code{\link{v_rcensored}} vector
#' @keywords internal
get_outcomeinfo <- function(x, ...){
vctrs::vec_c(
get_from_field("outcome", "num_1", "n_events")(x),
get_from_field("outcome_reason", "table",
.after = function(z) list(outcome_reasons = z[[1]]) )(x)
)
}
#' Get censoring info for a data_summary
#' @param x a \code{\link{v_rcensored}} vector
#' @keywords internal
get_censorinfo <- function(x, ...){
vctrs::vec_c(
get_from_field("censored", "num_1", "n_censored")(x),
get_from_field("censor_reason", "table",
.after = function(z) { list(censor_reasons = z[[1]]) } )(x)
)
}
#' Exposure-Adjusted Incidence Rate
#'
#' Computes the exposure-adjusted incidence rate and variance using He et al.
#' (2015).
#'
#' @param x a \code{\link{v_rcensored}} vector
#' @references
#' He X, Chen L, Lei L, Xia HA, Lee MLT (2015) A Simple Method for
#' Estimating Confidence Intervals for Exposure Adjusted Incidence Rate and
#' Its Applications to Clinical Trials. J Biom Biostat 6: 238.
#' doi:10.4172/2155-6180.1000238
#' @return a \code{list} containing:
#' \itemize{
#' \item eair the point estimate
#' \item eair_variance the variance estimate
#' }
#' @export
eair <- function(x){
outc <- vctrs::field(x, "outcome")
time <- vctrs::field(x, "time")
nevents <- get_data_summary(outc, "num_1")
pt <- get_data_summary(time, "sum")
# proportion of subjects that have event prior to censor or end of followup
a_hat <- get_data_summary(outc, "proportion")
a_sigma <- get_data_summary(outc, "variance")
# mean followup time
b_hat <- get_data_summary(time, "mean")
b_sigma <- get_data_summary(time, "variance")
cov_ab <- cov(outc, time)
sigma <- matrix(c(a_sigma, cov_ab, cov_ab, b_sigma), ncol = 2, byrow = TRUE)
L <- c(1 , -a_hat/b_hat)
list(
eair = nevents/pt,
eair_variance = drop((1/(b_hat^2)) * t(L) %*% sigma %*% L)
)
}
#' getDescriptors
#'
#' Returns a list of functions to be applied to a variable.
#'
#' @param x a vector of data
#' @param g a vector of groupings
#' @param w a vector of weights
# @export
#' @keywords internal
setGeneric("getDescriptors", function(x, g, w) standardGeneric("getDescriptors"))
#' @rdname getDescriptors
#' @export
setMethod(
f = "getDescriptors",
signature = c("logical", "maybeGroup", "maybeWeight"),
definition = function(x, g, w){
vctrs::vec_c(
standardDescriptors,
logicalDescriptors,
`if`(is(g, "groupVar"), groupedDescriptors, NULL),
`if`(is(w, "weightVar"), wlogicalDescriptors, NULL)
)
}
)
#' @rdname getDescriptors
#' @export
setMethod(
f = "getDescriptors",
signature = c("categorical", "maybeGroup", "maybeWeight"),
definition = function(x, g, w){
vctrs::vec_c(
standardDescriptors,
categoricalDescriptors,
`if`(is(g, "groupVar"), groupedDescriptors, NULL)
)
}
)
#' @rdname getDescriptors
#' @export
setMethod(
f = "getDescriptors",
signature = c("character", "maybeGroup", "maybeWeight"),
definition = function(x, g, w){
vctrs::vec_c(
standardDescriptors,
characterDescriptors,
`if`(is(g, "groupVar"), groupedDescriptors, NULL)
)
}
)
#' @rdname getDescriptors
#' @export
setMethod(
f = "getDescriptors",
signature = c("numeric", "maybeGroup", "maybeWeight"),
definition = function(x, g, w){
vctrs::vec_c(
standardDescriptors,
numericDescriptors,
`if`(is(g, "groupVar"), groupedDescriptors, NULL),
`if`(is(w, "weightVar"), wnumericDescriptors, NULL)
)
}
)
#' @rdname getDescriptors
#' @export
setMethod(
f = "getDescriptors",
signature = c("v_rcensored", "maybeGroup", "maybeWeight"),
definition = function(x, g, w){
vctrs::vec_c(
standardDescriptors,
rcensoredDescriptors
)
}
)
# Describe a variable (internal method)
#
# The internal method for applying a descriptive function on x and, optionally,
# g and/or w. The methods define the signature patterns for applying the
# functions.
setGeneric(".describe", function(f, x, g, w, ...) standardGeneric(".describe"))
.describeMethods <- list(
list(
sig = c("function", "describable", "Missing", "Missing"),
bod = quote(f(x, ...))
),
list(
sig = c("function", "describable", "groupVar", "Missing"),
bod = quote(f(x = x, g = g, ...))
),
list(
sig = c("function", "describable", "Missing", "numeric"),
bod = quote(f(x = x, w = w, ...))
),
list(
sig = c("function", "describable", "groupVar", "numeric"),
bod = quote(f(x = x, g = g, w = w, ...) )
)
)
purrr::walk(
.x = .describeMethods,
.f = function(l){
def <- getGeneric(".describe")
body(def) <- l$bod
setMethod(f = ".describe", signature = l$sig, definition = def)
}
)
#' Derive Variable Statistics
#'
#' Return data derived from the data. For each type of stype vector, a
#' reasonable set of default summary statistics will be provided based on the
#' data type that is provided as an input. For example, if the data is
#' continuous then statistics like the mean, variance, and various quantiles
#' will be provided, whereas for a nominal variable the counts of the various
#' levels are provided (among other information). Custom summary statistics can
#' be appended to the base statistics by providing a list of additional
#' functions as an input.
#'
#' @param x a vector a values
#' @param g a vector a groupings (optional)
#' @param w a vector of weights (optional)
#' @param .descriptors an optional list of functions. Each function should have
#' a signature of the form `function(v, ...)`, and where `v` is expected to be
#' of the result of calling `as_canonical(x)` on the `x` input to `describe`,
#' and `...` is expected to be the unchanged value of the input to `...` in
#' `describe`. If the input list is named then elements in the return object
#' corresponding to the result of evaluating the supplied functions will have
#' the corresponding names.
#' @param ... additional arguments passed on to the functions in the list
#' provided by `.descriptors`.
#' @importFrom purrr reduce map
#' @importFrom stats setNames
#' @importFrom methods is
#' @export
setGeneric(
name = "describe",
def = function(x, g = NULL, w = NULL, .descriptors, ...) standardGeneric("describe")
)
#' @rdname describe
#' @export
setMethod(
f = "describe",
signature = c("describable", "maybeGroup", "maybeWeight", "maybeDescriptor"),
definition = function(x, g, w, .descriptors, ...){
descriptors <- `if`(
missing(.descriptors) || methods::is(.descriptors, "Missing"),
getDescriptors(x, g, w),
vctrs::vec_c(getDescriptors(x, g, w), .descriptors)
)
data_summary(
purrr::map(
.x = descriptors,
.f = function(f) .describe(f, x = x, g = g, w = w, ...))
)
}
)
#' @rdname describe
#' @export
setMethod(
f = "describe",
signature = c("simpleStype", "maybeGroup", "maybeWeight", "maybeDescriptor"),
definition = function(x, g, w, .descriptors, ...){
cl <- match.call()
cl <- lapply(as.list(cl), eval, sys.parent())
cl$x <- as_canonical(cl$x)
eval(as.call(cl))
}
)
#' @rdname describe
#' @export
setMethod(
f = "describe",
signature = c("tbl_df", "missing", "missing", "maybeDescriptor"),
definition = function(x, g, w, .descriptors, ...){
analysisDescriptors <- `if`(
missing(.descriptors) || methods::is(.descriptors, "Missing"),
analysisDescriptors,
vctrs::vec_c(analysisDescriptors, .descriptors)
)
desc <- purrr::map(analysisDescriptors, ~ .x(x))
data_summary(desc)
}
)
#' @rdname describe
#' @importFrom vctrs field
#' @importFrom purrr flatten
#' @export
setMethod(
f = "describe",
signature = c("v_rcensored", "maybeGroup", "maybeWeight", "maybeDescriptor"),
definition = function(x, g, w, .descriptors, ...){
standdesc <-
purrr::map(
.x = standardDescriptors,
.f = function(f) .describe(f, x = as_canonical(vctrs::field(x, "time")), ...)
)
rcensoredDescriptors <- `if`(
missing(.descriptors) || methods::is(.descriptors, "Missing"),
rcensoredDescriptors,
vctrs::vec_c(rcensoredDescriptors, .descriptors)
)
rcensdesc <-
purrr::flatten(purrr::map(
.x = rcensoredDescriptors,
.f = function(f) .describe(f, x = x, g = g, w = w, ...)
))
data_summary(
vctrs::vec_c(
standdesc,
rcensdesc
)
)
}
)
#' Change Weights for stype Vector Elements
#'
#' Return a stype vector where the elements are weighted according to the
#' specified inputs. By default, all of the weights for a stype vector are
#' effectively constant.
#' @inheritParams describe
#' @export
setGeneric(
name = "weight",
def = function(x, w, .descriptors = list()) standardGeneric("weight")
)
#' @rdname weight
#' @export
setMethod(
f = "weight",
signature = c("stype", "stypeWeightVar", "maybeDescriptor"),
definition = function(x, w, .descriptors){
w <- as_canonical(w)
weight(x = x, w = w, .descriptors = .descriptors)
}
)
#' @rdname weight
#' @export
setMethod(
f = "weight",
signature = c("stype", "weightVar", "maybeDescriptor"),
definition = function(x, w, .descriptors){
cl <- swap_function(match.call(), describe)
attr(x, "data_summary") <- eval(cl, sys.parent())
x
}
)
#' Create a function to get data from a field in a stype record type vector
#'
#' @param field_name the name of \code{field} from which extract data
#' @param what an indexing vector (\code{character} or \code{integer}) identifying
#' elements in the \code{data_summary} to extract
#' @param new_names a \code{character} vector of names with which to label the
#' elements of result calling the returned function
#' @param .after an optional function applied to the result
#' @noRd
get_from_field <- function(field_name, what, new_names = what, .after = identity){
stopifnot(length(what) == length(new_names))
function(x){
out <- get_data_summary(vctrs::field(x, field_name))[what]
.after(setNames(out, new_names))
}
}
#' Get the summary from a stype variable
#'
#' @param x the object from which to get the \code{data_summary}
#' @param element either \code{NULL} to get the full \code{data_summary}
#' or a length 1 \code{character} to select a particular element of the summary
#' @export
setGeneric(
name = "get_data_summary",
def = function(x, element) standardGeneric("get_data_summary")
)
#' @rdname get_data_summary
#' @export
setMethod(
f = "get_data_summary",
signature = c("stype", NULL),
definition = function(x, element){
# browser()
if (is_not_computed(attr(x, "data_summary"))) {
describe(x, .descriptors = attr(x, "extra_descriptors"))
} else {
attr(x, "data_summary")
}
}
)
#' @rdname get_data_summary
#' @export
setMethod(
f = "get_data_summary",
signature = c("stype", "character"),
definition = function(x, element){ get_data_summary(x)[[element]] }
)
#' Get data summaries from a list of stypes
#'
#' @param x the object from which to get the \code{data_summaries}
#' @param element either \code{NULL} to get the full \code{data_summary}
#' or a length 1 \code{character} to select a particular element of the summary
#' @export
setGeneric(
name = "get_data_summaries",
def = function(x, element) standardGeneric("get_data_summaries")
)
#' @rdname get_data_summaries
#' @export
setMethod(
f = "get_data_summaries",
signature = c("list", NULL),
definition = function(x, element){ purrr::map(x, get_data_summary) }
)
#' @rdname get_data_summaries
#' @export
setMethod(
f = "get_data_summaries",
signature = c("list", "character"),
definition = function(x, element){
purrr::map(x,~ get_data_summary(.x, element))
}
)
#' Get the internal name from a stype variable
#'
#' @param x the object from which to get the \code{internal_name} attribute
#' @export
setGeneric(
name = "get_internal_name",
def = function(x) standardGeneric("get_internal_name")
)
#' @rdname get_internal_name
#' @export
setMethod(
f = "get_internal_name",
signature = c("stype"),
definition = function(x){ attr(x, "internal_name") }
)
#' Check that an object is a stype vector
#'
#' @param object any \code{R} object
#' @export
is_stype <- function(object) {
is(object, "stype")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.