Nothing
#' Gather all columns representing functional measurements into a `tfd`-object
#'
#' Similar in spirit to [tidyr::gather()], but does NOT put the values in the
#' gathered columns into one very long "value"-column while labeling the different
#' original columns in a very long "key"-column -- instead it creates a `tfd`-column
#' containing the functional measurements of the columns given in `...`.
#'
#' @param data a data frame -- note that `dplyr` does not handle matrix columns well,
#' if `data` contains more than one of those, `tf_gather` will fail...
#' @param ... A selection of columns to collect as a `tfd` object. Each column
#' represents measurements of a functional variable at a specific `arg`-val.
#' Can also be the name of a matrix-valued column, but see above.
#' If empty, all variables are selected. You can supply bare variable names,
#' select all variables between x and z with x:z, exclude y with -y. For more
#' options, see the [dplyr::select()] documentation.
#' @param key the name of the created `tfd`-column. Defaults to `".tfd"`, but
#' the function will try to guess the name based on the column names of the
#' gathered columns in this case. If a common prefix of all column names
#' is found, this is used instead. You also get a message about this.
#' @param arg optional. Argument values for the functions. If not provided, will be guessed from the column names as well.
#' See also [tf::tfd()].
#' @param evaluator optional. A function accepting arguments x, arg, evaluations. See [tf::tfd()] for details.
#' @param domain optional. Range of possible `arg`-values. See [tf::tfd()] for details.
#' @returns a modified `data.frame` with a `tfd` column replacing the `...`.
#' @import dplyr
#' @export
#' @seealso [dplyr::select()]
#' @family tidyfun data wrangling functions
#' @examples
#' d <- tf_spread(growth[1:5, ]) |> dplyr::mutate(id = 1:dplyr::n())
#' dplyr::glimpse(d)
#' # tidyselect syntax for column selection:
#' tf_gather(d, starts_with("height"))
#' tf_gather(d, height_1:height_18)
#' tf_gather(d, -gender, -id)
#' # custom key name and arg values:
#' tf_gather(d, starts_with("height"), key = "height")
#' tf_gather(d, starts_with("height"), arg = seq(0, 1, length.out = 31))
tf_gather <- function(
data,
...,
key = ".tfd",
arg = NULL,
domain = NULL,
evaluator = tf_approx_linear
) {
key_var <- as_name(enexpr(key))
search_key <- isTRUE(key == ".tfd")
if (...length() == 0) {
gather_vars <- names(data)
} else {
gather_vars <- names(eval_select(expr(c(...)), data))
}
if (is_empty(gather_vars)) {
return(data)
}
# turn matrix column into regular columns:
if (
length(gather_vars) == 1 && is.matrix(data[[gather_vars]]) && search_key
) {
key_var <- gather_vars
search_key <- FALSE
cli::cli_inform("creating new {.cls tfd}-column {.val {key_var}}")
}
tfd_data <- data |>
select(all_of(gather_vars)) |>
as.matrix()
if (search_key) {
# see also find_arg: will interpret separating-dashes as minus-signs
# regex adapted from https://www.regular-expressions.info/floatingpoint.html
found_key <- unique(sub(
"[-+]?(0|(0\\.[0-9]+)|([1-9][0-9]*\\.?[0-9]*))([eE][-+]?[0-9]+)?$",
"",
colnames(tfd_data)
))
# assume trailing 0's are padding:
found_key <- sub("[0]+$", "", found_key)
# assume trailing punctuation is separator:
found_key <- sub("[[:punct:]]$", "", found_key)
# check again for uniqueness of resulting value ...
found_key <- unique(found_key)
if (length(found_key) == 1 && all(found_key != "")) {
key_var <- found_key
cli::cli_inform("creating new {.cls tfd}-column {.val {key_var}}")
}
}
data |>
select(-all_of(gather_vars)) |>
mutate(
!!key_var := tfd(
tfd_data,
arg = arg,
domain = domain,
evaluator = {{ evaluator }}
)
)
}
#' Spread a `tf`-column into many columns representing the
#' function evaluations.
#'
#' Similar in spirit to [tidyr::spread()], but does NOT shorten,
#' just widens the data frame -- a `tf`-column is spread out into many columns
#' containing the functional measurements.
#'
#' @param data a data frame with at least one `tf`-column
#' @param value the name of the `tf`-column to 'spread'/evaluate.
#' You can supply bare variable names etc., see the [dplyr::select()]
#' documentation. Also works without this if there's only one `tf` in `data`,
#' see examples.
#' @param arg (Semi-)optional. A vector of `arg`-values on which to evaluate the
#' functions. If not provided, uses the default `arg`s. Should be
#' specified for `tf_irreg`, otherwise *all* observed gridpoints are used for
#' *every* function.
#' @param sep separating character used to create column names for the new columns,
#' defaults to `"_"` for column names "<name of the `tf`>_<`arg`-value>".
#' Set to NULL to get column names that only contain the `arg`-value.
#' @param interpolate `interpolate`-argument for evaluating the functional data.
#' Defaults to FALSE, i.e., `tfd`s are *not* inter/extrapolated on unobserved
#' `arg`-values.
#' @returns a wider dataframe with the `tf`-column spread out into many columns
#' each containing the functional measurements for one `arg`-value.
#' @importFrom tidyselect vars_pull
#' @export
#' @family tidyfun data wrangling functions
#' @examples
#' d <- dplyr::tibble(g = 1:3)
#' d$f <- tf_rgp(3, 11L)
#' tf_spread(d, f)
#' tf_spread(d, -g)
#' tf_spread(d)
tf_spread <- function(data, value, arg, sep = "_", interpolate = FALSE) {
if (missing(value)) {
tf_cols <- which(map_lgl(data, is_tf))
if (length(tf_cols) == 0) {
cli::cli_warn(
"{.arg value} {.val {tf_var}} is not a column of class {.cls tf}. Nothing's happening here."
)
return(data)
}
if (length(tf_cols) == 1) {
value <- tf_cols
} else {
cli::cli_abort(
"More than one {.cls tf} found, specify which one to spread in {.arg value}."
)
}
}
tf_var <- vars_pull(names(data), !!enquo(value))
tf <- data[[tf_var]]
if (!is_tf(tf)) {
cli::cli_warn(
"{.arg value} {.val {tf_var}} is not a column of class {.cls tf}. Nothing's happening here."
)
return(data)
}
if (missing(arg)) {
arg <- tf_arg(tf)
if (is_irreg(tf)) {
arg <- unlist(arg, use.names = TRUE) |> unique() |> sort()
cli::cli_warn(
"no explicit {.arg arg} for irregular {.val {tf_var}} provided -- using all {length(arg)}, distinct observed argument values.",
)
}
}
tf_eval <- tf[, arg, matrix = TRUE, interpolate = interpolate] |>
as.data.frame()
if (!is.null(sep)) {
colnames(tf_eval) <- paste0(tf_var, sep, arg)
}
data |>
select(-all_of(tf_var)) |>
bind_cols(tf_eval)
}
# ------------------------------------------------------------------------------
#' Turn "long" tables into tidy data frames with `tf`-objects
#'
#' Similar in spirit to [tidyr::nest()]. This turns tables in "long" format,
#' where one column (`.id`) defines the unit of observation, one column (`.arg`)
#' defines the evaluation grids of the functional observations, and other columns (`...`)
#' define the values of the functions at those points into a (much shorter) table containing
#' `tfd`-objects. All other variables are checked for constancy over `.id` and
#' appended as well.
#'
#' `domain` and `evaluator` can be specified as lists or vectors
#' if you are nesting multiple functional data columns with different properties.
#' Because this interface captures evaluator names as text, supply the evaluator
#' as a string rather than a bare function name.
#'
#' @param data a data frame
#' @param ... A selection of columns. If empty, all variables except the
#' `.id` and `.arg` columns are selected. You can supply bare variable names,
#' select all variables between `x` and `z` with `x:z`, exclude `y` with `-y`.
#' For more options, see the [dplyr::select()] documentation.
#' @param .id the (bare or quoted) name of the column defining the different
#' observations. Defaults to "id".
#' @param .arg the (bare or quoted) name of the column defining the `arg`-values
#' of the observed functions. Defaults to "arg".
#' @inheritParams tf_gather
#' @returns a data frame with (at least) `.id` and `tfd` columns
#' @export
#' @family tidyfun data wrangling functions
#' @seealso [tfd()] for details on `domain` and `evaluator`.
#' @examples
#' d <- dplyr::tibble(id = rep(1:3, each = 5), arg = rep(1:5, 3), value = rnorm(15))
#' tf_nest(d, .id = id, .arg = arg)
tf_nest <- function(
data,
...,
.id = "id",
.arg = "arg",
domain = NULL,
evaluator = "tf_approx_linear"
) {
if (!is.data.frame(data)) {
cli::cli_abort(
"{.arg data} must be data frame, not {.obj_type_friendly {data}}."
)
}
if (inherits(data, "grouped_df")) {
cli::cli_abort(c(
"{.fn tf_nest} does not work for {.cls grouped_df}.",
i = "{.fn ungroup} your data before nesting."
))
}
arg_var <- as_name(enexpr(.arg))
id_var <- as_name(enexpr(.id))
if (...length() == 0) {
value_vars <- setdiff(names(data), c(id_var, arg_var))
} else {
value_vars <- names(eval_select(expr(c(...)), data))
}
n_value_vars <- length(value_vars)
if (n_value_vars == 0) {
return(data)
}
# homogenize inputs:
if (!length(evaluator) %in% c(1, n_value_vars)) {
cli::cli_abort(
"{.arg evaluator} length must be 1 or {n_value_vars}, not {length(evaluator)}."
)
}
if (!is.list(domain)) {
domain <- replicate(n_value_vars, domain, simplify = FALSE)
} else {
if (!length(domain) %in% c(1, n_value_vars)) {
cli::cli_abort(
"{.arg domain} length must be 1 or {n_value_vars}, not {length(domain)}."
)
}
}
evaluator <- as.list(evaluator)
if (!length(evaluator) %in% c(1, n_value_vars)) {
cli::cli_abort(
"{.arg evaluator} length must be 1 or {n_value_vars}, not {length(evaluator)}."
)
}
remaining <- setdiff(names(data), c(id_var, arg_var, value_vars))
# check that nesting is possible without information loss
ret <- data |>
select(all_of(c(id_var, remaining))) |>
group_by(across(all_of(id_var)))
not_constant <- ret |>
summarise(across(everything(), n_distinct)) |>
select(-all_of(id_var)) |>
summarise(across(everything(), \(x) !all(x == 1L))) |>
select(where(isTRUE))
if (ncol(not_constant) > 0) {
cli::cli_abort(
"Can't nest - columns {.val {names(not_constant)}} are not constant for all levels of {.arg id}"
)
}
# keep first line of every id-level:
ret <- ret |> slice(1) |> ungroup()
tfd_list <- pmap(
list(value_vars, evaluator, domain),
function(x, y, z) {
data |>
select(all_of(c(id_var, arg_var, x))) |>
tfd(evaluator = !!y, domain = z)
}
)
names(tfd_list) <- value_vars
# re-index to make sure order is correct,
# use <character> index so that numeric ids are interpreted correctly
# as names (f["1000",]), not index positions (f[1000, ])
id_index <- ret |> pull(id_var) |> as.character()
for (v in value_vars) {
ret[[v]] <- tfd_list[[v]][id_index, ]
}
ret
}
#-------------------------------------------------------------------------------
#' Turn (data frames with) `tf`-objects / list columns into "long" tables.
#'
#' Similar in spirit to [tidyr::unnest()], the reverse of [tf_nest()].
#' The `tf`-method simply turns a single `tfd` or `tfb` vector into a "long" [tibble::tibble()].
#'
#' - Caution: this uses slightly different defaults for names of unnested columns
#' than `tidyr::unnest()`.
#' - For `data.frames`, include an ID column with a unique row identifier before
#' unnesting. Without it, arg-value pairs cannot be matched back to their
#' original functions after unnesting.
#'
#' @param data a data.frame or a `tf`-object
#' @param arg optional values for the `arg` argument of
#' [tf::tf_evaluate()]
#' @param interpolate return function values for `arg`-values not on original grid?
#' Defaults to `TRUE`.
#' @param ... not used currently
#' @inheritParams tidyr::unnest
#' @returns a "long" data frame with `tf`-columns expanded into `arg, value`-
#' columns.
#' @seealso [tf_evaluate.data.frame()]
#' @export
#' @family tidyfun data wrangling functions
#' @examples
#' d <- dplyr::tibble(id = 1:3)
#' d$f <- tf_rgp(3, 11L)
#' tf_unnest(d, f)
tf_unnest <- function(data, cols, arg, interpolate = TRUE, ...) {
UseMethod("tf_unnest")
}
#' @export
#' @importFrom tibble tibble
#' @importFrom tidyr unnest
#' @rdname tf_unnest
tf_unnest.tf <- function(data, cols, arg, interpolate = TRUE, ...) {
if (missing(arg)) {
arg <- tf::ensure_list(tf_arg(data))
}
tmp <- data[, arg, matrix = FALSE, interpolate = interpolate]
id <- unique_id(names(data)) %||% seq_along(data)
id <- ordered(id, levels = id) # don't reshuffle
tidyr::unnest(tibble::tibble(id = id, data = tmp), cols = data)
}
#' @export
#' @rdname tf_unnest
tf_unnest.data.frame <- function(
data,
cols,
arg,
interpolate = TRUE,
keep_empty = FALSE,
ptype = NULL,
names_sep = "_",
names_repair = "check_unique",
...
) {
if (missing(cols)) {
tf_cols <- names(data)[map_lgl(data, is_tf)]
cols <- expr(!!!syms(tf_cols))
cli::cli_warn(
"{.arg cols} is now required. Please use {.code cols = {expr_text(cols)}}"
)
}
ret <- tf_evaluate.data.frame(data, {{ cols }}, arg = arg) |>
tidyr::unnest(
cols = {{ cols }},
keep_empty = keep_empty,
ptype = ptype,
names_sep = names_sep,
names_repair = names_repair
)
ret
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.