Nothing
#' Convert columns of data frame to Boolean or fuzzy sets
#'
#' Convert the selected columns of the data frame into either dummy
#' logical columns, or into membership degrees of fuzzy sets, while leaving
#' the remaining columns untouched. Each column selected for transformation
#' typically yields in multiple columns in the output.
#'
#' Transformations performed by this function are typically useful as a
#' preprocessing step before using the [dig()] function or some of its
#' derivatives ([dig_correlations()], [dig_paired_baseline_contrasts()],
#' [dig_associations()]).
#'
#' The transformation of selected columns differ based on the type. Concretely:
#' - **logical** column `x` is transformed into pair of logical columns,
#' `x=TRUE` and`x=FALSE`;
#' - **factor** column `x`, which has levels `l1`, `l2`, and `l3`, is transformed
#' into three logical columns named `x=l1`, `x=l2`, and `x=l3`;
#' - **numeric** column`x` is transformed accordingly to `.method` argument:
#' - if `.method="crisp"`, the column is first transformed into a factor
#' with intervals as factor levels and then it is processed as a factor
#' (see above);
#' - for other `.method` (`triangle` or `raisedcos`), several new columns
#' are created, where each column has numeric values from the interval
#' \eqn{[0,1]} and represents a certain fuzzy set (either triangular or
#' raised-cosinal).
#' Details of transformation of numeric columns can be specified with
#' additional arguments (`.breaks`, `.labels`, `.right`).
#'
#' @param .data the data frame to be processed
#' @param .what a tidyselect expression (see
#' [tidyselect syntax](https://tidyselect.r-lib.org/articles/syntax.html))
#' specifying the columns to be transformed
#' @param ... optional other tidyselect expressions selecting additional
#' columns to be processed
#' @param .breaks for numeric columns, this has to be either an integer scalar
#' or a numeric vector. If `.breaks` is an integer scalar, it specifies
#' the number of resulting intervals to break the numeric column to
#' (for `.method="crisp"`) or the number of target fuzzy sets (for
#' `.method="triangle"` or `.method="raisedcos`). If `.breaks` is a vector,
#' the values specify the borders of intervals (for `.method="crisp"`)
#' or the breaking points of fuzzy sets.
#' @param .labels character vector specifying the names used to construct
#' the newly created column names. If `NULL`, the labels are generated
#' automatically.
#' @param .na if `TRUE`, an additional logical column is created for each
#' source column that contains `NA` values. For column named `x`, the
#' newly created column's name will be `x=NA`.
#' @param .keep if `TRUE`, the original columns being transformed remain
#' present in the resulting data frame.
#' @param .method The method of transformation for numeric columns. Either
#' `"crisp"`, `"triangle"`, or `"raisedcos"` is required.
#' @param .right If `.method="crisp"`, this argument specifies if the
#' intervals should be closed on the right (and open on the left) or
#' vice versa.
#' @return A tibble created by transforming `.data`.
#' @author Michal Burda
#' @examples
#' # transform logical columns and factors
#' d <- data.frame(a = c(TRUE, TRUE, FALSE),
#' b = factor(c("A", "B", "A")),
#' c = c(1, 2, 3))
#' partition(d, a, b)
#'
#' # transform numeric columns to logical columns (crisp transformation)
#' partition(CO2, conc:uptake, .method = "crisp", .breaks = 3)
#'
#' # transform numeric columns to fuzzy sets (triangle transformation)
#' partition(CO2, conc:uptake, .method = "triangle", .breaks = 3)
#'
#' # complex transformation with different settings for each column
#' CO2 |>
#' partition(Plant:Treatment) |>
#' partition(conc,
#' .method = "raisedcos",
#' .breaks = c(-Inf, 95, 175, 350, 675, 1000, Inf)) |>
#' partition(uptake,
#' .method = "triangle",
#' .breaks = c(-Inf, 7.7, 28.3, 45.5, Inf),
#' .labels = c("low", "medium", "high"))
#' @export
partition <- function(.data,
.what = everything(),
...,
.breaks = NULL,
.labels = NULL,
.na = TRUE,
.keep = FALSE,
.method = "crisp",
.right = TRUE) {
.must_be_data_frame(.data)
.must_be_numeric_vector(.breaks, null = TRUE)
.must_be_character_vector(.labels, null = TRUE)
.must_be_flag(.na)
.must_be_flag(.keep)
.must_be_enum(.method, c("crisp", "triangle", "raisedcos"))
.must_be_flag(.right)
emptydf <- as_tibble(data.frame(matrix(NA, nrow = nrow(.data), ncol = 0)))
call <- current_env()
if (!is.null(.breaks)) {
.breaks <- sort(.breaks)
}
sel <- enquos(.what, ...)
sel <- lapply(sel,
eval_select,
data = .data,
allow_rename = FALSE,
allow_empty = TRUE,
error_call = current_env())
sel <- unlist(sel)
if (length(sel) <= 0) {
return(as_tibble(.data))
}
res <- lapply(seq_along(sel), function(i) {
colname <- names(sel)[i]
colindex <- sel[i]
res <- emptydf
x <- .data[[colindex]]
if (is.logical(x)) {
res <- tibble(a = !is.na(x) & x,
b = !is.na(x) & !x)
colnames(res) <- paste0(colname, "=", c("T", "F"))
} else if (is.factor(x)) {
res <- .partition_factor(x, colname)
} else if (is.numeric(x)) {
if (is.null(.breaks)) {
cli_abort(c("{.arg .breaks} must not be NULL in order to partition numeric column {.var {colname}}."),
call = call)
}
if (.method == "crisp") {
pp <- .prepare_crisp(x, colname, .breaks, .labels, .right, call)
xx <- cut(x, breaks = pp$breaks, labels = pp$labels, right = .right)
res <- .partition_factor(xx, colname)
} else if (.method == "triangle") {
pp <- .prepare_fuzzy(x, colname, .breaks, .labels, call)
res <- .partition_fuzzy(x, pp, colname, triangle_)
} else if (.method == "raisedcos") {
pp <- .prepare_fuzzy(x, colname, .breaks, .labels, call)
res <- .partition_fuzzy(x, pp, colname, raisedcos_)
}
} else {
cli_abort(c("Unable to partition column {.var {colname}}.",
"i"="Column selected for partitioning must be a factor, logical, or numeric.",
"x"="The column {.var {colname}} is a {.cls {class(x)}}."),
call = call)
}
if (.na) {
nas <- is.na(x)
if (any(nas)) {
res[paste0(colname, "=NA")] <- nas
}
}
res
})
res <- do.call(cbind, res)
keeped <- if (.keep) .data else .data[-sel]
res <- cbind(keeped, res)
as_tibble(res)
}
.partition_factor <- function(x, colname) {
res <- lapply(levels(x), function(lev) !is.na(x) & x == lev)
names(res) <- paste0(colname, "=", levels(x))
as_tibble(res)
}
.prepare_crisp <- function(x, colname, breaks, labels, right, call) {
if (length(breaks) == 1) {
.check_scalar_breaks(breaks, call)
br <- .determine_crisp_breaks(x, breaks)
} else {
br <- breaks
}
if (is.null(labels)) {
lb <- .determine_crisp_labels(br, right)
} else {
if (length(labels) != length(br) - 1) {
if (length(breaks) == 1) {
cli_abort(c("If {.arg .breaks} is scalar, the length of {.arg .labels} must be equal to the value of {.var .breaks}.",
"i"="The length of {.arg .labels} is {length(labels)}.",
"i"="{.arg .breaks} is scalar value {breaks}."),
call = call)
} else {
cli_abort(c("If {.arg .breaks} is non-scalar, the length of {.arg .labels} must be equal to the length of {.var .breaks} - 1.",
"i"="The length of {.arg .labels} is {length(labels)}.",
"i"="The length of {.arg .breaks} is {length(breaks)}."),
call = call)
}
}
lb <- labels
}
list(breaks = br, labels = lb)
}
.prepare_fuzzy <- function(x, colname, breaks, labels, call) {
if (length(breaks) == 1) {
.check_scalar_breaks(breaks, call)
br <- .determine_fuzzy_breaks(x, breaks)
} else {
if (length(breaks) < 3) {
cli_abort(c("If {.arg .breaks} is non-scalar, it must be a vector with at least 3 elements.",
"i"="The length of {.arg .breaks} is {length(breaks)}."),
call = call)
}
br <- breaks
}
if (is.null(labels)) {
lb <- .determine_fuzzy_labels(br)
} else {
if (length(labels) != length(br) - 2) {
if (length(breaks) == 1) {
cli_abort(c("If {.arg .breaks} is scalar, the length of {.arg .labels} must be equal to the value of {.var .breaks}.",
"i"="The length of {.arg .labels} is {length(labels)}.",
"i"="{.arg .breaks} is {breaks}."),
call = call)
} else {
cli_abort(c("If {.arg .breaks} is non-scalar, the length of {.arg .labels} must be equal to the length of {.var .breaks} - 2.",
"i"="The length of {.arg .labels} is {length(labels)}.",
"i"="The length of {.arg .breaks} is {length(breaks)}."),
call = call)
}
}
lb <- labels
}
list(breaks = br, labels = lb)
}
.partition_fuzzy <- function(x, pp, colname, fun) {
res <- lapply(seq_along(pp$labels), function(i) {
ii <- seq(from = i, length.out = 3)
fun(x, pp$breaks[ii])
})
names(res) <- paste0(colname, "=", pp$labels)
as_tibble(res)
}
.check_scalar_breaks <- function(breaks, call) {
if (breaks <= 1 || !is_integerish(breaks)) {
cli_abort(c("If {.arg .breaks} is a single value, it must be a natural number greater than 1.",
"i"="You've supplied {breaks}."),
call = call)
}
}
.determine_crisp_breaks <- function(x, breaks) {
breaks <- seq(from = min(x, na.rm = TRUE), to = max(x, na.rm = TRUE), length.out = breaks + 1)
c(-Inf, breaks[c(-1, -length(breaks))], Inf)
}
.determine_fuzzy_breaks <- function(x, breaks) {
breaks <- seq(from = min(x, na.rm = TRUE), to = max(x, na.rm = TRUE), length.out = breaks)
c(-Inf, breaks, Inf)
}
.determine_crisp_labels <- function(breaks, right) {
l <- signif(breaks[-length(breaks)], 3)
r <- signif(breaks[-1], 3)
ll <- ifelse(right, "(", "[")
rr <- ifelse(right, "]", ")")
paste0(ll, l, ";", r, rr)
}
.determine_fuzzy_labels <- function(breaks) {
l <- signif(breaks[-(length(breaks) - 0:1)], 3)
c <- signif(breaks[-c(1, length(breaks))], 3)
r <- signif(breaks[-(1:2)], 3)
paste0("(", l, ";", c, ";", r, ")")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.