Nothing
#' Univariate Grid
#'
#' @description
#' Creates evaluation grid for any numeric or non-numeric vector `z`.
#'
#' For discrete `z` (non-numeric, or numeric with at most `grid_size` unique values),
#' this is simply `sort(unique(z))`.
#'
#' Otherwise, if `strategy = "uniform"` (default), the evaluation points form a regular
#' grid over the trimmed range of `z`. By trimmed range we mean the
#' range of `z` after removing values outside `trim[1]` and `trim[2]` quantiles.
#' Set `trim = 0:1` for no trimming.
#'
#' If `strategy = "quantile"`, the evaluation points are quantiles over a regular grid
#' of probabilities from `trim[1]` to `trim[2]`.
#'
#' Quantiles are calculated via the inverse of the ECDF, i.e., via
#' `stats::quantile(..., type = 1`).
#'
#' @param z A vector or factor.
#' @param grid_size Approximate grid size.
#' @param trim The default `c(0.01, 0.99)` means that values outside the
#' 1% and 99% quantiles of non-discrete numeric columns are removed before calculation
#' of grid values. Set to `0:1` for no trimming.
#' @param strategy How to find grid values of non-discrete numeric columns?
#' Either "uniform" or "quantile", see description of [univariate_grid()].
#' @param na.rm Should missing values be dropped from the grid? Default is `TRUE`.
#' @returns A vector or factor of evaluation points.
#' @seealso [multivariate_grid()]
#' @export
#' @examples
#' univariate_grid(iris$Species)
#' univariate_grid(rev(iris$Species)) # Same
#'
#' x <- iris$Sepal.Width
#' univariate_grid(x, grid_size = 5) # Uniform binning
#' univariate_grid(x, grid_size = 5, strategy = "quantile") # Quantile
univariate_grid <- function(z, grid_size = 49L, trim = c(0.01, 0.99),
strategy = c("uniform", "quantile"), na.rm = TRUE) {
strategy <- match.arg(strategy)
uni <- unique(z)
if (!is.numeric(z) || length(uni) <= grid_size) {
out <- if (na.rm) sort(uni) else sort(uni, na.last = TRUE)
return(out)
}
# Non-discrete numeric
if (strategy == "quantile") {
p <- seq(trim[1L], trim[2L], length.out = grid_size)
g <- stats::quantile(z, probs = p, names = FALSE, type = 1L, na.rm = TRUE)
out <- unique(g)
} else {
# strategy = "uniform"
if (trim[1L] == 0 && trim[2L] == 1) {
r <- range(z, na.rm = TRUE)
} else {
r <- stats::quantile(z, probs = trim, names = FALSE, type = 1L, na.rm = TRUE)
}
out <- seq(r[1L], r[2L], length.out = grid_size)
}
if (!na.rm && anyNA(z)) {
out <- c(out, NA)
}
return(out)
}
#' Multivariate Grid
#'
#' This function creates a multivariate grid. Each column of the input `x` is turned
#' (independently) into a vector of grid values via [univariate_grid()].
#' Combinations are then formed by calling [expand.grid()].
#'
#' @inheritParams univariate_grid
#' @param x A vector, matrix, or data.frame to turn into a grid of values.
#' @param grid_size Controls the approximate grid size. If `x` has p columns, then each
#' (non-discrete) column will be reduced to about the p-th root of `grid_size` values.
#' @returns A vector, matrix, or data.frame with evaluation points.
#' @seealso [univariate_grid()]
#' @examples
#' multivariate_grid(iris[1:2], grid_size = 4)
#' multivariate_grid(iris$Species) # Works also in the univariate case
#' @export
multivariate_grid <- function(x, grid_size = 49L, trim = c(0.01, 0.99),
strategy = c("uniform", "quantile"), na.rm = TRUE) {
strategy <- match.arg(strategy)
p <- NCOL(x)
if (p == 1L) {
if (is.data.frame(x)) {
x <- x[[1L]]
}
out <- univariate_grid(
x, grid_size = grid_size, trim = trim, strategy = strategy, na.rm = na.rm
)
return(out)
}
grid_size <- ceiling(grid_size^(1/p)) # take p's root of grid_size
is_mat <- is.matrix(x)
if (is_mat) {
x <- as.data.frame(x)
}
out <- expand.grid(
lapply(
x,
FUN = univariate_grid,
grid_size = grid_size,
trim = trim,
strategy = strategy,
na.rm = na.rm
)
)
if (is_mat) as.matrix(out) else out
}
#' Checks Consistency of Grid
#'
#' Internal function used to check if a grid of values is consistent with `v` and data.
#'
#' @noRd
#' @keywords internal
#'
#' @param g Grid of values (either a vector/factor, a matrix, or data.frame).
#' @param v Vector of variable names to be represented by the grid `g`.
#' @param X_is_matrix Logical flag indicating whether the background data is a matrix.
#' or a data.frame. `g` must be consistent with this.
#' @returns An error message or `TRUE`.
check_grid <- function(g, v, X_is_matrix) {
p <- length(v)
if (p != NCOL(g)) {
stop("NCOL(grid) must equal length(v)")
}
if (p == 1L) {
if (is.list(g)) {
stop("'grid' should be a vector/factor")
}
} else {
stopifnot(
is.matrix(g) || is.data.frame(g),
is.matrix(g) == X_is_matrix,
!is.null(colnames(g)),
all(v == colnames(g))
)
}
TRUE
}
#' Bin into Quantiles
#'
#' Internal function. Applies [cut()] to quantile breaks.
#'
#' @noRd
#' @keywords internal
#'
#' @param x A numeric vector.
#' @param m Number of intervals.
#' @returns A factor, representing binned `x`.
qcut <- function(x, m) {
p <- seq(0, 1, length.out = m + 1L)
g <- stats::quantile(x, probs = p, names = FALSE, type = 1L, na.rm = TRUE)
cut(x, breaks = unique(g), include.lowest = TRUE)
}
#' Approximate Vector
#'
#' Internal function. Approximates values by the average of the two closest quantiles.
#'
#' @noRd
#' @keywords internal
#'
#' @param x A vector or factor.
#' @param m Number of unique values.
#' @returns An approximation of `x` (or `x` if non-numeric or discrete).
approx_vector <- function(x, m = 50L) {
if (!is.numeric(x) || length(unique(x)) <= m) {
return(x)
}
p <- seq(0, 1, length.out = m + 1L)
q <- unique(stats::quantile(x, probs = p, names = FALSE, na.rm = TRUE))
mids <- (q[-length(q)] + q[-1L]) / 2
return(mids[findInterval(x, q, rightmost.closed = TRUE)])
}
#' Approximate df or Matrix
#'
#' Internal function. Calls `approx_vector()` to each column in matrix or data.frame.
#'
#' @noRd
#' @keywords internal
#'
#' @param X A matrix or data.frame.
#' @param m Number of unique values.
#' @returns An approximation of `X` (or `X` if non-numeric or discrete).
approx_matrix_or_df <- function(X, v = colnames(X), m = 50L) {
stopifnot(
m >= 2L,
is.data.frame(X) || is.matrix(X)
)
if (is.data.frame(X)) {
X[v] <- lapply(X[v], FUN = approx_vector, m = m)
} else { # Matrix
X[, v] <- apply(X[, v, drop = FALSE], MARGIN = 2L, FUN = approx_vector, m = m)
}
return(X)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.