Nothing
#' Fast Index Generation
#'
#' For not too small m much faster than `rep(seq_len(m), each = each)`.
#'
#' @noRd
#' @keywords internal
#'
#' @param m Integer. See `each`.
#' @param each Integer. How many times should each value in `1:m` be repeated?
#' @returns Like `x`, but converted to matrix.
#' @examples
#' rep_each(10, 2)
#' rep(1:10, each = 2) # Dito
rep_each <- function(m, each) {
out <- .col(dim = c(each, m))
dim(out) <- NULL
out
}
#
# # Same as rep.int(seq_len(m), times)
# rep_times <- function(m, times) {
# out <- .row(dim = c(m, times))
# dim(out) <- NULL
# out
# }
#' Fast OHE
#'
#' Turns vector/factor into its One-Hot-Encoding.
#' Ingeniouly written by Mathias Ambuehl.
#'
#' Working with integers instead of doubles would be slightly faster, but at the price
#' of potential integer overflows in subsequent calculations.
#'
#' @noRd
#' @keywords internal
#'
#' @param x Object representing model predictions.
#' @returns Like `x`, but converted to matrix.
fdummy <- function(x) {
x <- as.factor(x)
lev <- levels(x)
out <- matrix(0, nrow = length(x), ncol = length(lev))
out[cbind(seq_along(x), as.integer(x))] <- 1
colnames(out) <- lev
out
}
#' Weighted Version of colMeans()
#'
#' Internal function used to calculate column-wise weighted means.
#'
#' @noRd
#' @keywords internal
#'
#' @param x A vector, factor, or matrix-like.
#' @param w Optional case weights.
#' @returns A vector of column means.
wcolMeans <- function(x, w = NULL) {
if (is.factor(x)) {
if (is.null(w)) {
return(colMeans_factor(x))
}
x <- fdummy(x)
} else if (!is.matrix(x)) {
x <- as.matrix(x)
}
if (is.null(w)) {
return(colMeans(x))
}
if (length(w) != nrow(x)) {
stop("w must be of length nrow(x).")
}
if (!is.double(w)) {
w <- as.double(w)
}
colSums(x * w) / sum(w)
}
#' colMeans() for Factors
#'
#' Internal function used to calculate proportions of factor levels.
#' It is less memory-hungry than `colMeans(fdummy(x))`, and much faster.
#' A weighted version via `rowsum(w, x)` is not consistently faster than
#' `wcolMeans(fdummy(x))`, so we currently focus on the non-weighted case.
#' Furthermore, `rowsum()` drops empty factor levels.
#'
#' @noRd
#' @keywords internal
#'
#' @param x Factor-like.
#' @returns Named vector.
colMeans_factor <- function(x) {
x <- as.factor(x)
lev <- levels(x)
out <- tabulate(x, nbins = length(lev)) / length(x)
names(out) <- lev
out
}
#' Grouped Column Means
#'
#' Internal workhorse to aggregate predictions over fixed-length groups.
#'
#' @noRd
#' @keywords internal
#'
#' @param x Vector, factor or matrix-like.
#' @param ngroups Number of groups (`x` was stacked that many times).
#' @param w Optional vector with case weights of length `NROW(x) / ngroups`.
#' @returns A (g x K) matrix, where g is the number of groups, and K = NCOL(x).
wrowmean <- function(x, ngroups = 1L, w = NULL) {
if (ngroups == 1L) {
return(t.default(wcolMeans(x, w = w)))
}
# Prep w
if (!is.null(w)) {
if (length(w) != NROW(x) %/% ngroups) {
stop("w must be of length NROW(x) / ngroups.")
}
if (!is.double(w)) {
w <- as.double(w)
}
}
# Very fast case for factors w/o weights and vectors/1d-matrices
if (is.factor(x)) {
if (is.null(w)) {
return(rowmean_factor(x, ngroups = ngroups))
}
x <- fdummy(x)
}
if (is.vector(x) || (is.matrix(x) && ncol(x) == 1L)) {
return(wrowmean_vector(x, ngroups = ngroups, w = w))
}
# General version
wrowmean_matrix(x, ngroups = ngroups, w = w)
}
#' (w)rowmean() for Factors (without weights)
#'
#' Grouped `colMeans_factor()` for equal sized groups.
#'
#' @noRd
#' @keywords internal
#'
#' @param x Factor-like.
#' @param ngroups Number of subsequent, equals sized groups.
#' @returns Matrix with column names.
rowmean_factor <- function(x, ngroups = 1L) {
x <- as.factor(x)
lev <- levels(x)
n_bg <- length(x) %/% ngroups
dim(x) <- c(n_bg, ngroups)
out <- t.default(apply(x, 2L, FUN = tabulate, nbins = length(lev)))
colnames(out) <- lev
out / n_bg
}
#' wrowmean() for Vectors
#'
#' Grouped means over fixed-length groups for vectors or 1d matrices.
#'
#' @noRd
#' @keywords internal
#'
#' @param x Vector or matrix with one column.
#' @param ngroups Number of subsequent, equals sized groups.
#' @param w Optional vector of case weights of length `NROW(x) / ngroups`.
#' @returns Matrix.
wrowmean_vector <- function(x, ngroups = 1L, w = NULL) {
if (!(is.vector(x) || (is.matrix(x) && ncol(x) == 1L))) {
stop("x must be a vector or a single column matrix")
}
nm <- if (is.matrix(x)) colnames(x)
dim(x) <- c(length(x) %/% ngroups, ngroups)
out <- if (is.null(w)) colMeans(x) else colSums(x * w) / sum(w)
dim(out) <- c(ngroups, 1L)
if (!is.null(nm)) {
colnames(out) <- nm
}
out
}
#' wrowmean() for Matrices
#'
#' Grouped column means over fixed-length groups for matrices.
#'
#' @noRd
#' @keywords internal
#'
#' @param x Matrix-like.
#' @param ngroups Number of subsequent, equals sized groups.
#' @param w Optional vector of case weights of length `NROW(x) / ngroups`.
#' @returns Matrix.
wrowmean_matrix <- function(x, ngroups = 1L, w = NULL) {
if (!is.matrix(x)) {
x <- as.matrix(x)
}
n_bg <- nrow(x) %/% ngroups
g <- rep_each(ngroups, each = n_bg) # rep(seq_len(ngroups), each = n_bg)
# Even faster: cbind(collapse::fmean(x, g = g, w = w))
if (is.null(w)) {
out <- rowsum(x, group = g, reorder = FALSE) / n_bg
} else {
# w is recycled over rows and columns
out <- rowsum(x * w, group = g, reorder = FALSE) / sum(w)
}
rownames(out) <- NULL
out
}
#' Grouped wcolMeans()
#'
#' Internal function used to calculate grouped column-wise weighted means along with
#' corresponding (weighted) counts.
#'
#' @noRd
#' @keywords internal
#'
#' @param x A vector, factor or matrix-like object.
#' @param g Optional grouping variable.
#' @param w Optional case weights.
#' @returns A list with two elements: "M" represents a matrix of grouped (column)
#' means, and "w" is a vector of corresponding group counts/weights.
#' @examples
#' with(iris, gwColMeans(Sepal.Width, g = Species, w = Sepal.Length))
gwColMeans <- function(x, g = NULL, w = NULL) {
if (is.null(g)) {
M <- t.default(wcolMeans(x, w = w))
denom <- if (is.null(w)) NROW(x) else sum(w)
return(list(M = M, w = denom))
}
# Now the interesting case
if (is.factor(x)) {
x <- fdummy(x)
} else if (!is.vector(x) && !is.matrix(x)) {
x <- as.matrix(x)
}
if (is.null(w)) {
w <- rep.int(1, NROW(x))
} else {
if (!is.double(w)) {
w <- as.double(w)
}
x <- x * w # w is correctly recycled over columns
}
denom <- as.numeric(rowsum(w, group = g))
list(M = rowsum(x, group = g) / denom, w = denom)
}
#' Weighted Mean Centering
#'
#' Internal function used to center each column of an object by subtracting its
#' possibly weighted mean. Vector input is turned into a matrix with one column.
#'
#' @noRd
#' @keywords internal
#'
#' @param x Vector or matrix-like.
#' @param w Optional vector of case weights.
#' @returns Centered version of `x` (vectors are turned into single-column matrix).
wcenter <- function(x, w = NULL) {
if (!is.matrix(x)) {
x <- as.matrix(x)
}
# sweep(x, MARGIN = 2L, STATS = wcolMeans(x, w = w)) # Slower
x - matrix(wcolMeans(x, w = w), nrow = nrow(x), ncol = ncol(x), byrow = TRUE)
}
#' Fast Row Subsetting (from kernelshap)
#'
#' Internal function used to row-subset data.frames.
#' Brings a massive speed-up for data.frames. All other classes (tibble, data.table,
#' matrix) are subsetted in the usual way.
#'
#' @noRd
#' @keywords internal
#'
#' @param x A matrix-like object.
#' @param i Logical or integer vector of rows to pick.
#' @returns Subsetted version of `x`.
rep_rows <- function(x, i) {
if (!(all(class(x) == "data.frame"))) {
return(x[i, , drop = FALSE]) # matrix, tibble, data.table, ...
}
# data.frame
out <- lapply(x, function(z) if (length(dim(z)) != 2L) z[i] else z[i, , drop = FALSE])
attr(out, "row.names") <- .set_row_names(length(i))
class(out) <- "data.frame"
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.