#' Generate Random, Non-Overlapping Partition of Indices
#'
#' @param n Number of elements to partition.
#'
#' @param fraction A numeric in (0,1] specifying the size of each set
#' relative to `n`. If `NULL`, argument `size` is used.
#'
#' @param size An integer in \eqn{{1, 2, ..., n}} specifying the size of each
#' set. If `NULL`, argument `fraction` is used.
#'
#' @param warn If `TRUE`, a warning is produced if the sets produced are not
#' of equal size.
#'
#' @return A list of random non-overlapping (disjoint) sets where each
#' element holds indices in \eqn{{1, 2, ..., n}} and where the union of all
#' sets is \eqn{{1, 2, ..., n}}.
#'
#' @importFrom parallel splitIndices
#' @export
sample_partitions <- function(n, fraction = NULL, size = NULL, warn = TRUE) {
stop_if_not(is.numeric(n), length(n) == 1L, !is.na(n), n > 0)
stop_if_not(!is.null(fraction) || !is.null(size))
## Number of partitions
if (!is.null(fraction)) {
stop_if_not(is.numeric(fraction), length(fraction) == 1L, !is.na(fraction),
fraction > 0, fraction <= 1)
npartitions <- as.integer(ceiling(1 / fraction))
} else if (!is.null(size)) {
stop_if_not(is.numeric(size), length(size) == 1L, !is.na(size),
size >= 1, size <= n)
npartitions <- as.integer(ceiling(n / size))
}
## Sanity checks
stop_if_not(is.integer(npartitions), length(npartitions) == 1L,
!is.na(npartitions), npartitions > 0L, npartitions <= n)
## Produce shuffled indices in 1:n
idxs <- sample.int(n, size = n, replace = FALSE)
partitions <- splitIndices(nx = n, ncl = npartitions)
partitions <- lapply(partitions, FUN = function(partition) idxs[partition])
if (warn) {
sizes <- unique(lengths(partitions))
if (length(sizes) > 1L) {
sizes <- sort(sizes)
warning(sprintf("sample_partitions(n = %g, ...) produced %d sets of unequal sizes: %d and %d",
n, length(partitions), sizes[1], sizes[2]))
}
}
partitions
}
#' Generate Random, Non-Overlapping Similarly-Weighted Partition of Indices
#'
#' @param w Numeric vector of `n` non-negative, finite weights.
#' Weights are normalized such that `sum(w)` equals one.
#'
#' @param fraction A numeric in (0,1] specifying the size of each set
#' relative to `n`. If `NULL`, argument `size` is used.
#'
#' @param size An integer in \eqn{{1, 2, ..., n}} specifying the size of each
#' set. If `NULL`, argument `fraction` is used.
#'
#' @param w_tolerance Maximum allowed difference between target weight of
#' each set (e.g. `fraction * 1`) and the actual total weight
#' of the set (i.e. `sum(w[set])`. If _all_ sets are within
#' the tolerance, the sample is accepted, otherwise rejected.
#'
#' @param max_rejections The maximum number of rejections before giving up.
#'
#' @param warn If `TRUE`, a warning is produced if the sets produced
#' are not of equal size.
#'
#' @return A list of \eqn{P} random non-overlapping (disjoint) sets where
#' each element holds indices in \eqn{{1, 2, ..., n}} and where the union of
#' all sets is \eqn{{1, 2, ..., n}}.
#' Attribute `weights` is a numeric vector of length \eqn{P} where each element
#' is he sum of all normalized weight of the corresponding set. The
#' partition found is such that \eqn{|weight_i - 1/P| \leq w_{tolerance}|}
#' is true for each set.
#' Attribute `count` gives the number of internal samples produced before
#' arriving at an accepted sample.
#' If no accepted sample was found, the `NA` is returned
#' (with `count` attribute set).
#'
#' @importFrom parallel splitIndices
#' @export
sample_partitions_similar_weights <- function(w, fraction = NULL, size = NULL, w_tolerance = 0.01, max_rejections = 100L, warn = TRUE) {
stop_if_not(is.numeric(w), length(w) > 0, !anyNA(w), all(w > 0))
stop_if_not(is.numeric(w_tolerance), length(w_tolerance) == 1L,
!is.na(w_tolerance), w_tolerance >= 0, w_tolerance <= 1)
stop_if_not(!is.null(fraction) || !is.null(size))
stop_if_not(is.numeric(max_rejections), length(max_rejections) == 1L,
!is.na(max_rejections), max_rejections >= 0)
## Normalize weights
w <- w / sum(w)
n <- length(w)
## Number of partitions
if (!is.null(fraction)) {
stop_if_not(is.numeric(fraction), length(fraction) == 1L, !is.na(fraction),
fraction > 0, fraction <= 1)
npartitions <- as.integer(ceiling(1 / fraction))
} else if (!is.null(size)) {
stop_if_not(is.numeric(size), length(size) == 1L, !is.na(size),
size >= 1, size <= n)
npartitions <- as.integer(ceiling(n / size))
}
## Sanity checks
stop_if_not(is.integer(npartitions), length(npartitions) == 1L,
!is.na(npartitions), npartitions > 0L, npartitions <= n)
## Partioning
pidxs <- splitIndices(nx = n, ncl = npartitions)
## Target weight per set
w_target <- 1 / npartitions
idxs <- NULL
ready <- FALSE
count <- 0L
while (!ready && count < max_rejections) {
## Produce shuffled indices in 1:n
idxs <- sample.int(n, size = n, replace = FALSE)
## Shuffle weights accordingly
w_idxs <- w[idxs]
## Calculate weights per set
ws <- lapply(pidxs, FUN = function(partition) sum(w_idxs[partition]))
ws <- unlist(ws, use.names = FALSE)
ready <- all(abs(ws - w_target) <= w_tolerance)
count <- count + 1L
}
if (ready) {
partitions <- lapply(pidxs, FUN = function(partition) idxs[partition])
stopifnot(length(ws) == length(partitions))
attr(partitions, "weights") <- ws
} else {
partitions <- NA
}
attr(partitions, "w_target") <- w_target
attr(partitions, "w_tolerance") <- w_tolerance
attr(partitions, "counts") <- count
partitions
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.