#' Simulate from \code{PlackettLuce} fitted objects
#'
#' @inheritParams stats::simulate
#'
#' @param multinomial use multinomial sampling anyway? Default is
#' \code{FALSE}. see Details.
#' @param max_combinations a positive number. Default is
#' \code{20000}. See Details.
#' @param seed an object specifying if and how the random number
#' generator should be initialised. Either \code{NULL} or an
#' integer that will be used in a call to \code{set.seed} before
#' simulating the rankings. If set, the value is saved as the
#' \code{seed} attribute of the returned value. The default,
#' \code{NULL}, will not change the random generator state, and
#' return \code{.Random.seed} as the \code{seed} attribute.
#'
#' @details
#'
#' If \code{multinomial} is \code{FALSE} (default) and there are no
#' tie parameters in the object (i.e. \code{length(object$ties) == 1}),
#' then rankings are sampled by ordering exponential random variates
#' with rate 1 scaled by the estimated item-worth parameters
#' \code{object$coefficients} (see, Diaconis, 1988, Chapter 9D for
#' details).
#'
#' In all other cases, the current implementation uses direct
#' multinomial sampling, and will throw an error if there are more
#' than \code{max_combinations} combinations of items that the sampler
#' has to decide from. This is a hard-coded exit to prevent issues
#' relating to the creation of massive objects in memory.
#'
#' If \code{length(object$ties) > 1} the user's setting for
#' \code{multinomial} is ignored and \code{simulate.PlackettLuce} operates as if
#' \code{multinomial} is \code{TRUE}.
#'
#' @return A \code{data.frame} of \code{\link{rankings}} objects of the same
#' dimension as \code{object$rankings}.
#'
#' @references
#'
#' Diaconis (1988). \emph{Group Representations in Probability and
#' Statistics}. Institute of Mathematical Statistics Lecture Notes
#' 11. Hayward, CA.
#'
#' @examples
#' R <- matrix(c(1, 2, 0, 0,
#' 4, 1, 2, 3,
#' 2, 1, 1, 1,
#' 1, 2, 3, 0,
#' 2, 1, 1, 0,
#' 1, 0, 3, 2), nrow = 6, byrow = TRUE)
#' colnames(R) <- c("apple", "banana", "orange", "pear")
#' mod <- PlackettLuce(R)
#' simulate(mod, 5)
#'
#' s1 <- simulate(mod, 3, seed = 112)
#' s2 <- simulate(mod, 2, seed = 112)
#'
#' identical(s1[1:2], s2[1:2])
#'
#' @importFrom stats rexp runif simulate
#' @method simulate PlackettLuce
#' @export
simulate.PlackettLuce <- function(object, nsim = 1L, seed = NULL,
multinomial = FALSE,
max_combinations = 2e+04, ...) {
if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
runif(1L)
if (is.null(seed))
RNGstate <- get(".Random.seed", envir = .GlobalEnv)
else {
R.seed <- get(".Random.seed", envir = .GlobalEnv)
set.seed(seed)
RNGstate <- structure(seed, kind = as.list(RNGkind()))
on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
}
rankings <- unclass(object$rankings)
N <- ncol(rankings)
n_rankings <- nrow(rankings)
id <- seq(length(object$coefficients) - length(object$ties) + 1L)
alpha <- object$coefficients[id]
delta <- numeric(max(object$ties))
delta[object$ties] <- c(1.0, unname(object$coefficients[-id]))
opt <- seq_len(N)
sets <- as.list(numeric(n_rankings))
for (i in seq_len(n_rankings)) {
sets[[i]] <- opt[rankings[i,] > 0L]
}
len <- lengths(sets)
## If there are no ties use Louis Gordon (1983, Annals of Stat);
## Diaconis (1988, Chapter 9D)
if (length(object$ties) == 1L & !multinomial) {
sampler <- function(objects) {
v <- numeric(N)
len <- length(objects)
ordering <- objects[order(rexp(len, rate = 1L)/alpha[objects],
decreasing = FALSE)]
v[ordering] <- seq.int(len)
v
}
}
else {
## NOTE, IK 10/12/2017: This is an inefficient computation
## with potentially massive objects FIX, IK 10/12/2017: Remove
## dependence on all combinations. For now a preventive stop
## if n_combinations is more than max_combinations
n_combinations <- sum(choose(N, object$ties))
if (n_combinations > max_combinations) {
stop(paste("simulate.PlackettLuce needs to decide between",
n_combinations,
"combinations of items, but 'max_combinations' is",
max_combinations))
}
## Get all possible combinations of objects
combinations <- NULL
for (j in object$ties) {
combinations <- c(combinations, combn(opt, j, simplify = FALSE))
}
## Unormalized probabilities of all combinations
probs <- vapply(combinations, function(z) {
delta[length(z)] * prod(alpha[z])^(1L/length(z))
}, 1)
## NOTE, IK 10/12/2017: Normalization is done internally by sample.int
sampler <- function(objects) {
v <- numeric(N)
j <- 1L
indices <- rep(TRUE, n_combinations)
while (length(objects)) {
## find out which combinations have all of their objects
## included in `objects`
indices[indices] <- vapply(combinations[indices], function(x) {
all(x %in% objects)
}, TRUE)
## sample, after setting the probability of all other
## combinations to zero
sampled <- combinations[[sample.int(n_combinations, 1L,
prob = probs * indices)]]
## remove the sampled objects from `objects`
objects <- objects[-match(sampled, objects, nomatch = 0L)]
v[sampled] <- j
j <- j + 1L
}
v
}
}
out <- replicate(nsim, {
R <- t(vapply(sets, sampler, numeric(N)))
colnames(R) <- colnames(rankings)
as.rankings(R)
}, simplify = FALSE)
out <- data.frame(out)
names(out) <- paste("sim", seq_len(nsim), sep = "_")
attr(out, "seed") <- RNGstate
out
}
## TODO, IK, 10/12/2017:
## * Handling of weights
## * Avoid constructing all combinations
## * Exploit ideas for multinomial sampling with massive number of categories
## * Add parallelization capabilities
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.