Nothing
#' Estimate Partition Function
#'
#' Estimate the logarithm of the partition function of the Mallows rank model. Choose
#' between the importance sampling algorithm described in
#' \insertCite{vitelli2018}{BayesMallows} and the IPFP algorithm for computing an
#' asymptotic approximation described in \insertCite{mukherjee2016}{BayesMallows}.
#'
#' @param method Character string specifying the method to use in order to
#' estimate the logarithm of the partition function. Available options are
#' \code{"importance_sampling"} and \code{"asymptotic"}.
#'
#' @param alpha_vector Numeric vector of \eqn{\alpha} values over which to
#' compute the importance sampling estimate.
#'
#' @param n_items Integer specifying the number of items.
#'
#' @param metric Character string specifying the distance measure to use.
#' Available options are \code{"footrule"} and \code{"spearman"} when
#' \code{method = "asymptotic"} and in addition \code{"cayley"}, \code{"hamming"},
#' \code{"kendall"}, and \code{"ulam"} when \code{method = "importance_sampling"}.
#'
#' @param nmc Integer specifying the number of Monte Carlo samples to use in the
#' importance sampling. Only used when \code{method = "importance_sampling"}.
#'
#' @param degree Integer specifying the degree of the polynomial used to
#' estimate \eqn{\log(\alpha)} from the grid of values provided by the
#' importance sampling estimate.
#'
#' @param n_iterations Integer specifying the number of iterations to use in the
#' asymptotic approximation of the partition function. Only used when
#' \code{method = "asymptotic"}.
#'
#' @param K Integer specifying the parameter \eqn{K} in the
#' asymptotic approximation of the partition function. Only used when
#' \code{method = "asymptotic"}.
#'
#' @return A vector of length \code{degree} which can be supplied to the
#' \code{logz_estimate} argument of \code{\link{compute_mallows}}.
#'
#' @param cl Optional computing cluster used for parallelization, returned
#' from \code{parallel::makeCluster}. Defaults to \code{NULL}. Only used when
#' \code{method = "importance_sampling"}.
#'
#' @param seed Optional random number seed.
#'
#' @export
#'
#' @references \insertAllCited{}
#'
#' @example /inst/examples/estimate_partition_function_example.R
#' @family preprocessing
#'
estimate_partition_function <- function(method = "importance_sampling",
alpha_vector, n_items, metric,
nmc, degree, n_iterations, K, cl = NULL,
seed = NULL) {
stopifnot(degree < length(alpha_vector))
if (method == "importance_sampling") {
if (!is.null(cl)) {
# Split nmc into each cluster
nmc_vec <- rep(floor(nmc / length(cl)), length(cl))
i <- 1
while (sum(nmc_vec) != nmc) {
nmc_vec[i] <- nmc_vec[i] + 1
if (i > length(cl)) break
}
parallel::clusterExport(cl, c("alpha_vector", "n_items", "metric", "seed"),
envir = environment()
)
estimates <- parallel::parLapply(cl, nmc_vec, function(x) {
if (!is.null(seed)) set.seed(seed)
compute_importance_sampling_estimate(
alpha_vector = alpha_vector, n_items = n_items,
metric = metric, nmc = x
)
})
log_z <- rowMeans(do.call(cbind, estimates))
} else {
if (!is.null(seed)) set.seed(seed)
log_z <- as.numeric(
compute_importance_sampling_estimate(
alpha_vector = alpha_vector, n_items = n_items,
metric = metric, nmc = nmc
)
)
}
# Compute the estimate at each discrete alpha value
estimate <- data.frame(
alpha = alpha_vector,
log_z = log_z
)
} else if (method == "asymptotic") {
stopifnot(metric %in% c("footrule", "spearman"))
estimate <- data.frame(
alpha = alpha_vector,
log_z = as.numeric(
asymptotic_partition_function(
alpha_vector = alpha_vector, n_items = n_items,
metric = metric, K = K, n_iterations = n_iterations
)
)
)
}
# Fit a regression model
form <- stats::as.formula(paste(
"log_z ~ ",
paste("I( alpha^", seq(from = 1, to = degree, by = 1), ")",
collapse = "+"
)
))
stats::lm(form, data = estimate)$coefficients
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.