obtain_clusters <- function(x) { #nocov start
clusters <- suppressWarnings(map(2:6, ~fanny(x, .)))
which_clust <- which.max(map_dbl(clusters, ~.$silinfo$avg.width))
points_clustered <- map(1:(which_clust+1), function(i) {
x[clusters[[which_clust]]$clustering == i,]
})
cluster_data <- map(points_clustered, function(a) {
return(list(mu = apply(a, 2, mean),
sigma = var(a),
siginv = tryCatch(
chol2inv(chol(var(a))),
error = function(e) {
return(ginv(var(a)))
}
)
))
})
return(list(
clusters = cluster_data,
total = list(
mu = apply(x, 2, mean), sigma = var(x),
siginv = tryCatch(
chol2inv(chol(var(x))),
error = function(e) {
return(ginv(var(x)))
})
))
)
}
predict_cluster <- function(x, clusters) {
vals <- do.call('rbind', map(clusters, function(c) {
point_diffs <- data.matrix(sweep(x, 2, c$mu))
return(diag(point_diffs %*% c$siginv %*% t(point_diffs)))
}))
if (is.null(dim(vals))) return(which.min(vals))
return(apply(vals, 2, which.min))
}
idemc_step <- function(ems, targets, points, point_imps, ladder, clusters,
order_active, ranges, M = 10, pm = 0.9, w = 0.8) {
input_names <- names(ems[[1]]$ranges)
in_range <- function(x, ranges) {
is_in <- map_lgl(seq_along(ranges), ~x[[.]] >= ranges[[.]][1] &&
x[[.]] <= ranges[[.]][2])
return(all(is_in))
}
proposal <- points
prop_imps <- point_imps
n <- length(points)
nmutate <- M
ncross <- ceiling((n+1)/2)
nex <- n+1
do_mutate <- runif(1)
if (do_mutate <= pm) {
## Do the mutation
for (j in seq_along(points)) {
if (j == 1) {
proposal[[j]] <- setNames(
data.frame(
matrix(
map_dbl(
clusters[[j]]$ranges,
~runif(1, .[[1]], .[[2]])
), nrow = 1
)
), input_names
)
prop_imps[[j]] <- nth_implausible(ems, proposal[[j]], targets, max_imp = Inf)
}
else {
for (i in 1:M) {
which_clust <- predict_cluster(points[[j]], clusters[[j]]$clusters)
this_clust <- clusters[[j]]$clusters[[which_clust]]
clust_mean <- this_clust$mu
clust_var <- this_clust$sigma
all_mean <- clusters[[j]]$total$mu
all_var <- clusters[[j]]$total$sigma
if (runif(1) < w)
new_point <- mvtnorm::rmvnorm(1, mean = clust_mean, sigma = clust_var)
else
new_point <- mvtnorm::rmvnorm(1, mean = all_mean, sigma = all_var)
new_clust <- predict_cluster(new_point, clusters[[j]]$clusters)
new_var <- clusters[[j]]$clusters[[new_clust]]$sigma
new_imp <- nth_implausible(ems, setNames(data.frame(matrix(new_point, nrow = 1)),
input_names),
targets, max_imp = Inf)
if (new_imp <= ladder[[j]] && in_range(new_point, ranges)) {
dp <- unlist(proposal[[j]])
np <- new_point[1,]
accept_num <- (w * mvtnorm::dmvnorm(dp, np, clust_var) +
(1-w) * mvtnorm::dmvnorm(dp, np, all_var))
accept_denom <- (w * mvtnorm::dmvnorm(np, dp, new_var) +
(1-w) * mvtnorm::dmvnorm(np, dp, all_var))
prob_accept <- min(accept_num/accept_denom, 1)
if (!is.numeric(prob_accept)) prob_accept <- 0
accepted <- if (runif(1) <= prob_accept) new_imp else NULL
}
else accepted <- NULL
if (!is.null(accepted)) {
proposal[[j]] <- setNames(data.frame(matrix(new_point, nrow = 1)),
input_names)
prop_imps[[j]] <- new_imp
}
}
}
}
}
## Do the crossover
else {
for (i in seq_len(ncross)) {
index1 <- sample(1:n, 1, prob = map_dbl(1:n, ~2*./(n*(n+1))))
if (n == 2)
index2 <- ifelse(index1 == 1, 2, 1)
else
index2 <- sample((1:n)[-index1], 1,
prob = map_dbl((1:n)[-index1],
~2*(n+1-.)/((n-2)*(n+1)+2*index1)))
x1 <- proposal[[index1]][,order_active]
x2 <- proposal[[index2]][,order_active]
c_point <- sample(1:(length(proposal[[1]])-1), 1)
y1 <- unlist(c(x1[1:c_point], x2[(c_point+1):length(x2)]),
use.names = FALSE)
y2 <- unlist(c(x2[1:c_point], x1[(c_point+1):length(x1)]),
use.names = FALSE)
point_df <- rbind.data.frame(x1, x2) |> setNames(input_names[order_active])
point_df <- point_df[,input_names]
imps <- nth_implausible(ems, point_df, targets, max_imp = Inf)
if (imps[[1]] <= ladder[[index1]] &&
imps[[2]] <= ladder[[index2]] &&
in_range(point_df[1,], ranges) &&
in_range(point_df[2,], ranges)) {
proposal[[index1]] <- point_df[1,]
proposal[[index2]] <- point_df[2,]
prop_imps[[index1]] <- imps[[1]]
prop_imps[[index2]] <- imps[[2]]
}
}
}
## Do the exchange
for (i in seq_len(nex)) {
index1 <- sample(n, 1)
if (index1 == 1) index2 <- 2
else if (index1 == n) index2 <- n-1
else index2 <- index1 + sample(c(-1, 1), 1)
if (index1 > index2) {
index1 <- index1-1
index2 <- index2+1
}
if (prop_imps[[index1]] <= ladder[[index2]]) {
temp_point <- proposal[[index1]]
proposal[[index1]] <- proposal[[index2]]
proposal[[index2]] <- temp_point
temp_imp <- prop_imps[[index1]]
prop_imps[[index1]] <- prop_imps[[index2]]
prop_imps[[index2]] <- temp_imp
}
}
return(list(points = proposal, imps = prop_imps))
}
#' IDEMC Point Generation
#'
#' Performs Implausibility-driven Evolutionary Monte Carlo
#'
#' This method for generating points is focused on finding non-implausible
#' regions that are either extremely small relative to the initial parameter
#' domain, or have interesting structure (particularly disconnected structure)
#' that would potentially be overlooked by more standard point generation methods.
#' The method is robust but computationally intensive, compared to normal methods,
#' and should not be used as a default - see \code{\link{generate_new_design}} for
#' less computationally expensive methods.
#'
#' The IDEMC method operates on an 'implausibility ladder', in the vein of common
#' annealing methods. Each rung of the ladder is characterised by the implausibility
#' threshold, and determinations are made about the structure of the points in each
#' rung using clustering. One step of the evolutionary algorithm can consist of the
#' following steps:
#'
#' Mutation. A point is modified using a random-walk proposal, which can be a global
#' move or a within-cluster move. Within-cluster moves are chosen with probability
#' \code{w}. The move is retained if the new point satisfies the implausibility
#' constraints of the rung.
#'
#' Crossover. Points are re-organised in descending order of how active each variable
#' is for the emulated outputs, and two different rungs are selected randomly. The
#' points are 'mixed' using one-point real crossover at a random crossover point,
#' producing two new points. The move is retained if both new points satisfy the
#' relevant implausibility constraints of their rung.
#'
#' Exchange. Two adjacent rungs are chosen and their points are swapped. The move
#' is retained if the higher-implausibility rung is appropriate for being in the
#' lower implausibility rung.
#'
#' At a given step, one of mutation or crossover is performed, with probability
#' of mutation being chosen determined by \code{pm}. If mutation is chosen, then
#' \code{M} mutation moves are performed; else \code{(n+1)/2} crossover moves are
#' performed on the \code{n} rungs. Exchange is always performed and \code{n+1} such
#' moves are performed.
#'
#' The choice of 'implausibility ladder' and clusters can be determined a priori,
#' or else this function performs a burn-in phase to determine them. Points are
#' generated using the idemc steps at the current rungs, and the next ladder rung
#' implausibility is chosen by requiring that a proportion \code{p} of points from
#' the previous rung are accepted in the new one. At each stage, \code{s} idemc
#' steps are performed. Once the final rung has implausibility no larger than the
#' desired \code{cutoff}, a final set of \code{sn} idemc steps are performed across
#' all rungs to determine final clusters.
#'
#' @importFrom cluster fanny
#'
#' @param ems The emulators to evaluate implausibility on
#' @param N The desired number of final points to generate
#' @param targets The target values for the emulated outputs
#' @param cutoff The desired implausibility cutoff of the final proposal
#' @param s The number of points to generate at intermediate burn-in steps
#' @param sn The number of points to generate at the final burn-in stage
#' @param p The proportion of space that should remain between ladder rungs
#' @param thin The thinning factor: a factor T means N*T points are generated to obtain N
#' @param pm The probability that an idemc step will use mutation moves
#' @param w The probability of local random walks in the mutation step
#' @param M The number of mutations to perform in an IDEMC step
#' @param detailed If TRUE, points proposed at every rung will be returned
#' @param verbose Should information about burn-in be displayed during the process?
#' @param get_burnt If TRUE, the procedure stops after burn-in, returning seeding for a full IDEMC proposal
#' @param burnt If provided, this is assumed to be the result of a burn-in (or a priori analysis)
#'
#' @return Either a list of data.frames, one per rung, or a single data.frame of points.
#'
#' @seealso \code{\link{generate_new_design}} for more standard point generation methods
#'
#' @references
#' Vernon & Williamson (2013) \doi{10.48550/arXiv.1309.3520}
#'
#' @export
#'
#' @examples
#' \donttest{ # Excessive runtime
#' idemc_res <- idemc(SIREmulators$ems, 200, SIREmulators$targets, s = 100, p = 0.3)
#' }
#'
idemc <- function(ems, N, targets, cutoff = 3, s = max(500, ceiling(N/5)),
sn = s, p = 0.4, thin = 1, pm = 0.9, w = 0.8, M = 10,
detailed = FALSE, verbose = interactive(),
get_burnt = FALSE, burnt = NULL) {
ems <- collect_emulators(ems)
ranges <- getRanges(ems, FALSE)
order_active <- order(
apply(do.call('rbind', map(ems, "active_vars")), 2, sum),
decreasing = TRUE)
if (is.null(burnt) ||
is.null(burnt$points) ||
is.null(burnt$clusters) ||
is.null(burnt$imps) ||
is.null(burnt$ladder)) {
# Burn-in stage one: identify all rungs
points <- setNames(do.call('cbind.data.frame',
map(ranges, ~runif(s, .[[1]], .[[2]]))),
names(ranges))
clusters_list <- list(list(ranges = ranges))
ladder = c(Inf)
imps <- nth_implausible(ems, points, targets)
imp_cutoff <- sort(imps)[floor(p*s)+1]
orig_imp <- max(imps)
point_imps <- c(Inf)
points_list <- list(points[nrow(points),])
while (imp_cutoff > cutoff) {
if (verbose) cat(imp_cutoff, "\n")
next_points <- points[imps <= imp_cutoff,]
next_cluster <- obtain_clusters(next_points)
points_list[[length(points_list)+1]] <- next_points[nrow(next_points),]
all_points <- list()
for (i in seq_along(points_list)) {
df <- data.frame(matrix(0, nrow = s+1, ncol = length(ranges))) |>
setNames(names(ranges))
df[1,] <- points_list[[i]]
all_points[[length(all_points)+1]] <- df
}
clusters_list[[length(clusters_list)+1]] <- next_cluster
ladder <- c(ladder, imp_cutoff)
point_imps <- c(point_imps, imps[imps <= imp_cutoff][sum(imps <= imp_cutoff)])
all_imps <- matrix(0, nrow = s+1, ncol = length(points_list))
all_imps[1,] <- point_imps
if (!verbose || !requireNamespace("progressr", quietly = TRUE)) {
for (i in seq_len(s)) {
these_points <- map(all_points, ~.[i,])
idemc_res <- idemc_step(ems, targets, these_points, all_imps[i,],
ladder, clusters_list, order_active, ranges,
M = M, pm = pm, w = w)
all_imps[i+1,] <- idemc_res$imps
for (j in seq_along(idemc_res$points)) {
all_points[[j]][i+1,] <- idemc_res$points[[j]]
}
}
}
else {
progressr::with_progress({
prog <- progressr::progressor(steps = s)
for (i in seq_len(s)) {
these_points <- map(all_points, ~.[i,])
idemc_res <- idemc_step(ems, targets, these_points, all_imps[i,],
ladder, clusters_list, order_active, ranges,
M = M, pm = pm, w = w)
all_imps[i+1,] <- idemc_res$imps
for (j in seq_along(idemc_res$points)) {
all_points[[j]][i+1,] <- idemc_res$points[[j]]
}
prog(message = sprintf("IDEMC generation (%g rungs) step %g", length(all_points), i))
}
})
}
points <- all_points[[length(all_points)]]
clusters_list <- map(seq_along(all_points), function(i) {
if (i == 1) return(list(ranges = ranges))
return(obtain_clusters(all_points[[i]]))
})
imps <- all_imps[,ncol(all_imps)]
imp_cutoff <- sort(imps)[floor(p*s)+1]
points_list <- map(all_points, ~.[nrow(.),])
}
if (imp_cutoff < cutoff) imp_cutoff <- cutoff
if (verbose) cat(imp_cutoff, "\n")
next_points <- points[imps <= imp_cutoff,]
next_cluster <- obtain_clusters(next_points)
points_list[[length(points_list)+1]] <- next_points[nrow(next_points),]
all_points <- list()
for (i in seq_along(points_list)) {
df <- data.frame(matrix(0, nrow = sn+1, ncol = length(ranges))) |>
setNames(names(ranges))
df[1,] <- points_list[[i]]
all_points[[length(all_points)+1]] <- df
}
clusters_list[[length(clusters_list)+1]] <- next_cluster
ladder <- c(ladder, imp_cutoff)
point_imps <- c(point_imps, imps[imps <= imp_cutoff][sum(imps <= imp_cutoff)])
all_imps <- matrix(0, nrow = sn+1, ncol = length(points_list))
all_imps[1,] <- point_imps
# Burn-in stage two: generate points to determine clusters
if (sn != s) {
if (!verbose || !requireNamespace("progressr", quietly = TRUE)) {
for (i in 1:sn) {
these_points <- map(all_points, ~.[i,])
idemc_res <- idemc_step(ems, targets, these_points, all_imps[i,],
ladder, clusters_list, order_active, ranges,
M = M, pm = pm, w = w)
all_imps[i+1,] <- idemc_res$imps
for (j in seq_along(idemc_res$points))
all_points[[j]][i+1,] <- idemc_res$points[[j]]
}
}
else {
progressr::with_progress({
prog <- progressr::progressor(steps = sn)
for (i in 1:sn) {
these_points <- map(all_points, ~.[i,])
idemc_res <- idemc_step(ems, targets, these_points, all_imps[i,],
ladder, clusters_list, order_active, ranges,
M = M, pm = pm, w = w)
all_imps[i+1,] <- idemc_res$imps
for (j in seq_along(idemc_res$points))
all_points[[j]][i+1,] <- idemc_res$points[[j]]
prog(message = sprintf("IDEMC burn-in Final %g", i))
}
})
}
points <- all_points[[length(all_points)]]
points_list <- map(all_points, ~.[nrow(.),])
clusters_list <- map(seq_along(all_points), function(i) {
if (i == 1) return(list(ranges = ranges))
return(obtain_clusters(all_points[[i]]))
})
point_imps <- all_imps[nrow(all_imps),]
}
# Burn-in complete. Generate actual points
if (verbose) cat("Burn-in completed: implausibility ladder is (",
round(orig_imp, 3), "; ", paste0(round(ladder[-1], 3),
collapse = "; "), ").\n",
sep = "")
if (get_burnt)
return(
list(points = points_list,
clusters = clusters_list,
imps = point_imps,
ladder = ladder)
)
}
else {
points_list <- burnt$points
clusters_list <- burnt$clusters
point_imps <- burnt$imps
ladder <- burnt$ladder
}
if (verbose) cat("Performing final point generation...\n")
all_points <- list()
for (i in seq_along(points_list)) {
df <- data.frame(matrix(0, nrow = N*thin+1, ncol = length(ranges))) |>
setNames(names(ranges))
df[1,] <- points_list[[i]]
all_points[[length(all_points)+1]] <- df
}
all_imps <- matrix(0, nrow = N*thin+1, ncol = length(points_list))
all_imps[1,] <- point_imps
if (!verbose || !requireNamespace("progressr", quietly = TRUE)) {
for (i in seq_len(N*thin)) {
these_points <- map(all_points, ~.[i,])
idemc_res <- idemc_step(ems, targets, these_points, all_imps[i,],
ladder, clusters_list, order_active, ranges,
M = M, pm = pm, w = w)
all_imps[i+1,] <- idemc_res$imps
for (j in seq_along(idemc_res$points))
all_points[[j]][i+1,] <- idemc_res$points[[j]]
}
}
else {
progressr::with_progress({
prog <- progressr::progressor(steps = N*thin)
for (i in seq_len(N*thin)) {
these_points <- map(all_points, ~.[i,])
idemc_res <- idemc_step(ems, targets, these_points, all_imps[i,],
ladder, clusters_list, order_active, ranges,
M = M, pm = pm, w = w)
all_imps[i+1,] <- idemc_res$imps
for (j in seq_along(idemc_res$points))
all_points[[j]][i+1,] <- idemc_res$points[[j]]
prog(message = sprintf("IDEMC Generation Step %g", i))
}
})
}
all_points <- map(all_points, ~.[-1,])
all_imps <- all_imps[-1]
if (thin > 1) {
all_points <- map(all_points, ~.[seq(thin, N*thin, by = thin),])
all_imps <- all_imps[seq(thin, N*thin, by = thin),]
}
if (detailed) return(all_points)
return(all_points[[length(all_points)]])
} #nocov end
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.