# Helper Functions for uniform sphere sampling
#'
#' @importFrom stats rexp rnorm
runifs <- function(n, d, c = rep(0, d), r = 1) {
init_points <- matrix(rnorm(n*d, 0, 1), nrow = n, byrow = T)
exps <- rexp(n, 0.5)
denoms <- (exps + apply(init_points, 1, function(x) sum(x^2)))^(0.5)
swept <- sweep(init_points, 1, denoms, "/")
return(t(apply(swept, 1, function(x) x*r + c)))
}
punifs <- function(x, c = rep(0, length(x)), r = 1) {
return(ifelse(sum((x-c)^2 / r^2) <= 1, 1, 0))
}
#' Generate Simulator Runs
#'
#' A wrapper for a variety of sampling methods.
#' Given a set of trained emulators, finds the next set of points that will be informative
#' for the next wave of emulators.
#'
#' If the method is 'lhs', this creates a new training set using LHS, and
#' then finds the trace of the variance matrix of the emulators across these points (this is
#' broadly equivalent to using V-optimality). We repeat this for \code{n_runs}, and select
#' the configuration that minimises the mean of the variances across the emulators.
#' If observations are given, then these are used to ensure that the new sample points are
#' non-implausible according to the current emulators.
#'
#' If the method is 'slice', then a known set of non-implausible points \code{plausible_set}
#' must be provided.
#' It then applies slice sampling, using implausibility as a measure of success.
#'
#' If the method is 'optical', then the optical depth of the space in each parameter direction
#' is calculated (using a known set of non-implausible points \code{plausible_set}),
#' and used as a distribution for that parameter. Points are sampled from the
#' collection of distributions and non-implausible points generated are filtered out. From the
#' remaining points, a sample of the required size is generated using maximin criterion.
#'
#' If the method is 'importance', importance sampling is used. Starting from a set of
#' non-implausible (preferably space-filling) points, points are sampled from a distribution
#' around the points, and included in the output based on a weighted measure gained from the
#' mixture distribution of the initial points. The set \code{plausible_set} must be specified.
#' If \code{burn_in} is TRUE, then a burn-in phase is used to determine the optimal parameters
#' for the proposal distribution.
#'
#' Note that the \code{plausible_set} parameter size differs between the methods that use
#' it. The optical set should be as large as possible
#' in order to accurately represent the optical depth in each parameter direction; the set for
#' importance sampling and slice sampling should be smaller (and probably smaller than the
#' desired number of output points) in order to expedite the initial set-up of the sampling
#' strategy.
#'
#' For any sampling strategy, the parameters \code{emulators}, \code{ranges} and
#' \code{z} must be specified.
#'
#' If \code{line_sample} is \code{TRUE}, then the boundaries of the space are explored as follows.
#' The plausible set provided (or that generated by LHS with rejection) is used as a base set, and
#' lines are chosen connecting points in the set. A number of points are sampled along these
#' lines (extending beyond the given points) and are tested for non-implausibility. Any that lie
#' on the edge of the non-implausible region are added to the set.
#'
#' These methods will not necessarily work if the target space is very small, or it may miss
#' parts of the target space if it is disconnected. For such target spaces, consider using the
#' much more computationally intensive \code{\link{IDEMC}}.
#'
#' @importFrom mvtnorm dmvnorm rmvnorm
#' @importFrom stats setNames runif dist
#'
#' @param emulators A list of \code{\link{Emulator}} objects, trained on the design points
#' @param ranges The ranges of the input parameters
#' @param n_points Optional. Specifies how many additional points are required. Default: 10*(number of emulators)
#' @param z Checks implausibility of sample points to restrict to only non-implausible points.
#' @param method Any of 'lhs', 'slice', 'optical'.
#' @param cutoff Optional. If z is given, this is the implausibility cutoff for the filtering. Default = 3
#' @param nth Optiional. To be passed to the n parameter of nth implausible. Default = 1.
#' @param include_line Should line sampling be applied after point generation? Default: TRUE.
#' @param plausible_set Optional - a set of non-implausible points from which to start.
#' @param burn_in If importance sampling, should a burn-in phase be used? Default: FALSE
#' @param verbose Should progress statements by made? Default: TRUE
#' @param ... Any parameters that need to be passed to a particular method (see below)
#'
#' @return A \code{data.frame} containing the set of new points to simulate at.
#'
#' @seealso \code{\link{IDEMC}} for point generation in small target regions.
#'
#' @export
#'
#' @examples
#' ranges <- list(aSI = c(0.1, 0.8), aIR = c(0, 0.5), aSR = c(0, 0.05))
#' ems <- emulator_from_data(GillespieSIR, output_names = c('nS', 'nI', 'nR'),
#' ranges = ranges, quadratic = TRUE)
#' trained_ems <- purrr::map(seq_along(ems),
#' ~ems[[.x]]$adjust(GillespieSIR, c('nS', 'nI', 'nR')[[.x]]))
#' targets <- list(
#' list(val = 281, sigma = 10.43),
#' list(val = 30, sigma = 11.16),
#' list(val = 689, sigma = 14.32)
#' )
#' non_imp_points <- GillespieImplausibility[GillespieImplausibility$I <= 4, names(ranges)]
#' \donttest{
#' pts_default <- generate_new_runs(trained_ems, ranges, 10, targets, cutoff = 3)
#' pts_lhs <- generate_new_runs(trained_ems, ranges, 10, targets, cutoff = 3, method = 'lhs')
#' pts_slice <- generate_new_runs(trained_ems, ranges, 10, targets,
#' method = 'slice', cutoff = 4, plausible_set = non_imp_points, include_line = FALSE)
#' pts_optical <- generate_new_runs(trained_ems, ranges, 10, targets,
#' method = 'optical', cutoff = 4, plausible_set = non_imp_points, include_line = FALSE)
#' non_imp_sample <- non_imp_points[sample(seq_along(non_imp_points[,1]), 20),]
#' pts_importance <- generate_new_runs(trained_ems, ranges, 10, targets,
#' method = 'importance', cutoff = 4, plausible_set = non_imp_sample, include_line = FALSE)}
generate_new_runs <- function(emulators, ranges, n_points = 10*length(ranges), z, method = 'importance', include_line = TRUE, cutoff = 3, nth = 1, plausible_set, burn_in = FALSE, verbose = TRUE, ...) {
if (missing(plausible_set) || method == 'lhs') {
if (method != 'lhs' && verbose) print("Performing LH sampling with rejection...")
points <- lhs_generation(emulators, ranges, n_points, z, cutoff = cutoff, verbose = verbose, nth = nth)
if(nrow(points) == 0) {
warning("No points generated from initial LHS.")
return(points)
}
been_checked <- TRUE
}
else {
points <- plausible_set
been_checked <- FALSE
}
if (method == 'lhs') {
if (verbose) print("Performing LH sampling with rejection...")
return(points)
}
if (include_line) {
if (verbose) print("Performing line sampling...")
points <- line_sample(emulators, points, z, ranges, nth = nth)
}
if (method == 'importance') {
if (verbose) print("Performing importance sampling...")
if (!burn_in)
sd <- purrr::map_dbl(ranges, ~.[[2]]-.[[1]])/4
else
sd <- NULL
points <- importance_sample(emulators, ranges, n_points, z, cutoff, sd, plausible_set = points, burn_in = burn_in, checked = been_checked, nth = nth, ...)
}
else if (method == 'slice') {
if (verbose) print("Performing slice sampling...")
points <- slice_generation(emulators, ranges, n_points, z, cutoff, x = unlist(points[sample(nrow(points), 1),]), ...)
}
else if (method == 'optical')
{
if (verbose) print("Performing optical depth sampling...")
points <- optical_depth_generation(emulators, ranges, n_points, z, cutoff, plausible_set = points)
}
else stop("Unknown method used. Options are 'lhs', 'slice', 'optical', 'importance'.")
return(points)
}
# A function to perform lhs sampling
lhs_generation <- function(emulators, ranges, n_points, z, n_runs = 100, cutoff = 3, verbose = TRUE, nth = 1, measure.method = 'V_optimal') {
current_trace = NULL
out_points <- NULL
new_points <- eval_funcs(scale_input, setNames(data.frame(2*(lhs::randomLHS(n_points*20, length(ranges))-1/2)), names(ranges)), ranges, FALSE)
if (!missing(z)) new_points <- new_points[nth_implausible(emulators, new_points, z, n = nth)<=cutoff,]
if (!"data.frame" %in% class(new_points)) new_points <- setNames(data.frame(new_points), names(ranges))
if (length(new_points[,1]) < n_points) {
if (verbose) message(cat("Only", length(new_points[,1]), "points generated."))
if (length(new_points[,1]) == 0) return(new_points)
return(setNames(data.frame(new_points), names(ranges)))
}
#else message(cat(length(new_points[,1]), "non-implausible points generated. Applying V-optimality..."))
for (i in 1:n_runs)
{
test_points <- new_points[sample(seq_along(new_points[,1]), n_points),]
if(!"data.frame" %in% class(test_points)) test_points <- setNames(data.frame(test_points), names(ranges))
if (measure.method == 'maximin') {
measure <- min(dist(test_points))
if (is.null(current_trace) || measure > current_trace) {
out_points <- test_points
current_trace <- measure
}
}
else if (measure.method == 'V_optimal') {
measure <- mean(purrr::map_dbl(seq_along(emulators), ~sum(emulators[[.x]]$get_cov(test_points))))
if (is.null(current_trace) || measure < current_trace) {
out_points <- test_points
current_trace <- measure
}
}
else stop("Unknown optimality method specified.")
}
return(setNames(data.frame(out_points), names(ranges)))
}
# A function to perform slice sampling
slice_generation <- function(emulators, ranges, n_points, z, cutoff = 3, x) {
J <- function(x) {
for (i in 1:length(emulators)) {
if (!emulators[[i]]$implausibility(x, z[[i]], cutoff)) return(FALSE)
}
return(TRUE)
}
out_points <- x
x0 <- x_new <- c(out_points, use.names = F)
for (i in 1:n_points) {
for (j in 1:length(ranges)) {
xl = ranges[[j]][1]
xr = ranges[[j]][2]
repeat {
x_new[j] <- runif(1, min = xl, max = xr)
if (J(data.frame(matrix(x_new, nrow = 1)) %>% setNames(names(ranges)))) break
ifelse(x_new[j]<x0[j], xl <- x_new[j], xr <- x_new[j])
}
}
out_points <- cbind(out_points, x_new)
x0 <- x_new
}
return(setNames(data.frame(t(out_points), row.names = NULL), names(ranges))[2:n_points+1,])
}
# A function to perform optical depth sampling
optical_depth_generation <- function(emulators, ranges, n_points, z, n_runs = 100, cutoff = 3, plausible_set, ...) {
get_depth <- function(plausible_set, ranges, var_name, nbins = 100) {
output <- c()
varseq <- seq(ranges[[var_name]][1], ranges[[var_name]][2], length.out = (nbins+1))
for (i in 1:(length(varseq)-1)) {
opdepth <- length(plausible_set[plausible_set[[var_name]]>=varseq[i] & plausible_set[[var_name]]<varseq[i+1],1])/length(plausible_set[,1])
output <- c(output, (varseq[i]+varseq[i+1])/2, opdepth)
}
return(setNames(data.frame(t(matrix(output, nrow=2))), c('bin', 'prob')))
}
output <- c()
for (i in 1:length(ranges)) {
probs <- get_depth(plausible_set, ranges, names(ranges)[i], ...)
new_pts <- sample(probs$bin, n_points*10, prob = probs$prob, replace = TRUE) + runif(n_points*10, min = 0, max = probs$bin[2]-probs$bin[1])
output <- c(output, new_pts)
}
df <- setNames(data.frame(matrix(output, nrow = n_points*10)), names(ranges))
df <- df[nth_implausible(emulators, df, z) <= cutoff,]
if (length(df[,1]) < n_points)
{
message(cat("Only", length(df[,1]), "points generated from LHS with rejection."))
return(df)
}
cdist <- 0
for (i in 1:n_runs) {
tempset <- df[sample(seq_along(df[,1]), n_points),]
tempdist <- min(c(dist(tempset)))
if (tempdist > cdist) {
outset <- tempset
cdist <- tempdist
}
}
message(cat("Minimum distance:", cdist))
return(outset)
}
importance_sample <- function(ems, ranges, n_points, z, cutoff = 3, sd = NULL, distro = 'sphere', plausible_set, burn_in = FALSE, checked = FALSE, nth = 1) {
J <- function(dat) {
if (nth == 1) {
t_dat <- dat
for (i in 1:length(ems)) {
t_dat <- t_dat[ems[[i]]$implausibility(t_dat, z[[i]], cutoff),]
}
return(rownames(dat) %in% rownames(t_dat))
}
else {
imps <- nth_implausible(ems, dat, z, n = nth)
return(rownames(dat) %in% rownames(dat[imps < cutoff,]))
}
}
range_func <- function(dat, ranges) {
apply(dat, 1, function(x) all(purrr::map_lgl(seq_along(ranges), ~x[.]>=ranges[[.]][1] && x[.]<=ranges[[.]][2])))
}
if (!burn_in) {
if (is.null(sd)) {
if (distro == 'normal') {
lengths <- purrr::map_dbl(ranges, ~.[2]-.[1])
sd <- diag((lengths/6)^2, length(lengths))
}
else sd = purrr::map_dbl(ranges, ~.[[2]]-.[[1]])/3
}
}
if (is.null(sd)) {
if (distro == 'sphere') {
print("Performing burn-in..")
upper_n_p <- 500
sd_temp <- mean(purrr::map_dbl(ranges, ~.[[2]]-.[[1]]))/3
sd_int <- sd_temp/10
old_vol <- importance_sample(ems, ranges, max(upper_n_p, n_points), z, sd = sd_temp, plausible_set = plausible_set, burn_in = TRUE)
new_vol <- old_vol
while(old_vol <= new_vol) {
if (new_vol == 0) upper_n_p <- 2 * upper_n_p
sd_temp <- sd_temp + sd_int
old_vol <- new_vol
new_vol <- importance_sample(ems, ranges, max(upper_n_p, n_points), z, sd = sd_temp, plausible_set = plausible_set, burn_in = TRUE)
}
return(importance_sample(ems, ranges, n_points, z, sd = sd_temp, plausible_set = plausible_set))
}
}
new_points <- if (checked) plausible_set else plausible_set[J(plausible_set),]
if (distro == 'normal') {
weights <- apply(plausible_set, 1, function(x) 1/nrow(plausible_set) * sum(apply(plausible_set, 1, function(y) dmvnorm(x, mean = y, sigma = sd))))
min_w <- min(weights)
}
if (nrow(new_points) < n_points) {
repeat {
which_points <- plausible_set[sample(1:nrow(plausible_set), 10*n_points, replace = TRUE),]
if (distro == 'normal')
proposal_points <- t(apply(which_points, 1, function(x) rmvnorm(1, mean = unlist(x, use.names = F), sigma = sd)))
else {
proposal_points <- t(apply(which_points, 1, function(x) runifs(1, length(plausible_set), unlist(x, use.names = F), r = sd)))
}
p_rest1 <- proposal_points[range_func(setNames(data.frame(proposal_points), names(ranges)), ranges), ]
proposal_restrict <- p_rest1[J(setNames(data.frame(p_rest1), names(ranges))),]
possibleError <- tryCatch(
if (nrow(proposal_restrict) < 2) next,
error = function(e) e
)
if (inherits(possibleError, "error")) next
if (distro == 'normal') {
t_weights <- apply(proposal_restrict, 1, function(x) 1/nrow(plausible_set) * sum(apply(plausible_set, 1, function(y) dmvnorm(x, mean = y, sigma = sd))))
p_weights <- min_w/t_weights
}
else
p_weights <- apply(proposal_restrict, 1, function(x) sum(apply(plausible_set, 1, function(y) punifs(x, y, r = sd))))
unifs <- runif(length(p_weights))
new_points <- rbind(new_points, setNames(data.frame(proposal_restrict[unifs < 1/p_weights,]), names(ranges)))
uniqueness <- row.names(unique(signif(new_points, 6)))
new_points <- new_points[uniqueness,]
if (burn_in || nrow(new_points) >= n_points) break
}
}
if (burn_in) return(nrow(plausible_set) * nrow(new_points)/(10*n_points) * pi^(length(ranges)) * sd^(length(ranges)) / gamma(length(ranges)/2 + 1))
return(new_points[sample(nrow(new_points), n_points),])
}
# Line sampling
line_sample <- function(ems, sample_points, z, ranges, nlines = 20, ppl = 26, cutoff = 3, nth = 1) {
if (nrow(sample_points) < 2) return(sample_points)
nlines <- min(nrow(sample_points)*(nrow(sample_points)-1)/2, nlines)
if (ppl%%4 == 1) ppl <- ppl+1
range_func <- function(x, ranges) {
all(purrr::map_lgl(seq_along(ranges), ~x[.]>=ranges[[.]][1] && x[.]<=ranges[[.]][2]))
}
s_lines <- lapply(1:(10*nlines), function(x) {
pts <- sample_points[sample(nrow(sample_points), 2),]
pt_dist <- sqrt(sum(pts[1,] - pts[2,])^2)
return(list(p1 = pts[1,], p2 = pts[2,], dist = pt_dist))
})
top_points <- s_lines[order(purrr::map_dbl(s_lines, ~.$dist), decreasing = TRUE)][1:nlines]
selected_points <- array(0, dim = c(nlines * ppl, length(sample_points)))
sampled_points <- do.call('rbind', lapply(top_points, function(x) do.call('rbind', lapply(seq(-1, 1, length.out = ppl), function(y) (x[[1]]+x[[2]])/2 + y*(x[[2]]-x[[1]])))))
imps <- nth_implausible(ems, sampled_points, z, n = nth) < cutoff
in_rg <- apply(sampled_points, 1, range_func, ranges)
include_points <- purrr::map_lgl(seq_along(imps), function(x) {
if ((x %% ppl == 0 || x %% ppl == 1) && imps[x] && in_rg[x]) return(TRUE)
if (imps[x] && in_rg[x] && (!imps[x-1] || !imps[x+1])) return(TRUE)
return(FALSE)
})
out_df <- rbind(sample_points, sampled_points[include_points,])
uniqueness <- row.names(unique(signif(out_df, 6)))
return(out_df[uniqueness,])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.