Nothing
#' Explore parameter space
#'
#' The function is aimed at getting an idea of how the parameter space
#' of a model behaves, so that parameter identifiability problems and correlations
#' between parameters can be explored. Therefore, the function samples a large
#' number of parameter sets by randomly drawing from each parameter's 95%
#' confidence interval (generated by [lik_profile()]). It then
#' checks how many of the parameter sets are within acceptable limits by comparing
#' the likelihood ratio of a parameter set vs. the original parameter set against
#' a chi-square distribution as degrees of freedom (df) the total number of profile
#' parameters (outer rim) or one df (inner rim). If needed, the function resamples
#' until at least `nr_accept` parameters sets are within the inner rim
#'
#' @param x a list of [caliset] objects
#' @param par best fit parameters from joined calibration
#' @param res output of 'lik_profile()' function
#' @param output character vector, name of output column of [simulate()] that
#' is used in calibration
#' @param sample_size number of samples to draw from each parameter interval
#' @param max_runs max number of times to redraw samples (within a smaller space), and repeat the process
#' @param nr_accept threshold for number of points sampled within the inner circle
#' @param sample_factor multiplication factor for sampling (95% interval * sample factor)
#'
#' @return a list containing a plot to explore the parameter space, and the `data.frame`
#' supporting it
#'
#' @export
#' @global LLR_accept LLR_quality
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' # Example with Lemna model - physiological params
#' # Before applying the function, a model needs to be calibrated and its parameters profiled
#' # Inputs for likelihood profiling
#'
#' # exposure - control run
#' exp <- Schmitt2013 %>%
#' filter(ID == "T0") %>%
#' select(time=t, conc)
#'
#' # observations - control run
#' obs <- Schmitt2013 %>%
#' filter(ID == "T0") %>%
#' select(t, BM=obs)
#'
#' # parameters after calibration
#' params <- c(
#' k_phot_max = 5.663571,
#' k_resp = 1.938689,
#' Topt = 26.7
#' )
#'
#' # set parameter boundaries (if different from defaults)
#' bounds <- list(
#' k_resp = list(0, 10),
#' k_phot_max = list(0, 30),
#' Topt = list(20, 30)
#' )
#'
#' # update metsulfuron
#' myscenario <- metsulfuron %>%
#' set_init(c(BM = 5, E = 1, M_int = 0)) %>%
#' set_param(list(
#' k_0 = 5E-5,
#' a_k = 0.25,
#' BM50 = 17600,
#' mass_per_frond = 0.1
#' )) %>%
#' set_exposure(exp) %>%
#' set_param(params) %>%
#' set_bounds(bounds)
#'
#' # Likelihood profiling
#' res <- lik_profile(
#' x = myscenario,
#' data = obs,
#' output = "BM",
#' par = params,
#' refit = FALSE,
#' type = "fine",
#' method = "Brent"
#' )
#' # plot
#' plot_lik_profile(res)
#'
#' # parameter space explorer
#' set.seed(1) # for reproducibility
#' res_space <- explore_space(
#' x = list(caliset(myscenario, obs)),
#' par = params,
#' res = res,
#' output = "BM",
#' sample_size = 1000,
#' max_runs = 20,
#' nr_accept = 100)
#'
#' plot_param_space(res_space)
#'
#' }
explore_space <- function(x,
par,
res,
output,
sample_size = 1000,
max_runs = 30,
nr_accept = 100,
sample_factor = 1.2) {
# some checks
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# check sensible use of function
if(length(output) > 1){
stop("Parameter `output` must have length of one")
}
if (length(names(res)) <= 1) {
stop("parameter space exploration only makes sense for 2 or more parameters")
}
# check if the same output is used throughout
#if(!(output %in% x[[1]]@scenario@endpoints)){
# stop("Specified output not present in scenario object (x)")
#}
if(!(output %in% colnames(x[[1]]@data))){
stop("Specified output not present in data of scenario object (x)")
}
# check if explored parameters are part of the model
if (any(names(res) %in% names(x[[1]]@scenario@param) == "FALSE")) {
stop("Profiled parameters (in res) are not part of the scenario object (x)")
}
# check if boundaries for explored parameters are available
if(any(names(res) %in% names(x[[1]]@scenario@param.bounds) == "FALSE")){
stop("No parameter boundaries available for explored parameters, please set bounds in scenario object (x)")
}
# calculate log likelihood with original model, for the profiled pars
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
pred_orig <- list()
ll_orig <- list()
for (i in seq_along(x)) {
pred_orig[[i]] <- x[[i]]@scenario %>%
set_times(x[[i]]@data[, 1]) %>% # time is the 1st column, mandatory that it is the 1st column
set_param(par) %>% #use pars from calibration
simulate()
ll_orig[[i]] <- log_lik(
npars = length(res),
obs = x[[i]]@data[, 2], # observations are the 2nd column, mandatory that it is the 2nd column
pred = pred_orig[[i]][, c(output)]
)
}
ll_orig <- sum(unlist(ll_orig))
# obtain a (random uniform) sample from each parameter's profile region,
# and include the original param values
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
par_sample_list <- list()
for (i in 1:length(res)) {
par_sample_list[[i]] <- c(
res[[i]]$orig_par_value, # original par value is put first
stats::runif(sample_size,
min = res[[i]]$prof_region[1] / sample_factor,
max = res[[i]]$prof_region[2] * sample_factor
)
)
names(par_sample_list)[[i]] <- names(res[i])
}
par_sample <- data.frame(par_sample_list)
# calculate LL, LLR for each parameter set
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
LL <- NULL
for (i in 1:nrow(par_sample)) {
LL_tmp <- list()
for (j in seq_along(x)) {
pred <- x[[j]]@scenario %>%
set_param(param = par_sample[i, ]) %>%
set_times(x[[j]]@data[, 1]) %>%
simulate()
LL_tmp[[j]] <- log_lik(
npars = length(res),
obs = x[[j]]@data[, 2],
pred = pred[, output]
)
}
LL <- c(LL, sum(unlist(LL_tmp)))
}
# add likelihood to the dataframe
par_sample$LL <- LL
# calc likelihood ratio: -2*ln(likelihood ratio)
par_sample$LLR <- -2 * (par_sample$LL - ll_orig) # matlab BYOM L. 704
# X2 difference test to compare nested models
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# cutoffs for the Chi-square
# these are the values from a standard chi-square distribution table, for df 1
# to 10, at a alpha = 0.05 significance level,
# these values are used in the chi-square test to determine if a fit is significantly
# different from a previous one (i.e. larger than the cutoff given here)
chi_table <- c(3.8415, 5.9915, 7.8147, 9.4877, 11.070, 12.592, 14.067, 15.507, 16.919, 18.307)
chi_crit_j <- chi_table[length(res)] # cut-off for the 95% profile region
chi_crit_s <- chi_table[1] # cut-off for single par. CI
# clean dataset based on criteria
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
par_sample$LLR_accept <- ifelse(par_sample$LLR < chi_crit_j, "accept", "reject") # accept combinations within the profile region
par_sample$LLR_quality <- ifelse(par_sample$LLR < chi_crit_s, "Inner", "Outer") # give preference to those with 95% CI inner rim
# calculate nr of points within inner rim
inner_ind <- which(par_sample$LLR_quality == "Inner") # remember which ones are in the inner rim (used/updated later in the while loop)
inner_size <- nrow(par_sample[inner_ind, ])
# Additional sampling (repeating steps above with slight modifications)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sample_iter <- 1
while (inner_size < nr_accept) { # number of accepted points should be >= nr_accept
message("resampling parameter space, iteration: ", sample_iter)
# obtain a (random normal) sample based on the previously "inner" rim samples
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
par_sample_list <- list()
for (i in 1:length(res)) {
par_sample_list[[i]] <- stats::rnorm(sample_size,
mean = mean(par_sample[inner_ind, i]),
sd = stats::sd(par_sample[inner_ind, i]) * 1.5
)
names(par_sample_list)[[i]] <- names(res[i])
}
par_sample_new <- data.frame(par_sample_list)
# ensure that values are all within param bounds
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
for(i in 1:length(res)){ # for each parameter
nm <- names(res)[i]
too_small <- which(par_sample_new[, i] < x[[1]]@scenario@param.bounds[[nm]][1])
too_large <- which(par_sample_new[, i] > x[[1]]@scenario@param.bounds[[nm]][2])
if(length(c(too_small, too_large)) > 0){ # remove samples outside boundaries (if present)
par_sample_new <- par_sample_new[-c(too_small, too_large), ]
if (nrow(par_sample_new) == 0) {
stop("all sampled parameter values out of bounds")
}
}
}
# calculate LL, LLR for each parameter set
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
LL <- NULL
for (i in 1:nrow(par_sample_new)) {
LL_tmp <- list()
for (j in seq_along(x)) {
pred <- x[[j]]@scenario %>%
set_param(param = par_sample_new[i, ]) %>%
set_times(x[[j]]@data[, 1]) %>%
simulate()
LL_tmp[[j]] <- log_lik(
npars = length(res),
obs = x[[j]]@data[, 2],
pred = pred[, output]
)
}
LL <- c(LL, sum(unlist(LL_tmp)))
}
# add likelihood to the dataframe
par_sample_new$LL <- LL
# calc likelihood ratio: -2*ln(likelihood ratio)
par_sample_new$LLR <- -2 * (par_sample_new$LL - ll_orig) # matlab BYOM L. 704
# clean dataset based on criteria
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
par_sample_new$LLR_accept <- ifelse(par_sample_new$LLR < chi_crit_j, "accept", "reject") # accept combinations within the profile region
par_sample_new$LLR_quality <- ifelse(par_sample_new$LLR < chi_crit_s, "Inner", "Outer") # give preference to those with 95% CI inner rim
# join with previous df
par_sample <- rbind(par_sample, par_sample_new)
# calculate nr of points within inner rim (and update inner_ind)
inner_ind <- which(par_sample$LLR_quality == "Inner") # remember which ones are in the inner rim (used/updated later in the while loop)
inner_size <- nrow(par_sample[inner_ind, ])
sample_iter <- sample_iter + 1
if (sample_iter >= max_runs) {
message("max nr of iterations reached, increase 'max_runs'")
break()
}
} # end while loop
# find best LLR value, and compare with original LLR (which is per definition = 0)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if (min(par_sample$LLR) < 0) {
message("better optimum found in space explorer")
best_fit_ind <- which(par_sample$LLR == min(par_sample$LLR))
par_sample$LLR_quality[best_fit_ind] <- "Best fit"
}
par_sample[1, "LLR_quality"] <- "Original fit"
# clean and return
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
output <- par_sample %>%
dplyr::filter(LLR_accept == "accept") %>%
dplyr::select(!c(LLR_accept))
class(output) <- c(class(output), "param_space")
return(output)
}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#' Plot likelihood profiles or all profiled parameters
#'
#' The function provides bivariate parameter space plots indicating
#' parameter draws (from the 95%confidence intervals per parameter obtained
#' through likelihood profiling) that fall within the inner rim (in green, i.e.
#' parameter sets which are not significantly different from the original, based
#' on a chi-square test). The original parameter set is also indicated (in orange),
#' and, if different from the original set, the best fit parameter set is indicated
#' (in red)
#'
#' @param x object of class param_space
#'
#' @return plots
#' @global LL
#' @export
plot_param_space <- function(x){
# make sure x is a param_space
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
stopifnot("param_space" %in% class(x))
# find best LLR value, and compare with original LLR (which is per definition = 0)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if (min(x$LLR) < 0) {
plot_colors <- c("tomato", "orange", "#4DAC26", "#0571B0")
plot_size <- c(3, 3, 1.5, 1.5)
index <- which(x$LLR_quality %in% c("Original fit", "Best fit"))
x$LLR_quality <- as.factor(x$LLR_quality)
x$LLR_quality <- factor(x$LLR_quality,
levels = c("Best fit", "Original fit", "Inner", "Outer"))
} else {
plot_colors <- c( "orange","#4DAC26", "#0571B0")
plot_size <- c(3, 1.5, 1.5)
index <- which(x$LLR_quality %in% "Original fit")
x$LLR_quality <- as.factor(x$LLR_quality)
x$LLR_quality <- factor(x$LLR_quality,
levels = c("Original fit", "Inner", "Outer"))
}
# reorder so that the original and best fit are the last datapoints (and
# thus are plotted on top)
x_best <- x[index,]
x_withoutbest <- x[-index,]
x <- rbind(x_withoutbest, x_best)
# plot of all correlations
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# dummy plot to get a legend
p <- x %>%
ggplot2::ggplot(
ggplot2::aes(
x = colnames(x)[1],
y = LL,
color = LLR_quality)) +
ggplot2::geom_point() +
ggplot2::scale_color_manual(values = plot_colors)
leg <- GGally::grab_legend(p)
# actual plot, complete
full_plot <- x %>%
GGally::ggpairs(
columns = colnames(x)[-which(colnames(x) %in% c("LL", "LLR", "LLR_quality"))],
ggplot2::aes(
color = LLR_quality,
size = as.factor(LLR_quality)
),
upper = list(continuous = "blank"),
diag = list(continuous = "blankDiag"),
title = "Parameter space",
legend = leg
) +
ggplot2::scale_color_manual(values = plot_colors) +
ggplot2::scale_size_manual(values = plot_size) +
ggplot2::theme_classic()
# function to modify ggpairs output (trim empty space)
gpairs_lower <- function(g) {
g$plots <- g$plots[-(1:g$nrow)]
g$yAxisLabels <- g$yAxisLabels[-1]
g$nrow <- g$nrow - 1
g$plots <- g$plots[-(seq(g$ncol, length(g$plots), by = g$ncol))]
g$xAxisLabels <- g$xAxisLabels[-g$ncol]
g$ncol <- g$ncol - 1
g
}
# modification of the plot
full_plot <- gpairs_lower(full_plot)
return(full_plot)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.