Nothing
##' Calculate the expected value of sample information from a decision-analytic
##' model
##'
##' Calculate the expected value of sample information from a decision-analytic
##' model
##'
##' See the \href{https://chjackson.github.io/voi/articles/voi.html#evsi}{package overview / Get Started vignette} for some examples of using this function.
##'
##' @inheritParams evppi
##'
##' @param study Name of one of the built-in study types supported by this
##' package for EVSI calculation. If this is supplied, then the columns of
##' \code{inputs} that correspond to the parameters governing the study data
##' should be identified in \code{pars}.
##'
##' Current built-in studies are
##'
##' \code{"binary"} A study with a binary outcome observed on one sample of
##' individuals. Requires one parameter: the probability of the outcome. The
##' sample size is specifed in the \code{n} argument to \code{evsi()}, and the
##' binomially-distributed outcome is named \code{X1}.
##'
##' \code{"trial_binary"} Two-arm trial with a binary outcome. Requires two
##' parameters: the probability of the outcome in arm 1 and 2 respectively.
##' The sample size is the same in each arm, specifed in the \code{n} argument
##' to \code{evsi()}, and the binomial outcomes are named \code{X1} and
##' \code{X2} respectively.
##'
##' \code{"normal_known"} A study of a normally-distributed outcome, with a
##' known standard deviation, on one sample of individuals. Likewise the
##' sample size is specified in the \code{n} argument to \code{evsi()}. The
##' standard deviation defaults to 1, and can be changed by specifying
##' \code{sd} as a component of the \code{aux_pars} argument, e.g.
##' \code{evsi(..., aux_pars=list(sd=2))}.
##'
##' Either \code{study} or \code{datagen_fn} should be supplied to
##' \code{evsi()}.
##'
##' For the EVSI calculation methods where explicit Bayesian analyses of the
##' simulated data are performed, the prior parameters for these built-in studies
##' are supplied in the \code{analysis_args} argument to \code{evsi()}. These
##' assume Beta priors for probabilities, and Normal priors for the mean of a
##' normal outcome.
##'
##'
##' @param datagen_fn If the proposed study is not one of the built-in types
##' supported, it can be specified in this argument as an R function to sample
##' predicted data from the study. This function should have the following
##' specification:
##'
##' 1. the function's first argument should be a data frame of parameter
##' simulations, with one row per simulation and one column per parameter.
##' The parameters in this data frame must all be found in \code{inputs},
##' but need not necessarily be in the same order or include all of them.
##'
##' 2. the function should return a data frame.
##'
##' 3. the returned data frame should have number of rows equal to the number
##' of parameter simulations in \code{inputs}.
##'
##' 4. if \code{inputs} is considered as a sample from the posterior, then
##' \code{datagen_fn(inputs)} returns a corresponding sample from the
##' posterior predictive distribution, which includes two sources of
##' uncertainty: (a) uncertainty about the parameters and (b) sampling
##' variation in observed data given fixed parameter values.
##'
##' 5. the function can optionally have more than one argument. If so, these
##' additional arguments should be given default values in the definition of
##' \code{datagen_fn}. If there is an argument called \code{n}, then it is
##' interpreted as the sample size for the proposed study.
##'
##' @param pars Character vector identifying which parameters are learned from the proposed study.
##' This is required for the moment matching and importance sampling methods,
##' and these should be columns of \code{inputs}. This is not required for the nonparametric
##' regression methods.
##'
##' @param pars_datagen Character vector identifying which columns of \code{inputs} are
##' the parameters required to generate data from the proposed study.
##' These should be columns of \code{inputs}.
##'
##' If \code{pars_datagen} is not supplied, then it is assumed to be the same as \code{pars}.
##' Note that these can be different. Even if the study data are generated by a particular parameter,
##' when analysing the data we could choose to ignore the information that the data provides about
##' that parameter.
##'
##' @param n Sample size of future study, or vector of alternative sample sizes.
##' This is understood by the built-in study designs. For studies specified
##' by the user with \code{datagen_fn}, if \code{datagen_fn} has an argument
##' \code{n}, then this is interpreted as the sample size. However if
##' calling \code{evsi} for a user-specified design where
##' \code{datagen_fn} does not have an \code{n} argument, then any \code{n}
##' argument supplied to \code{evsi} will be ignored.
##'
##' Currently this
##' shortcut is not supported if more than one quantity is required to
##' describe the sample size, for example, trials with unbalanced arms. In
##' that case, you will have to hard-code the required sample sizes into
##' `datagen_fn`.
##'
##' For the nonparametric regression and importance sampling methods, the
##' computation is simply repeated for each sample size supplied here.
##'
##' The moment matching method uses a regression model to estimate the
##' dependency of the EVSI on the sample size, hence to enable EVSI to be
##' calculated efficiently for any number of sample sizes (Heath et al. 2019).
##'
##' @param aux_pars A list of additional fixed arguments to supply to the
##' function to generate the data, whether that is a built-in study design or user-defined
##' function supplied in \code{datagen_fn}. For example, \code{evsi(..., aux_pars = list(sd=2))} defines the fixed
##' standard deviation in the \code{"normal_known"} model.
##'
##' @param method Character string indicating the calculation method. Defaults to \code{"gam"}.
##'
##' All the nonparametric regression methods supported for
##' \code{\link{evppi}}, that is \code{"gam","gp","earth","inla"}, can also be
##' used for EVSI calculation by regressing on a summary statistic of the
##' predicted data (Strong et al 2015).
##'
##' \code{"is"} for importance sampling (Menzies 2016)
##'
##' \code{"mm"} for moment matching (Heath et al 2018)
##'
##' Note that the \code{"is"} and \code{"mm"} methods are used in conjunction
##' with nonparametric regression, and the \code{gam_formula} argument can be
##' supplied to \code{evsi} to specify this regression - see
##' \code{\link{evppi}} for documentation of this argument.
##'
##' @param likelihood Likelihood function, required (and only required) for the
##' importance sampling method when a study design other than one of the
##' built-in ones is used. This should have two arguments, named as follows:
##'
##' \code{Y}: a one-row data frame of predicted data. Columns are defined by different
##' outcomes in the data, with names matching the names of the data frame returned by
##' \code{datagen_fn}.
##'
##' \code{inputs}. a data frame of simulated parameter values. Columns should correspond
##' to different variables in \code{inputs}. The column names should all be
##' found in the names of \code{inputs}, though they do not have to be in the same
##' order, or include everything in \code{inputs}. The number or rows should be the same as
##' the number of rows in \code{inputs}.
##'
##' The function should return a vector whose length matches the number of
##' rows of the parameters data frame given as the second argument. Each
##' element of the vector gives the likelihood of the corresponding set of
##' parameters, given the data in the first argument. An example is given in
##' the vignette.
##'
##' The likelihood can optionally have a \code{n} argument, which is interpreted
##' as the sample size of the study. If the \code{n}
##' argument to \code{evsi} is used then this is passed to the likelihood function.
##' Conversely any \code{n} argument to \code{evsi} will be ignored by a likelihood
##' function that does not have its own \code{n} argument.
##'
##' Note the definition of the likelihood should agree with the definition of
##' \code{datagen_fn} to define a consistent sampling distribution for the
##' data. No automatic check is performed for this.
##'
##' @param analysis_fn Function which fits a Bayesian model to the generated
##' data. Required for \code{method="mm"} if a study design other than one
##' of the built-in ones is used. This should be a function that takes the
##' following arguments:
##'
##' `data`: A data frame with names matching the output of `datagen_fn`
##'
##' `args`: A list with constants required in the Bayesian analysis, e.g.
##' prior parameters, or options for the analysis, e.g. number of MCMC
##' simulations. The component of this list called \code{n} is assumed to
##' contain the sample size of the study.
##'
##' `pars` Names of the parameters whose posterior is being sampled.
##'
##' The function should return a data frame with names matching `pars`,
##' containing a sample from the posterior distribution of the parameters
##' given data supplied through `data`.
##'
##' `analysis_fn` is required to have all three of these arguments, but you do
##' not need to use any elements of `args` or `pars` in the body of
##' `analysis_fn`. Instead, sample sizes, prior parameters, MCMC options and
##' parameter names can alternatively be hard-coded inside `analysis_fn`. Passing these
##' through the function arguments (via the \code{analysis_args} argument to
##' \code{evsi}) is only necessary if we want to use the same `analysis_fn` to
##' do EVSI calculations with different sample sizes or other settings.
##'
##' @param analysis_args List of arguments required for the Bayesian analysis of
##' the predicted data, e.g. definitions of the prior and options to control
##' sampling. Only used in \code{method="mm"}. This is required if the study
##' design is one of the built-in ones specified in \code{study}. If a custom
##' design is specifed through \code{analysis_fn}, then any constants needed
##' in `analysis_fn` can either be supplied in `analysis_args`, or hard-coded
##' in `analysis_fn` itself.
##'
##' For the built-in designs, the lists should have the following named
##' components. An optional component `niter` in each case defines the
##' posterior sample size (default 1000).
##'
##' `study="binary"`: `a` and `b`: Beta shape parameters
##'
##' `study="trial_binary"`: `a1` and `b1`: Beta shape parameters for the prior
##' for the first arm, `a2` and `b2`: Beta shape parameters for the prior for
##' the second arm.
##'
##' `study="normal_known"`: `prior_mean`, `prior_sd` (mean and standard deviation
##' deviation of the Normal prior) and `sampling_sd` (SD of an individual-level normal
##' observation, so that the sampling SD of the mean outcome over the study is
##' `sampling_sd/sqrt(n)`.
##'
##' @param model_fn Function which evaluates the decision-analytic model, given
##' parameter values. Required for \code{method="mm"}. See
##' \code{\link{evppi_mc}} for full documentation of the required specification
##' of this function.
##'
##' @param par_fn Function to simulate values from the uncertainty distributions
##' of parameters needed by the decision-analytic model. Should take one
##' argument and return a data frame with one row for each simulated value,
##' and one column for each parameter. See \code{\link{evppi_mc}} for full
##' specification.
##'
##' @param Q Number of quantiles to use in \code{method="mm"}.
##'
##' @param npreg_method Method to use to calculate the EVPPI, for those methods
##' that require it. This is passed to \code{\link{evppi}} as the
##' \code{method} argument.
##'
##' @param nsim Number of simulations from the model to use for calculating
##' EVPPI. The first \code{nsim} rows of the objects in \code{inputs} and
##' \code{outputs} are used.
##'
##' @param ... Other arguments understood by specific methods, e.g. \code{gam_formula}
##' and other controlling options (see \code{\link{evppi}}) can be passed to the
##' nonparametric regression used inside the moment matching method.
##'
##' @return A data frame with a column \code{pars}, indicating the
##' parameter(s), and a column \code{evsi}, giving the corresponding
##' EVPPI. If the EVSI for multiple sample sizes was requested,
##' then the sample size is returned in the column \code{n}, and if
##' \code{outputs} is of "cost-effectiveness analysis" form, so that
##' there is one EVPPI per willingness-to-pay value, then a column
##' \code{k} identifies the willingness-to-pay.
##'
##' @references
##'
##' Heath, A., Kunst, N., & Jackson, C. (eds.). (2024). Value of Information for Healthcare Decision-Making. CRC Press.
##'
##' Strong, M., Oakley, J. E., Brennan, A., & Breeze, P. (2015). Estimating the
##' expected value of sample information using the probabilistic sensitivity
##' analysis sample: a fast, nonparametric regression-based method. Medical
##' Decision Making, 35(5), 570-583.
##'
##' Menzies, N. A. (2016). An efficient estimator for the expected value of
##' sample information. Medical Decision Making, 36(3), 308-320.
##'
##' Heath, A., Manolopoulou, I., & Baio, G. (2018). Efficient Monte Carlo
##' estimation of the expected value of sample information using moment
##' matching. Medical Decision Making, 38(2), 163-173.
##'
##' Heath, A., Manolopoulou, I., & Baio, G. (2019). Estimating the expected
##' value of sample information across different sample sizes using moment
##' matching and nonlinear regression. Medical Decision Making, 39(4), 347-359.
##'
##' @export
evsi <- function(outputs,
inputs,
study=NULL,
datagen_fn=NULL,
pars=NULL,
pars_datagen=NULL,
n=100,
aux_pars=NULL,
method=NULL,
likelihood=NULL,
analysis_fn=NULL,
analysis_args=NULL,
model_fn=NULL,
par_fn=NULL,
Q=50,
npreg_method="gam",
nsim=NULL,
verbose=FALSE,
check=FALSE,
...)
{
check_inputs(inputs)
check_ss(n)
outputs <- check_outputs(outputs, inputs)
if (is.null(method))
method <- default_evsi_method()
if (is.null(nsim)) nsim <- nrow(inputs)
outputs <- subset_outputs(outputs, nsim)
inputs <- inputs[1:nsim,,drop=FALSE]
datagen_fn <- form_datagen_fn(study, datagen_fn, inputs, aux_pars)
## Could use any nonparametric regression method to regress on a summary statistic, in identical way to EVPPI estimation.
pars_datagen <- check_pars_datagen(pars, pars_datagen, inputs)
npreg_method <- default_npreg_method(pars)
if (method %in% npreg_methods) {
res <- evsi_npreg(outputs=outputs, inputs=inputs,
datagen_fn=datagen_fn, pars=pars_datagen, n=n,
method=method, verbose=verbose, check=check,
aux_pars = aux_pars, ...)
} else if (method=="is") {
if (verbose) message("Processing likelihood...")
likelihood <- form_likelihood(study, likelihood, inputs, datagen_fn, pars_datagen)
if (verbose) message("Entering evsi_is...")
res <- evsi_is(outputs=outputs, inputs=inputs,
pars=pars, pars_datagen=pars_datagen,
datagen_fn=datagen_fn, n=n,
aux_pars=aux_pars, likelihood=likelihood,
npreg_method=npreg_method, verbose=verbose, ...)
} else if (method=="mm") {
res <- evsi_mm(outputs=outputs, inputs=inputs,
pars=pars, pars_datagen=pars_datagen,
datagen_fn=datagen_fn,
study=study,
analysis_fn=analysis_fn,
analysis_args=analysis_args,
model_fn=model_fn,
par_fn=par_fn,
n=n, Q=Q,
npreg_method=npreg_method,
verbose=verbose, ...)
}
else stop("Other methods not implemented yet")
attr(res, "method") <- method
attr(res, "outputs") <- class(outputs)[1]
class(res) <- c("evsi", attr(res,"class"))
res
}
evsi_npreg <- function(outputs, inputs, datagen_fn, pars, n, method=NULL, se=FALSE, B=500, verbose, check, aux_pars=NULL, ...){
nn <- length(n)
res <- vector(nn, mode="list")
validate_char(pars, "pars")
for (i in seq_along(n)){
Tdata <- generate_data(inputs, datagen_fn, n[i], pars, aux_pars)
res[[i]] <- evppi_npreg(outputs=outputs, inputs=Tdata,
pars=names(Tdata), method=method, se=se, B=B, verbose=verbose, ...)
names(res[[i]])[names(res[[i]])=="evppi"] <- "evsi"
rownames(res[[i]]) <- NULL
}
resall <- do.call("rbind", res)
resall <- cbind(n=rep(n,each=nrow(res[[1]])), resall)
if (check){
attr(resall, "models") <- lapply(res, function(x)attr(x, "models"))
names(attr(resall,"models")) <- as.character(n)
}
resall
}
generate_data <- function(inputs, datagen_fn, n=100, pars, aux_pars=NULL){
check_datagen_fn(datagen_fn, inputs, pars)
args <- list(inputs=inputs, n=n, pars=pars)
args <- c(args, aux_pars)
do.call(datagen_fn, args)
}
default_evsi_method <- function(){
"gam"
}
check_study <- function(study) {
if (!is.character(study) || (!(study %in% studies_builtin)))
stop("``study` should be a character string matching one of the supported study designs")
}
form_datagen_fn <- function(study, datagen_fn, inputs, aux_pars=NULL){
if (!is.null(study)) {
check_study(study)
if (!is.null(datagen_fn))
warning("Ignoring `datagen_fn`, since a built-in study design was requested")
datagen_fn <- get(sprintf("datagen_%s", study))
} else {
if (is.null(datagen_fn)) stop("`datagen_fn` should be supplied if `study` is not supplied")
if (!is.function(datagen_fn)) stop("`datagen_fn` should be a function")
if (!("n" %in% names(formals(datagen_fn))))
formals(datagen_fn) <- c(formals(datagen_fn), list(n=100))
formals(datagen_fn) <- c(formals(datagen_fn), list(pars=NULL))
check_datagen_fn(datagen_fn, inputs, aux_pars)
}
datagen_fn
}
check_datagen_fn <- function(datagen_fn, inputs, pars=NULL, aux_pars=NULL){
## If there's more than one argument, check that those args have
## defaults (e.g. sample sizes). Give error for now if not, but
## consider relaxing if this becomes a problem
extra_args <- formals(datagen_fn)[-1]
extra_args <- extra_args[names(extra_args) != "pars"]
if (length(extra_args)>0){
no_defaults <- sapply(extra_args, is.symbol)
if (any(no_defaults)){
stop(sprintf("Arguments \"%s\" of `datagen_fn` do not have default values", paste(names(no_defaults),collapse=",")))
}
}
ret <- datagen_fn(inputs, pars=pars)
if (!is.data.frame(ret)) stop("`datagen_fn` should return a data frame")
parnames <- names(ret)[names(ret) %in% names(inputs)]
if (length(parnames)>0) {
stop(sprintf("`datagen_fn` returns variables with the same names as parameters (%s). It should return simulated data", paste(parnames, collapse=",")))
}
if (nrow(ret) != nrow(inputs)){
stop(sprintf("`datagen_fn` returns a data frame with %s rows. There should be %s rows, the same number of rows as `inputs`", nrow(ret), nrow(inputs)))
}
}
form_analysis_fn <- function(study, analysis_fn, analysis_args, datagen_fn, inputs, n, pars, pars_datagen){
if (!is.null(study)){
check_study(study)
analysis_fn <- get(sprintf("analysis_%s", study))
} else {
if (is.null(analysis_fn)) stop("`analysis_fn` should be supplied if `study` is not supplied")
check_analysis_fn(analysis_fn, analysis_args, datagen_fn, inputs, n, pars, pars_datagen)
}
analysis_fn
}
check_analysis_fn <- function(analysis_fn, analysis_args, datagen_fn, inputs, n, pars, pars_datagen){
if (!is.function(analysis_fn)) stop("`analysis_fn` should be a function")
if (!identical(names(formals(analysis_fn)), c("data","args","pars")))
stop("`analysis_fn` should have arguments `data`,`args`,`pars` in that order")
testdata <- datagen_fn(inputs = inputs[1,,drop=FALSE], n = n, pars=pars_datagen)
post_pars <- analysis_fn(data = testdata, args = analysis_args, pars=pars)
missing_pars <- setdiff(pars, names(post_pars))
if (length(missing_pars) > 0){
badpars <- paste(missing_pars, collapse=",")
stop(sprintf("Parameters %s not found in data frame returned by `analysis_fn`", badpars))
}
}
check_pars_in_modelfn <- function(pars, model_fn){
mfargs <- names(formals(model_fn))
missing_args <- setdiff(pars, mfargs)
if (length(missing_args) > 0){
badpars <- paste(missing_args, collapse=",")
stop(sprintf("Parameters %s are not arguments to model_fn. Should model_fn be reparameterised?", badpars))
}
}
check_ss <- function(n){
if (!is.numeric(n))
stop("sample size `n` should be a numeric vector")
if (any(n < 0))
stop("sample sizes `n` should all be positive integers")
}
form_analysis_args <- function(analysis_args, study, n){
if (!is.null(study)){
if (study %in% studies_builtin){
if (!is.list(analysis_args))
stop("analysis_args should be supplied as a named list if using one of the built-in study designs")
}
}
if (is.null(analysis_args))
analysis_args <- list() # for testing
if (is.null(analysis_args$n))
analysis_args$n <- n[1]
analysis_args
}
check_pars_datagen <- function(pars, pars_datagen, inputs){
if (is.null(pars_datagen))
pars_datagen <- pars
check_pars(pars_datagen, inputs, evppi=FALSE)
pars_datagen
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.