R/ExprModVar.R
In rdecision: Decision Analytic Modelling in Health Economics

#' @title A model variable constructed from an expression of other variables
#'
#' @description An R6 class representing a model variable constructed from an
#' expression involving other variables.
#'
#' @details A class to support expressions involving objects of base class
#' \code{ModVar}, which itself behaves like a model variable. For example, if
#' \code{A} and \code{B} are variables with base class \code{ModVar} and
#' \code{c} is a variable of type \code{numeric}, then it is not possible to
#' write, for example, \code{x <- 42*A/B + c}, because R cannot manipulate class
#' variables using the same operators as regular variables. But such forms of
#' expression may be desirable in constructing a model and this class provides
#' a mechanism for doing so. Inherits from class \code{ModVar}.
#'
#' @references{
#'   Briggs A, Claxton K, Sculpher M. Decision modelling for health
#'   economic evaluation. Oxford, UK: Oxford University Press; 2006.
#' }
#'
#' @note For many expressions involving model variables there will
#' be no closed form expressions for the mean, standard deviation and
#' the quantiles. When an \code{ExprModVar} is created, an empirical
#' distribution is generated by repeatedly drawing a random sample from each
#' operand and evaluating the expression. The empirical distribution, which
#' becomes associated with the object, is used to provide estimates of the
#' mean, standard deviation and the quantiles via functions \code{mu_hat},
#' \code{sigma_hat} and \code{q_hat}.
#'
#' @note For consistency with \code{ModVar}s which are not expressions, the
#' function \code{mean} returns the value of the expression when all
#' its operands take their mean values. This will, in general, not
#' be the mean of the expression distribution (which can be obtained
#' via \code{mu_hat}), but is the value normally used in the base
#' case of a model as the point estimate. As Briggs \emph{et al} note
#' (section 4.1.1) "in all but the most non-linear models, the
#' difference between the expectation over the output of a
#' probabilistic model and that model evaluated at the mean values
#' of the input parameters, is likely to be modest."
#'
#' @note Functions \code{SD}, \code{mode} and \code{quantile} return \code{NA}
#' because they do not necessarily have a closed form. The standard
#' deviation can be estimated by calling \code{sigma_hat} and the
#' quantiles by \code{q_hat}. Because a unimodal distribution is not
#' guaranteed, there is no estimator provided for the mode.
#'
#' @note Method \code{distribution} returns the string representation
#' of the expression used to create the model variable.
#'
#' @author Andrew J. Sims \email{andrew.sims@@newcastle.ac.uk}
#' @docType class
#' @export
#'
ExprModVar <- R6::R6Class(
  classname = "ExprModVar",
  lock_class = TRUE,
  inherit =  ModVar,
  private = list(
    # fields
    expr = NULL,
    env = NULL,
    q.mean = NULL,
    q.get = NULL,
    .operands = NULL
  ),
  public = list(

    #' @description Create a \code{ModVar} formed from an expression involving
    #' other model variables.
    #' @param description Name for the model variable expression. In
    #' a complex model it may help to tabulate how model variables are
    #' combined into costs, probabilities and rates.
    #' @param units Units in which the variable is expressed.
    #' @param quo A \verb{quosure} (see package \pkg{rlang}), which contains an
    #' expression and its environment. The usage is \code{quo(x+y)} or
    #' \code{rlang::quo(x+y)}.
    #' @param nemp sample size of the empirical distribution which will be
    #' associated with the expression, and used to estimate values for
    #' \code{mu_hat}, \code{sigma_hat} and \code{q_hat}.
    #' @return An object of type \code{ExprModVar}
    initialize = function(description, units, quo, nemp = 1000L) {
      # check, split and save the quosure
      abortifnot(rlang::is_quosure(quo),
        message = "Argument quo must be a quosure",
        class = "quo_not_quosure"
      )
      private$expr <- rlang::quo_get_expr(quo)
      private$env <- rlang::quo_get_env(quo)
      # create pre-packaged evaluable quosures to optimize calculations
      private$q.mean <- self$add_method("mean()")
      private$q.get <- self$add_method("get()")
      # check and process the sample estimation size
      abortifnot(is.integer(nemp),
        message = "Argument 'nemp' must be an integer",
        class = "nemp_not_integer"
      )
      abortifnot(nemp >= 1000L,
        message = "Argument nemp must not be less than 1000",
        class = "nemp_too_small"
      )
      # create and keep a list of (non-recursive) operands, for optimisation
      private$.operands <- self$operands(recursive = FALSE)
      # create an empirical distribution, accounting for nested autocorrelation
      samp <- vapply(seq_len(nemp), FUN.VALUE = 1.0, FUN = function(i) {
        # set each operand to return a random sample
        for (o in private$.operands) o$set("random")
        # compute the value using the random value for each operand
        # using the pre-packaged expression
        rv <- rlang::eval_tidy(private$q.get)
        return(rv)
      })
      D <- EmpiricalDistribution$new(x = samp)
      # initialize the base class
      super$initialize(description, units, D = D)
      # save the components of .value that do not change
      private$.value[["expected"]] <- self$mean()
      for (o in private$.operands) o$set("q2.5")
      private$.value[["q2.5"]] <- rlang::eval_tidy(private$q.get)
      for (o in private$.operands) o$set("q50")
      private$.value[["q50"]] <- rlang::eval_tidy(private$q.get)
      for (o in private$.operands) o$set("q97.5")
      private$.value[["q97.5"]] <- rlang::eval_tidy(private$q.get)
      # return
      return(invisible(self))
    },

    #' @description Create a new \verb{quosure} from that supplied in
    #' \code{new()} but with each \code{ModVar}
    #' operand appended with \code{$x} where \code{x} is the argument to this
    #' function.
    #' @param method A character string with the method, e.g. \code{"mean()"}.
    #' @return A \dfn{quosure} whose expression is each \code{ModVar} \code{v}
    #' in the
    #' expression replaced with \code{v$method} and the same environment as
    #' specified in the quosure supplied in new().
    #' @details This method is mostly intended for internal use within the
    #' class and will not generally be needed for normal use of
    #' \code{ExprModVar} objects. The returned expression is \emph{not}
    #' syntactically checked or evaluated before it is returned.
    add_method = function(method = "mean()") {
      # check argument
      abortifnot(is.character(method),
        message = "Argument 'method' must be a character string",
        class = "method_not_character"
      )
      # substitute model variable names with call to method
      mv <- list()
      for (v in base::all.vars(private$expr)) {
        vv <- rlang::eval_bare(expr = rlang::sym(v), env = private$env)
        rep <- v
        # build replacement string if necessary
        if (is_ModVar(vv)) {
          rep <- gsub(v, paste(v, method, sep = "$"), v)
        }
        # create symbol from the modified string
        mv[[v]] <- str2lang(rep)
      }
      emod <- eval(
        substitute(substitute(e, env = mv), env = list(e = private$expr))
      )
      # create new quosure
      q <- rlang::new_quosure(expr = emod, env = private$env)
      # return the quosure
      return(q)
    },

    #' @description Tests whether the model variable is probabilistic, i.e.
    #' a random variable that follows a distribution, or an expression involving
    #' random variables, at least one of which follows a distribution.
    #' @return \code{TRUE} if probabilistic
    is_probabilistic = function() {
      # test each operand for whether it is probabilistic
      lv <- vapply(
        base::all.vars(private$expr),
        FUN.VALUE = TRUE,
        FUN = function(v) {
          rv <- FALSE
          vv <- rlang::eval_bare(expr = rlang::sym(v), env = private$env)
          if (is_ModVar(vv)) {
            rv <- vv$is_probabilistic()
          }
          return(rv)
        }
      )
      return(any(lv))
    },

    #' @description Return a list of operands.
    #' @details Finds operands that are themselves \code{ModVar}s in the
    #' expression. if \code{recursive=TRUE}, the list includes all
    #' \code{ModVar}s that are operands of expression operands, recursively.
    #' @param recursive Whether to include nested variables in the list.
    #' @return A list of model variables.
    operands = function(recursive = TRUE) {
      # filter the expression variables that are ModVars
      mvlist <- list()
      for (v in all.vars(private$expr)) {
        vv <- rlang::eval_bare(expr = rlang::sym(v), env = private$env)
        if (inherits(vv, what = "ModVar")) {
          # add the variable to the list
          mvlist <- c(mvlist, vv)
          # recurse, if required
          if (recursive && inherits(vv, what = "ExprModVar")) {
            for (o in vv$operands(recursive)) {
              mvlist <- c(mvlist, o)
            }
          }
        }
      }
      # find the unique variables
      mvlist <- unique(mvlist)
      # return the list
      return(mvlist)
    },

    #' @description Accessor function for the name of the expression model
    #' variable.
    #' @return Expression as a character string with all control characters
    #' having been removed.
    distribution = function() {
      estr <- rlang::as_label(
        rlang::new_quosure(expr = private$expr, env = private$env)
      )
      estr <- gsub("[[:cntrl:]]", "", estr)
      return(estr)
    },

    #' @description Return the value of the expression when its operands take
    #' their mean value (i.e. value returned by call to \code{mean} or their
    #' value, if numeric). See notes on this class for further explanation.
    #' @return Mean value as a numeric value.
    mean = function() {
      # evaluate the pre-packaged expression
      rv <- rlang::eval_tidy(private$q.mean)
      # return it
      return(rv)
    },

    #' @description Return the mode of the variable. By default returns
    #' \code{NA}, which will be the case for most \code{ExprModVar} variables,
    #' because an arbitrary expression is not guaranteed to be unimodal.
    #' @return Mode as a numeric value.
    mode = function() {
      return(NA_real_)
    },

    #' @description Return the standard deviation of the distribution as
    #' \code{NA} because the variance is not available as a closed form for
    #' all functions of distributions.
    #' @return Standard deviation as a numeric value
    SD = function() {
      return(NA_real_)
    },

    #' @description Find quantiles of the uncertainty distribution. Not
    #' available as a closed form, and returned as \code{NA}.
    #' @param probs Numeric vector of probabilities, each in range [0,1].
    #' @return Vector of numeric values of the same length as \code{probs}.
    quantile = function(probs) {
      # test argument
      vapply(probs, FUN.VALUE = TRUE, FUN = function(x) {
        abortif(is.na(x),
          message = "All elements of 'probs' must be defined",
          class = "probs_not_defined"
        )
        abortifnot(is.numeric(x),
          message = "Argument 'probs' must be a numeric vector",
          class = "probs_not_numeric"
        )
        abortifnot(x >= 0.0 && x <= 1.0,
          message = "Elements of 'probs' must be in range[0,1]",
          class = "probs_out_of_range"
        )
        return(TRUE)
      })
      return(rep(NA_real_, length(probs)))
    },

    #' @description Return the estimated expected value of the variable.
    #' @details This is computed by numerical simulation because there is, in
    #' general, no closed form expressions for the mean of a function of
    #' distributions. It is derived from the empirical distribution associated
    #' with the object.
    #' @return Expected value as a numeric value.
    mu_hat = function() {
      rv <- private$.D$mean()
      return(rv)
    },

    #' @description Return the estimated standard deviation of the distribution.
    #' @details This is computed by numerical simulation because there is, in
    #' general, no closed form expressions for the SD of a function of
    #' distributions. It is derived from the empirical distribution associated
    #' with the object.
    #' @return Standard deviation as a numeric value.
    sigma_hat = function() {
      rv <- private$.D$SD()
      return(rv)
    },

    #' @description Return the estimated quantiles by sampling the variable.
    #' @details This is computed by numerical simulation because there is, in
    #' general, no closed form expressions for the quantiles of a function of
    #' distributions. The quantiles are derived from the empirical distribution
    #' associated with the object.
    #' @param probs Vector of probabilities, in range [0,1].
    #' @return Vector of quantiles.
    q_hat = function(probs) {
      q <- private$.D$quantile(probs)
      return(q)
    },

    #' @description Sets the value of the \code{ExprModVar}.
    #' @details The available options for parameter \code{what} are identical to
    #' those available for the \code{set} method of \code{ModVar}. However,
    #' because an \code{ExprModVar} represents the left hand side of an
    #' expression involving operands, the effect of some options is different
    #' from its effect on a non-expression \code{ModVar}.
    #' @param what Until \code{set} is called again, subsequent calls to
    #' \code{get} will return a value determined by the \code{what} parameter.
    #' as follows:
    #' \describe{
    #' \item{\code{"random"}}{a random sample is derived by taking a random
    #' sample from each of the operands and evaluating the expression. It
    #' does not draw from the empirical distribution because of the possibility
    #' of nested autocorrelation. For example, if \eqn{z=xy}, where \eqn{x} is a
    #' model variable and \eqn{y} is an expression which involves \eqn{x}, then
    #' \eqn{y} and \eqn{x} are correlated and will produce a different
    #' distribution for \eqn{z} than if \eqn{x} and \eqn{y} were independent.
    #' However, if \eqn{z} was sampled from the empirical distribution of
    #' \eqn{y} and the uncertainty distribution of \eqn{x} independently, the
    #' effect of correlation would be lost;}
    #' \item{\code{"expected"}}{the value of the expression when each of its
    #' operands takes its expected value. This will not - in general - be the
    #' mean of the uncertainty distribution for the expression which can be
    #' estimated by calling \code{mu_hat};}
    #' \item{\code{"q2.5"}}{the value of the expression when each of its
    #' operands is equal to the 2.5th centile of their own uncertainty
    #' distribution. In general, this will be a more extreme value than the
    #' 2.5th centile of the uncertainty distribution of the expression, which
    #' can be found by using \code{q_hat(p=0.025)};}
    #' \item{\code{"q50"}}{as per \code{"q2.5"} but for the 50th
    #' centile (median);}
    #' \item{\code{"q97.5"}}{as per \code{"q2.5"} but for the 97.5th
    #' centile;}
    #' \item{\code{"current"}}{leaves the \code{what} parameter of method
    #' \code{set} unchanged \emph{for each operand} and causes the expression
    #' to be re-evaluated at subsequent calls to \code{get}. Thus, after calling
    #' \code{set(what="current")} for the expression, subsequent calls to
    #' \code{get} for the expression may not return the same value, if method
    #' \code{set} has been called for one or more operands in the meantime;}
    #' \item{\code{"value"}}{sets the value of the expression to be equal to
    #' parameter \code{val}. This is not recommended for normal usage because it
    #' allows the model variable to be set to an implausible value, based on its
    #' defined uncertainty. An example of where this may be needed is in
    #' threshold finding.}
    #' }
    #' @param val A numeric value, only used with \code{what}=\code{"value"},
    #' ignored otherwise.
    #' @return Updated \code{ExprModVar}.
    set = function(what = "random", val = NULL) {
      # check argument
      abortifnot(is.character(what),
        message = "Argument 'what' must be a character",
        class = "what_not_character"
      )
      abortifnot(what %in% c(private$.whats, "current"),
        message = paste(
          "'what' must be one of", paste(private$.whats, collapse = "|")
        ),
        class = "what_not_supported"
      )
      # if random, make a new draw from the distribution
      if (what == "random") {
        # set the operands to return a random sample
        for (o in private$.operands) o$set("random")
      } else if (what == "value") {
        # if value, check and save the supplied number
        abortif(is.null(val) || !is.numeric(val),
          message = "'val' must be numeric",
          class = "invalid_val"
        )
        private$.value[["value"]] <- val
      }
      # update the whatnext field
      private$.whatnext <- what
      # return the updated object
      return(invisible(self))
    },

    #' @description
    #' Gets the value of the \code{ExprModVar} that was set by the most recent
    #' call to \code{set()}.
    #' @return Value determined by last \code{set()}.
    get = function() {
      if (private$.whatnext == "random") {
        rv <- rlang::eval_tidy(private$q.get)
        private$.value[["random"]] <- rv
      } else if (private$.whatnext == "current") {
        rv <- rlang::eval_tidy(private$q.get)
      } else {
        rv <- private$.value[[private$.whatnext]]
      }
      # return it
      return(rv)
    }
  )
)