R/surrogate.R

Defines functions surrogate

Documented in surrogate

#' Surrogate response
#'
#' Simulate surrogate response values for cumulative link regression models
#' using the latent method described in Liu and Zhang (2017).
#'
#' @param object An object of class \code{\link[ordinal]{clm}},
#' \code{\link[stats]{glm}} \code{\link[rms]{lrm}}, \code{\link[rms]{orm}},
#' \code{\link[MASS]{polr}}, or \code{\link[VGAM]{vglm}}.
#'
#' @param method Character string specifying which method to use to generate the
#' surrogate response values. Current options are \code{"latent"} and
#' \code{"uniform"}. Default is \code{"latent"}.
#'
#' @param jitter.uniform.scale Character string specifying the scale on which to perform
#' the jittering whenever \code{method = "uniform"}. Current options are
#' \code{"response"} and \code{"probability"}. Default is \code{"response"}.
#'
#' @param nsim Integer specifying the number of bootstrap replicates to use.
#' Default is \code{1L} meaning no bootstrap samples.
#'
#' @param ... Additional optional arguments. (Currently ignored.)
#'
#' @return A numeric vector of class \code{c("numeric", "surrogate")} containing
#' the simulated surrogate response values. Additionally, if \code{nsim} > 1,
#' then the result will contain the attributes:
#' \describe{
#'   \item{\code{boot_reps}}{A matrix  with \code{nsim} columns, one for each
#'   bootstrap replicate of the surrogate values. Note, these are random and do
#'   not correspond to the original ordering of the data;}
#'   \item{\code{boot_id}}{A matrix  with \code{nsim} columns. Each column
#'   contains the observation number each surrogate value corresponds to in
#'   \code{boot_reps}. (This is used for plotting purposes.)}
#' }
#'
#' @note
#' Surrogate response values require sampling from a continuous distribution;
#' consequently, the result will be different with every call to
#' \code{surrogate}. The internal functions used for sampling from truncated
#' distributions are based on modified versions of
#' \code{truncdist:rtrunc} and \code{truncdist:qtrunc}.
#'
#' For \code{"glm"} objects, only the \code{binomial()} family is supported.
#'
#' @references
#' Liu, D., Li, S., Yu, Y., & Moustaki, I. (2020). Assessing partial association between
#' ordinal variables: quantification, visualization, and hypothesis testing. \emph{Journal
#' of the American Statistical Association}, 1-14. \doi{10.1080/01621459.2020.1796394}
#'
#' Liu, D., & Zhang, H. (2018). Residuals and diagnostics for ordinal regression models:
#' A surrogate approach. \emph{Journal of the American Statistical Association}, 113(522), 845-854.
#' \doi{10.1080/01621459.2017.1292915}
#'
#' Nadarajah, S., & Kotz, S. (2006). R Programs for Truncated Distributions. \emph{Journal
#' of Statistical Software}, 16(Code Snippet 2), 1 - 8. \doi{10.18637/jss.v016.c02}
#'
#' @export
#'
#' @examples
#' # Generate data from a quadratic probit model
#' set.seed(101)
#' n <- 2000
#' x <- runif(n, min = -3, max = 6)
#' z <- 10 + 3*x - 1*x^2 + rnorm(n)
#' y <- ifelse(z <= 0, yes = 0, no = 1)
#'
#' # Scatterplot matrix
#' pairs(~ x + y + z)
#'
#' # Setup for side-by-side plots
#' par(mfrow = c(1, 2))
#'
#' # Misspecified mean structure
#' fm1 <- glm(y ~ x, family = binomial(link = "probit"))
#' s1 <- surrogate(fm1)
#' scatter.smooth(x, s1 - fm1$linear.predictors,
#'                main = "Misspecified model",
#'                ylab = "Surrogate residual",
#'                lpars = list(lwd = 3, col = "red2"))
#' abline(h = 0, lty = 2, col = "blue2")
#'
#' # Correctly specified mean structure
#' fm2 <- glm(y ~ x + I(x ^ 2), family = binomial(link = "probit"))
#' s2 <- surrogate(fm2)
#' scatter.smooth(x, s2 - fm2$linear.predictors,
#'                main = "Correctly specified model",
#'                ylab = "Surrogate residual",
#'                lpars = list(lwd = 3, col = "red2"))
#' abline(h = 0, lty = 2, col = "blue2")
#'
#'
#' dev.off() # reset to defaults once finish
#'
surrogate <- function(object, method = c("latent", "uniform"),
                      jitter.uniform.scale = c("probability", "response"),
                      nsim = 1L, ...) {

  # Match arguments
  method <- match.arg(method)
  jitter.uniform.scale = match.arg(jitter.uniform.scale)

  # Issue warning for jittering method
  if (method == "uniform") {
    message("Jittering is an experimental feature, use at your own risk!")
  }

  # Generate surrogate response values
  s <- generate_surrogate(object, method = method, jitter.uniform.scale = jitter.uniform.scale)

  # Multiple (re)samples
  if (nsim > 1L) {  # bootstrap
    boot_reps <- boot_id <- matrix(nrow = nobs(object), ncol = nsim)
    for(i in seq_len(nsim)) {
      boot_id[, i] <- sample(nobs(object), replace = TRUE)
      boot_reps[, i] <-
        generate_surrogate(object, method = method, jitter.uniform.scale = jitter.uniform.scale,
                           draws_id = boot_id[, i, drop = TRUE])
    }
    attr(s, "boot_reps") <- boot_reps
    attr(s, "boot_id") <- boot_id
  }

  # Return residuals
  class(s) <- c("numeric", "surrogate")
  s

}

Try the PAsso package in your browser

Any scripts or data that you put into this service are public.

PAsso documentation built on June 18, 2021, 5:09 p.m.