R/surrogate.R

Defines functions surrogate

Documented in surrogate

#' Surrogate response
#'
#' Simulate surrogate response values for cumulative link regression models
#' using the latent method described in Liu and Zhang (2017).
#'
#' @param object An object of class \code{\link[ordinal]{clm}},
#' \code{\link[stats]{glm}} \code{\link[rms]{lrm}}, \code{\link[rms]{orm}},
#' \code{\link[MASS]{polr}}, or \code{\link[VGAM]{vglm}}.
#'
#' @param nsim Integer specifying the number of bootstrap replicates to use.
#' Default is \code{1L} meaning no bootstrap samples.
#'
#' @param method Character string specifying which method to use to generate the
#' surrogate response values. Current options are \code{"latent"} and
#' \code{"jitter"}. Default is \code{"latent"}.
#'
#' @param jitter.scale Character string specifyint the scale on which to perform
#' the jittering whenever \code{method = "jitter"}. Current options are
#' \code{"response"} and \code{"probability"}. Default is \code{"response"}.
#'
#' @param ... Additional optional arguments. (Currently ignored.)
#'
#' @return A numeric vector of class \code{c("numeric", "surrogate")} containing
#' the simulated surrogate response values. Additionally, if \code{nsim} > 1,
#' then the result will contain the attributes:
#' \describe{
#'   \item{\code{boot_reps}}{A matrix  with \code{nsim} columns, one for each
#'   bootstrap replicate of the surrogate values. Note, these are random and do
#'   not correspond to the original ordering of the data;}
#'   \item{\code{boot_id}}{A matrix  with \code{nsim} columns. Each column
#'   contains the observation number each surrogate value corresponds to in
#'   \code{boot_reps}. (This is used for plotting purposes.)}
#' }
#'
#' @note
#' Surrogate response values require sampling from a continuous distribution;
#' consequently, the result will be different with every call to
#' \code{surrogate}. The internal functions used for sampling from truncated
#' distributions are based on modified versions of
#' \code{\link[truncdist]{rtrunc}} and \code{\link[truncdist]{qtrunc}}.
#'
#' For \code{"glm"} objects, only the \code{binomial()} family is supported.
#'
#' @references
#' Liu, Dungang and Zhang, Heping. Residuals and Diagnostics for Ordinal
#' Regression Models: A Surrogate Approach.
#' \emph{Journal of the American Statistical Association} (accepted). URL
#' http://www.tandfonline.com/doi/abs/10.1080/01621459.2017.1292915?journalCode=uasa20
#'
#' Nadarajah, Saralees and Kotz, Samuel. R Programs for Truncated Distributions.
#' \emph{Journal of Statistical Software, Code Snippet}, 16(2), 1-8, 2006. URL
#' https://www.jstatsoft.org/v016/c02.
#'
#' @export
#'
#' @examples
#' # Generate data from a quadratic probit model
#' set.seed(101)
#' n <- 2000
#' x <- runif(n, min = -3, max = 6)
#' z <- 10 + 3*x - 1*x^2 + rnorm(n)
#' y <- ifelse(z <= 0, yes = 0, no = 1)
#'
#' # Scatterplot matrix
#' pairs(~ x + y + z)
#'
#' # Setup for side-by-side plots
#' par(mfrow = c(1, 2))
#'
#' # Misspecified mean structure
#' fm1 <- glm(y ~ x, family = binomial(link = "probit"))
#' s1 <- surrogate(fm1)
#' scatter.smooth(x, s1 - fm1$linear.predictors,
#'                main = "Misspecified model",
#'                ylab = "Surrogate residual",
#'                lpars = list(lwd = 3, col = "red2"))
#' abline(h = 0, lty = 2, col = "blue2")
#'
#' # Correctly specified mean structure
#' fm2 <- glm(y ~ x + I(x ^ 2), family = binomial(link = "probit"))
#' s2 <- surrogate(fm2)
#' scatter.smooth(x, s2 - fm2$linear.predictors,
#'                main = "Correctly specified model",
#'                ylab = "Surrogate residual",
#'                lpars = list(lwd = 3, col = "red2"))
#' abline(h = 0, lty = 2, col = "blue2")
surrogate <- function(object, nsim = 1L, method = c("latent", "jitter"),
                      jitter.scale = c("response", "probability"), ...) {

  # Match arguments
  method <- match.arg(method)
  jitter.scale = match.arg(jitter.scale)

  # Issue warning for jittering method
  if (method == "jitter") {
    warning("Jittering is an experimental feature, use at your own risk!",
            call. = FALSE)
  }

  # Generate surrogate response values
  s <- generate_surrogate(object, method = method, jitter.scale = jitter.scale)

  # Multiple (re)samples
  if (nsim > 1L) {  # bootstrap
    boot_reps <- boot_id <- matrix(nrow = nobs(object), ncol = nsim)
    for(i in seq_len(nsim)) {
      boot_id[, i] <- sample(nobs(object), replace = TRUE)
      boot_reps[, i] <-
        generate_surrogate(object, method = method, jitter.scale = jitter.scale,
                           boot_id = boot_id[, i, drop = TRUE])
    }
    attr(s, "boot_reps") <- boot_reps
    attr(s, "boot_id") <- boot_id
  }

  # Return residuals
  class(s) <- c("numeric", "surrogate")
  s

}
AFIT-R/ordr documentation built on Feb. 6, 2020, 1:17 a.m.