R/prediction.R

Defines functions predict2.MCMCglmm recycler

Documented in predict2.MCMCglmm recycler

#' Define a generic prediction function
#'
#' This defintes a generic \code{predict2} function which
#' is similar to the usual \code{predict} but can use different
#' methods. In particular, the \code{MCMCglmm} method has features
#' not available in the regular \code{predict} method for \code{MCMCglmm}
#' objects.
#'
#' @param object A model object to predict from
#' @param \dots Additional arguments passed to the methods
#' @export
#' @rdname predict2
#' @examples
#' # to see available methods
#' methods(predict2)
predict2 <- function (object, ...) {
  UseMethod("predict2", object)
}

#' Get predicted values from a \code{MCMCglmm} object
#'
#' This is the main workhorse of the package.
#' It is a \code{predict2} method for MCMCglmm objects.
#' There are a few core arguments. The model \code{object}
#' and design matrices, \code{X} (fixed effects) and \code{Z} (random effects).
#' If \code{X} and \code{Z} are missing, it will attempt to fill
#' them in from the model object (which optionally saves them).
#' If \code{X} and \code{Z} are specified or \code{NULL}, they are not used.
#' This is useful either for out of sample predictions
#' or to use just the fixed effects. Note that these must be
#' full design matrices, not data matrices.  For example, they
#' must dummy code factors and include the intercept (if there
#' was an intercept in the model).
#'
#' You can also use all posterior samples or just the mean.
#' All is nice because it lets you construct highest posterior
#' density (HPD) intervals around the predicted values, rather
#' than just get an estimate. The mean is nice because if
#' that is all you care about, it is much much faster.
#' You can get either the linear predictor values or the response scale.
#' However, response is currently only implemented for ordinal (probit) models.
#' Theoretically it could be extended but the code is a pain.
#'
# @param object A \code{MCMCglmm} model object to use for prediction.
#' @param X The fixed effects design matrix. Can be the original or new data.
#' @param Z The random effects design matrix. Can be the original or new data.
#' @param use A character string. Use just the posterior \dQuote{mean} or
#'   \dQuote{all} posterior samples (the default).
#' @param type A cahracter string. Either \dQuote{lp} for the linear predictor
#'   (the default) or \dQuote{response} for the predicted values on the response scale.
# @param \dots Not currently used.
#' @return Either a matrix of the linear predictor if \code{type = "lp"} or a
#'   list of class MCMCglmmPredictedProbs if \code{type = "response"}
#' @method predict2 MCMCglmm
#' @export
#' @rdname predict2
#' @seealso \code{\link{summary.MCMCglmmPredictedProbs}}, \code{\link{recycler}}
#' @examples
#'   \dontrun{
#'     data(PlodiaPO)
#'     PlodiaPO <- within(PlodiaPO, {
#'       PO2 <- cut(PO, quantile(PO, c(0, .33, .66, 1)))
#'     })
#'
#'     m <- MCMCglmm(PO2 ~ 1, random = ~ FSfamily,
#'       family = "ordinal", data = PlodiaPO,
#'       prior = list(
#'         R = list(V = 1, fix = 1),
#'         G = list(
#'           G1 = list(V = 1, nu = .002)
#'         )
#'       ), verbose=FALSE, thin=1, pr=TRUE)
#'
#'     # predicted probabilities for each level of the outcome
#'     # using all posterior samples
#'     yhat <- predict2(m, use = "all", type = "response")
#'     str(yhat) # view structure
#'
#'     # summary of predicted probabilities
#'     sumyhat <- summary(yhat)
#'     # first few summaries for level 1
#'     head(sumyhat[[1]])
#'
#'     # first few summaries for level 2
#'     head(sumyhat[[2]])
#'
#'     # first few summaries for level 3
#'     head(sumyhat[[3]])
#'
#'     # combine
#'     longsum <- do.call(rbind.data.frame, sumyhat)
#'     # create a level indicator
#'     longsum$Level <- factor(rep(1:3, each = nrow(sumyhat[[1]])))
#'
#'     # plot
#'     boxplot(M ~ Level, data = longsum)
#'   }
predict2.MCMCglmm <- function(object, X, Z, use = c("all", "mean"),
  type = c("lp", "response"), ...) {
  use <- match.arg(use)
  type <- match.arg(type)

  if (!is.null(object$X) && missing(X)) {
    X <- object$X
  }

  if (!is.null(object$Z) && missing(Z)) {
    Z <- object$Z
  }

  if (missing(X)) X <- NULL
  if (missing(Z)) Z <- NULL

  Xb <- Zu <- 0L

  b <- fixef(object, use = use)
  u <- ranef(object, use = use)

  if (!is.null(X)) Xb <- X %*% b
  if (!is.null(Z)) Zu <-  Z %*% u

  res <- t(as.matrix(Xb + Zu))

  dimnames(res) <- list(switch(use,
    all = paste0("Rep.", 1:nrow(res)), mean = NULL), NULL)

  if (type == "response") {
    if (all(object$family %in% c("ordinal"))) {
      stopifnot(length(unique(object$error.term)) == 1)

      stddev <- sqrt(unique(object$error.term) + 1)

      # cut points
      CP <- object$CP
      if (use == "mean") CP <- colMeans(CP)

      CP <- as.list(as.data.frame(CP))
      CP <- c(0, lapply(CP, function(x) {
        do.call("cbind", rep(list(x), dim(res)[2L]))
      }))
      # difference between cuts and predicted plus lower and upper bounds
      for (i in seq_along(CP)) {
        CP[[i]] <- pnorm(CP[[i]] - res, 0, stddev)
      }

      CP <- c(0, CP, 1)

      q <- vector("list", length(CP) - 2)
      for (i in 2:(length(CP) - 1)) {
        q[[i - 1]] <- mcmc(CP[[i + 1]] - CP[[i]])
      }
      q <- c(list(mcmc(1 - Reduce(`+`, q[1:(i - 1)]))), q)
      class(q) <- c("list", "MCMCglmmPredictedProbs")
      res <- q
    } else if (all(object$family %in% c("categorical", "multinomial"))) {

      # stopifnot(length(unique(object$error.term)) == 1)
      k <- ((16*sqrt(3))/(15*pi))^2  # this should be the scaling constant for logit
      stddev <- sqrt(object$error.term + k) # check that this is right error
      q <- plogis(res)
      class(q) <- c("MCMCglmmPredictedProbs")
      res <- q
    } else {
      stop("Function does not support response type for families beside ordinal")
    }
  } else if (type == "lp") {
    res <- as.mcmc(res)
    class(res) <- c("mcmc", "MCMCglmmPredictedLP")
  }

  return(res)
}

#' Calculate change in predicted probabilities
#'
#' \code{recycler} wraps many of the functions in \pkg{postMCMCglmm} to calculate
#' the change in predicted probabilities for a twiddle change in the predictor,
#' or for discrete predictors, it can use values so it is the change from 0 to 1
#' (for example). The result is a MCMCglmmPredictedProbs
#' (of course a difference but still) object, so it can be summarized using
#' the \code{MCMCglmmPredictedProbs} \code{summary} method.
#' This gives average marginal recycled predicted probabilities,
#' as well as highest posterior density intervals.
#'
#' @param object A \code{MCMCglmm} model object to use for recycled predictions.
#' @param index An integer indicating the column of the fixed effects design matrix, X,
#'   to vary.  Defaults to 2L.
#' @param twiddle A twiddle value for continuous variables. Needs to be small enough
#'   for the scale of the predictor that a twiddle change is a reasonable approximation
#'   of taking the first derivative at a point. That is, a very small change.
#'   If missing, reverts to .01.
#' @param values Specific values to use for the varying predictor. These are primarily
#'   meant for discrete predictors rather than continuous ones.
#' @param \dots Passed on to \code{predict2}
#' @seealso \code{\link{summary.MCMCglmmPredictedProbs}}, \code{\link{predict2.MCMCglmm}}
#' @return A list of class \code{MCMCglmmPredictedProbs} that are the differences
#'   in predicted probabilities for a one unit change (calculated from the
#'   twiddle value or between the discrete values supplied in \code{values}).
#' @export
#' @examples
#' \dontrun{
#'   ## Make me!
#' }
recycler <- function(object, index = 2L, twiddle, values, ...) {
  L1 <- missing(twiddle)
  L2 <- missing(values)
  if (!L1 && !L2) stop("Please specify either a twiddle value or actual values, not both")
  X1 <- X2 <- object$X

  stopifnot(index %in% seq.int(ncol(X1)) || index %in% colnames(X1))

  if (L1 && L2) {
    twiddle <- .01
  }
  if (L2) {
    X2[, index] <- X2[, index] + twiddle
  } else {
    stopifnot(length(values) == 2L)
    X1[, index] <- values[1]
    X2[, index] <- values[2]
    twiddle <- diff(values)
  }

  p1 <- predict2(object, X = X1, use = "all", type = "response", ...)
  p2 <- predict2(object, X = X2, use = "all", type = "response", ...)


  pdelta <- lapply(seq_along(p2), function(i) {
    (p2[[i]] - p1[[i]])/twiddle
  })

  class(pdelta) <- c("list", "MCMCglmmPredictedProbs")

  return(pdelta)
}
JWiley/postMCMCglmm documentation built on May 7, 2019, 10:15 a.m.