R/predict.R
In pmrm: Progression Models for Repeated Measures

Documented in predict.pmrm_fit

#' @title Predict new outcomes
#' @export
#' @family predictions
#' @description Return the expected values, standard errors,
#'   and confidence intervals of new outcomes.
#' @return A `tibble` with one row for each row in the `data` argument and
#'   columns `"estimate"`, `"standard_error"`, `"lower"`, and `"upper"`.
#'   Columns `"lower"` and `"upper"` are lower and upper bounds of 2-sided
#'   confidence intervals on the means.
#'   (The confidence intervals are not actually truly prediction intervals
#'   because they do not include variability from residuals.)
#' @inheritParams summary.pmrm_fit
#' @param data A `tibble` or data frame with one row per patient visit.
#'   This is the new data for making predictions.
#'   It must have all the same columns as the original you fit with the model,
#'   except that the outcome column can be entirely absent.
#'   `object$data` is an example dataset that will work.
#'   It is just like the original data, except that rows with missing
#'   responses are removed, and the remaining rows are sorted
#'   by patient ID and categorical scheduled visit.
#' @param adjust `TRUE` or `FALSE`.
#'    `adjust = TRUE` returns estimates and inference for
#'    covariate-adjusted `mu_ij`
#'    values (defined in `vignette("models", package = "pmrm")`) for new data.
#'    `adjust = FALSE` instead returns inference on `mu_ij - W %*% gamma`,
#'    the non-covariate-adjusted predictions useful in plotting a
#'    continuous disease progression trajectory in [plot.pmrm_fit()].
#' @param confidence Numeric between 0 and 1, the confidence level
#'   to use in the 2-sided confidence intervals.
#' @examples
#'   set.seed(0L)
#'   simulation <- pmrm_simulate_decline_proportional(
#'     visit_times = seq_len(5L) - 1,
#'     gamma = c(1, 2)
#'   )
#'   fit <- pmrm_model_decline_proportional(
#'     data = simulation,
#'     outcome = "y",
#'     time = "t",
#'     patient = "patient",
#'     visit = "visit",
#'     arm = "arm",
#'     covariates = ~ w_1 + w_2
#'   )
#'   new_data <- pmrm_simulate_decline_proportional(
#'     patients = 1,
#'     visit_times = seq_len(5L) - 1,
#'     gamma = c(1, 2)
#'   )
#'   new_data$y <- NULL # Permitted but not strictly necessary.
#'   predict(fit, new_data)
predict.pmrm_fit <- function(
  object,
  data = object$data,
  adjust = TRUE,
  confidence = 0.95,
  ...
) {
  assert(
    adjust,
    isTRUE(.) || isFALSE(.),
    message = "adjust must be TRUE or FALSE."
  )
  assert(
    confidence,
    is.numeric(.),
    length(.) == 1L,
    is.finite(.),
    . >= 0,
    . <= 1,
    message = "confidence must have length 1 and be between 0 and 1."
  )
  data$.pmrm_original_row_order <- seq_len(nrow(data))
  data_new <- pmrm_predict_data_new(object, data)
  original_order <- data_new$.pmrm_original_row_order
  data_new$.pmrm_original_row_order <- NULL
  data_old <- object$data
  data_combined <- pmrm_predict_data_combined(object, data_new, data_old)
  constants <- pmrm_constants(
    data = data_combined,
    visit_times = object$constants$visit_times,
    spline_knots = object$constants$spline_knots,
    spline_method = object$constants$spline_method,
    reml = FALSE,
    hessian = object$constants$hessian,
    slowing = object$constants$slowing,
    proportional = object$constants$proportional,
    predict = TRUE,
    adjust = adjust
  )
  constants$W_column_means <- object$constants$W_column_means
  parameters <- object$optimization$par
  model <- RTMB::MakeADFun(
    func = function(parameters) object$objective(constants, parameters),
    parameters = object$initial,
    random = object$constants$random,
    silent = object$options$silent
  )
  hessian <- stats::optimHess(par = parameters, fn = model$fn, gr = model$gr)
  report <- RTMB::sdreport(
    obj = model,
    par.fixed = parameters,
    hessian.fixed = hessian,
    getReportCovariance = FALSE
  )
  summary <- summary(report)
  name <- ifelse(adjust, "mu", "mu_unadjusted")
  summary <- tibble::as_tibble(summary[rownames(summary) == name, ])
  summary <- summary[-seq_len(nrow(data_old)), , drop = FALSE] # nolint
  z <- stats::qnorm(p = (1 - confidence) / 2, lower.tail = FALSE)
  labels <- pmrm_data_labels(object$data)
  out <- tibble::tibble(
    patient = data_new[[labels$patient]],
    arm = data_new[[labels$arm]],
    visit = data_new[[labels$visit]],
    time = data_new[[labels$time]],
    estimate = summary[["Estimate"]],
    standard_error = summary[["Std. Error"]],
    lower = estimate - z * standard_error,
    upper = estimate + z * standard_error
  )
  out[order(original_order), , drop = FALSE]
}

pmrm_predict_data_combined <- function(object, data_new, data_old) {
  labels <- pmrm_data_labels(object$data)
  patient <- labels$patient
  data_new[[patient]] <- paste0("new_", data_new[[patient]])
  data_old[[patient]] <- paste0("old_", data_old[[patient]])
  data_combined <- dplyr::bind_rows(data_old, data_new)
  data_combined[[patient]] <- factor(
    data_combined[[patient]],
    levels = unique(data_combined[[patient]])
  )
  data_combined
}

pmrm_predict_data_new <- function(object, data) {
  labels <- pmrm_data_labels(object$data)
  data[[labels$outcome]] <- 0
  data <- pmrm_data(
    data = data,
    outcome = labels$outcome,
    time = labels$time,
    patient = labels$patient,
    visit = labels$visit,
    arm = labels$arm,
    covariates = labels$covariates,
    subset = TRUE
  )
  data[[labels$outcome]] <- NA_real_
  assert(
    all(levels(data[[labels$visit]]) %in% levels(object$data[[labels$visit]])),
    message = paste(
      "visit levels in new data must be a subset",
      "of visit levels in original data."
    )
  )
  assert(
    all(levels(data[[labels$arm]]) %in% levels(object$data[[labels$arm]])),
    message = paste(
      "arm levels in new data must be a subset",
      "of arm levels in original data."
    )
  )
  levels(data[[labels$visit]]) <- levels(object$data[[labels$visit]])
  levels(data[[labels$arm]]) <- levels(object$data[[labels$arm]])
  data
}

#' @export
stats::predict