R/viz_auc_svyglm.R
In svyCausalGLM: Survey-Weighted Modeling Utilities

Documented in viz_auc_svyglm

# --------------------------------------------------
#' Weighted ROC Curve for Survey-Weighted Models
#'
#' Produces a weighted ROC curve and reports weighted AUC
#' for survey-based models.
#' @param fit_object object obtain from logistic regression
#' @param title Character. Plot title.
#' @param line_color Character. ROC curve color.
#' @examples
#' set.seed(123)
#' n <- 100
#' dat <- data.frame(
#'   psu = sample(1:10, n, replace = TRUE),
#'   strata = sample(1:5, n, replace = TRUE),
#'   weight = runif(n, 0.5, 2),
#'   age = rnorm(n, 50, 10),
#'   sex = factor(sample(c("Male", "Female"), n, replace = TRUE)),
#'   exposure = rbinom(n, 1, 0.5)
#' )
#' dat$outcome <- rbinom(n, 1, plogis(-2 + 0.03*dat$age + 0.5*dat$exposure))
#' fit_example<-final_svyglm(dat, dep_var="outcome", covariates=c("age","sex"),
#'              id_var="psu", strata_var="strata", weight_var="weight")
#'viz_auc_svyglm(fit_object=fit_example)
#' @return A ggplot object.
#' @details
#' AUC is computed using, consistent with
#' complex survey weighting.
#'@importFrom survey svydesign svyglm
#' @importFrom ggplot2 ggplot aes geom_point geom_errorbar geom_vline
#' @importFrom stats complete.cases
#' @importFrom utils head tail
#' @export
# --------------------------------------------------
viz_auc_svyglm <- function(
    fit_object,
    title = "Weighted ROC Curve",
    line_color = "#0072B2"
) {

  # -----------------------------
  # Extract components
  # -----------------------------
  outcome   <- fit_object$outcome
  predicted <- fit_object$predictions
  weights   <- fit_object$final_weights

  if (!all(outcome %in% c(0, 1))) {
    stop("Outcome must be binary (0/1).", call. = FALSE)
  }

  df <- data.frame(outcome, predicted, weights)
  df <- df[complete.cases(df), ]

  # -----------------------------
  # Sort by predicted risk (descending)
  # -----------------------------
  df <- df[order(df$predicted, decreasing = TRUE), ]

  # -----------------------------
  # Weighted ROC components (Stata senspec)
  # -----------------------------
  cum_wt_pos <- cumsum(df$weights * df$outcome)
  cum_wt_neg <- cumsum(df$weights * (1 - df$outcome))

  total_pos <- sum(df$weights * df$outcome)
  total_neg <- sum(df$weights * (1 - df$outcome))

  sens <- cum_wt_pos / total_pos
  fpos <- cum_wt_neg / total_neg

  # -----------------------------
  # Trapezoidal AUC (integ sens fpos)
  # -----------------------------
  awauc <- sum(
    diff(fpos) * (head(sens, -1) + tail(sens, -1)) / 2
  )

  # -----------------------------
  # # SE and CI (Hanley-McNeil / Stata)
  # -----------------------------
  n1 <- total_pos
  n2 <- total_neg

  q1 <- awauc / (2 - awauc)
  q2 <- 2 * awauc^2 / (1 + awauc)
  awauc2 <- awauc^2

  se_awauc <- sqrt(
    (awauc * (1 - awauc) +
       (n1 - 1) * (q1 - awauc2) +
       (n2 - 1) * (q2 - awauc2)) / (n1 * n2)
  )

  ll_awauc <- awauc - 1.96 * se_awauc
  ul_awauc <- awauc + 1.96 * se_awauc

  # -----------------------------
  # ROC data for plotting
  # -----------------------------
  roc_df <- data.frame(
    FPR = fpos,
    TPR = sens
  )

  # -----------------------------
  # Plot
  # -----------------------------
  p <- ggplot2::ggplot(
    roc_df,
    ggplot2::aes(x = .data$FPR, y = .data$TPR)
  ) +
    ggplot2::geom_step(color = line_color, linewidth = 1.1) +
    ggplot2::geom_abline(
      slope = 1, intercept = 0,
      linetype = "dashed", color = "grey60"
    ) +
    ggplot2::coord_equal() +
    ggplot2::labs(
      title = title,
      subtitle = sprintf(
        "Weighted AUC = %.4f (95%% CI %.4f\u2013%.4f)",
        awauc, ll_awauc, ul_awauc
      ),
      x = "False Positive Rate",
      y = "True Positive Rate"
    ) +
    ggplot2::theme_minimal(base_size = 13)

  # Return both plot and statistics (useful for papers)
  list(
    plot = p,
    stats = c(
      AUC = awauc,
      SE = se_awauc,
      LCI = ll_awauc,
      UCI = ul_awauc
    )
  )
}