Nothing
# ========================================
# final_prog_svyglm: Prognostic-weighted survey GLM
# ========================================
#' Prognostic-weighted survey GLM
#' Prognostic-weighted survey GLM
#'
#' Fits a survey-weighted logistic regression model using stabilized
#' prognostic score weights derived from a model predicting the outcome
#' conditional on baseline covariates and excluding the exposure effect.
#' The function supports design-based inference under complex survey
#' sampling while adjusting for confounding through prognostic weighting
#' @param data Data frame
#' @param dep_var Character; binary outcome
#' @param exposure Character; exposure variable
#' @param covariates Character vector; adjustment variables
#' @param id_var Character; PSU
#' @param strata_var Character; strata
#' @param weight_var Character; survey weight
#' @param outcome_covariates Character vector; optional covariates for final model
#' @param level Numeric; CI level
#' @param ... Additional args to svyglm
#' @examples
#' set.seed(123)
#' n <- 1000
#' dat <- data.frame(
#' psu = sample(1:10, n, replace = TRUE),
#' strata = sample(1:5, n, replace = TRUE),
#' weight = runif(n, 0.5, 2),
#' age = rnorm(n, 50, 10),
#' sex = factor(sample(c("Male", "Female"), n, replace = TRUE)),
#' exposure = rbinom(n, 1, 0.5)
#' )
#' dat$outcome <- rbinom(n, 1, plogis(-2 + 0.03*dat$age + 0.5*dat$exposure))
#'fit<-final_prog_svyglm(data = dat,
#' dep_var = "outcome",
#' exposure="exposure",
#' covariates = c("age", "sex"),
#' id_var = "psu",
#' strata_var = "strata",
#' weight_var = "weight",
#' level = 0.95
#' )
#' names(fit)
#' fit$OR_table
#' @return A list with:
#' \itemize{
#' \item \code{prog_model}: Prognostic svyglm.
#' \item \code{final_model}: Weighted outcome svyglm.
#' \item \code{OR_table}: Odds ratios with CI.
#' \item \code{AUC}: Weighted AUC.
#' \item \code{data}: Data with prognostic weights.
#' }
#' @importFrom survey svydesign svyglm
#' @importFrom stats coef confint predict reformulate quasibinomial as.formula
#' @export
#--------------------------------------------------------------------------
final_prog_svyglm <- function(
data,
dep_var,
exposure,
covariates,
id_var,
strata_var,
weight_var,
outcome_covariates = NULL,
level = 0.95,
...
) {
# ---- Safety Check: Remove rows with missing critical variables ----
required_vars <- c(dep_var, exposure, covariates, outcome_covariates)
required_vars <- required_vars[!sapply(required_vars, is.null)] # exclude NULL
missing_rows <- !complete.cases(data[, required_vars, drop = FALSE])
if (any(missing_rows)) {
warning(sprintf("Removing %d rows with missing values in outcome, exposure, or covariates", sum(missing_rows)))
data <- data[!missing_rows, , drop = FALSE]
}
# Prognostic model
prog_formula <- stats::reformulate(c(exposure, covariates), response=dep_var)
design0 <- survey::svydesign(
id = stats::as.formula(paste0("~", id_var)),
strata = stats::as.formula(paste0("~", strata_var)),
weights =stats::as.formula(paste0("~", weight_var)),
data = data,
nest = TRUE
)
prog_fit <- survey::svyglm(prog_formula, design=design0, family=stats::quasibinomial(), ...)
# Linear predictor minus exposure
lp0 <-as.numeric(stats::predict(prog_fit, type="link"))
coef_fit <- stats::coef(prog_fit)
lp_adj <- lp0
if(is.factor(data[[exposure]])) {
ref <- levels(data[[exposure]])[1]
for(lv in levels(data[[exposure]])) {
if(lv==ref) next
coef_name <- paste0(exposure, lv)
if(coef_name %in% names(coef_fit)) {
lp_adj[data[[exposure]]==lv] <- lp_adj[data[[exposure]]==lv] - coef_fit[coef_name]
}
}
} else {
if(exposure %in% names(coef_fit)) lp_adj <- lp_adj - coef_fit[exposure] * data[[exposure]]
}
eps <- 1e-6
data$var_pg_sw <- pmin(pmax(stats::plogis(lp_adj), eps), 1 - eps)
mean_y <- mean(data[[dep_var]], na.rm=TRUE)
data$ipgtwt_pg <- (data[[dep_var]] * mean_y / data$var_pg_sw) + ((1-data[[dep_var]])*(1-mean_y)/(1-data$var_pg_sw))
data$new_weight_pg <- data$ipgtwt_pg * data[[weight_var]]
design_pg <- survey::svydesign(
id = stats::as.formula(paste0("~", id_var)),
strata = stats::as.formula(paste0("~", strata_var)),
weights = ~new_weight_pg,
data = data,
nest = TRUE
)
# ---- Safety Check: Ensure outcome has both 0 and 1 ----
outcome_table <- table(data[[dep_var]])
if (length(outcome_table) < 2) {
stop(sprintf("Outcome '%s' does not contain both 0 and 1. Cannot fit logistic model.", dep_var))
}
if (any(outcome_table < 5)) {
warning(sprintf("Outcome '%s' has very few events (%d) or non-events (%d). Estimates may be unstable.",
dep_var, outcome_table[1], outcome_table[2]))
}
##########################################
# Final model
if(is.null(outcome_covariates)) {
final_form <- stats::reformulate(exposure, response=dep_var)
} else {
final_form <- stats::reformulate(c(exposure, outcome_covariates), response=dep_var)
}
final_fit <- survey::svyglm(final_form, design=design_pg, family=stats::quasibinomial(), ...)
# OR table
coef_fit <- stats::coef(final_fit)
ci <- stats::confint(final_fit, level=level)
OR_table <- data.frame(
Variable = names(coef_fit),
OR = exp(coef_fit),
CI_low = exp(ci[,1]),
CI_high = exp(ci[,2]),
p_value = summary(final_fit)$coefficients[, "Pr(>|t|)"],
row.names = NULL
)
# AUC
pred <- stats::predict(final_fit, type="response")
pred_class <-ifelse(pred >= 0.5, 1L, 0L)
out<-list(
prognostic_model = prog_fit,
final_model = final_fit,
OR_table = OR_table,
outcome = data[[dep_var]],
final_weights= as.numeric(data$new_weight_pg),
predictions=as.numeric(pred)
)
class(out) <- "svyCausal"
return(invisible(out))
}
#'@exportS3Method
# Add this small helper function OUTSIDE your main function
# This tells R: "If someone looks at this object, only show the OR Table"
print.svyCausal <- function(x, ...) {
cat("\n--- svyCausalGLM Results ---\n")
print(x$OR_table)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.