R/final_svyglm.R

Defines functions final_svyglm

Documented in final_svyglm

# --------------------------------------------------
#' Final Survey-Weighted GLM
#'
#' Fits a survey-weighted logistic regression model (quasibinomial) using raw survey variables.
#' Returns ORs, confidence intervals, p-values, and model discrimination statistics.
#'
#' @param data A data frame containing the survey data.
#' @param dep_var Character. Name of the binary outcome variable (0/1).
#' @param covariates Character vector of covariate names to adjust for.
#' @param id_var Character. Name of the primary sampling unit variable.
#' @param strata_var Character. Name of the stratification variable.
#' @param weight_var Character. Name of the survey weight variable.
#' @param family Character. Currently supports only `"binomial"`.
#' @param level Numeric. Confidence level for intervals (default = 0.95).
#' @param interaction_terms Optional character vector of interaction terms.
#' @examples
#' set.seed(123)
#' n <- 100
#' dat <- data.frame(
#'   psu = sample(1:10, n, replace = TRUE),
#'   strata = sample(1:5, n, replace = TRUE),
#'   weight = runif(n, 0.5, 2),
#'   age = rnorm(n, 50, 10),
#'   sex = factor(sample(c("Male", "Female"), n, replace = TRUE)),
#'   exposure = rbinom(n, 1, 0.5)
#' )
#' dat$outcome <- rbinom(n, 1, plogis(-2 + 0.03*dat$age + 0.5*dat$exposure))
#' fit_simple<-final_svyglm(dat, dep_var="outcome", covariates=c("age","sex"),
#'              id_var="psu", strata_var="strata", weight_var="weight")
#'fit_simple$OR_table
#' @return A list containing:
#' \itemize{
#'   \item \code{model}: Survey-weighted logistic regression model.
#'   \item \code{results_table}: Odds ratios with confidence intervals and p-values.
#'   \item \code{AUC}: Survey-weighted AUC (Somers' C).
#'   \item \code{data}: Input data with predicted probabilities.
#'   \item \code{design}: Survey design object.
#' }
#'
#' @importFrom survey svydesign svyglm  degf
#' @importFrom stats as.formula binomial complete.cases quasibinomial reformulate
#' @importFrom stats coef confint fitted glm lm plogis predict residuals sd
#' @export
# --------------------------------------------------
final_svyglm <- function(data,
                         dep_var,
                         covariates,
                         id_var,
                         strata_var,
                         weight_var,
                         family = "binomial",
                         level = 0.95,
                         interaction_terms = NULL) {

  if (family != "binomial") {
    stop("Only 'binomial' family is currently supported.", call. = FALSE)
  }

  # 1. Clean data: svyglm will fail/drop rows with NAs
  keep_vars <- c(dep_var, covariates, id_var, strata_var, weight_var)
  data <- data[stats::complete.cases(data[, keep_vars]), ]

  # Build model formula
  rhs <- covariates
  if (!is.null(interaction_terms)) rhs <- c(rhs, interaction_terms)
  formula_model <- stats::reformulate(rhs, dep_var)

  # Survey design
  svy_design <- survey::svydesign(
    id = stats::as.formula(paste0("~", id_var)),
    strata = stats::as.formula(paste0("~", strata_var)),
    weights = stats::as.formula(paste0("~", weight_var)),
    data = data,
    nest = TRUE
  )

  # Fit model
  fit <- survey::svyglm(
    formula_model,
    design = svy_design,
    family = stats::quasibinomial()
  )

  # Odds ratio table
  ci <- stats::confint(fit, level = level, df = survey::degf(fit))
  OR_table <- data.frame(
    Variable = names(stats::coef(fit)),
    OR = exp(stats::coef(fit)),
    CI_low = exp(ci[, 1]),
    CI_high = exp(ci[, 2]),
    p_value = summary(fit)$coefficients[, "Pr(>|t|)"],
    row.names = NULL
  )

  # 2. Extract final data used in model to avoid NULLs
  # Use model.response for the outcome and weights() for design
  final_outcome <- as.numeric(stats::model.response(stats::model.frame(fit)))
  pred_vals <- as.numeric(stats::predict(fit, type = "response"))

  # IMPORTANT: weights(svy_design) returns the vector; 'type' is for replicate weights
  final_wts <- as.numeric(stats::weights(fit$survey.design))

  out <- list(
    model = fit,
    OR_table = OR_table,
    outcome = final_outcome,
    predictions = pred_vals,
    final_weights = final_wts,
    design = svy_design
  )
  class(out) <- "svyCausal"
  return(out) # Removed invisible() for easier debugging
}



#devtools::document()

Try the svyCausalGLM package in your browser

Any scripts or data that you put into this service are public.

svyCausalGLM documentation built on March 3, 2026, 5:08 p.m.