Storing and Analyzing Imputed Data with rbmiUtils

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Introduction

This vignette demonstrates how to:

This pattern enables reproducible workflows where imputation and analysis can be separated and revisited independently.

Statistical Context

This approach applies Rubin’s Rules for inference after multiple imputation:

We fit a model to each imputed dataset, dervive a response variable on the CHG score, extract marginal effects or other statistics of interest, and combine the results into a single inference using Rubin’s combining rules.


Step 1: Setup and Data Preparation

library(dplyr)
library(tidyr)
library(readr)
library(purrr)
library(rbmi)
library(beeca)
library(rbmiUtils)
set.seed(1974)
data("ADEFF")

ADEFF <- ADEFF %>%
  mutate(
    TRT = factor(TRT01P, levels = c("Placebo", "Drug A")),
    USUBJID = factor(USUBJID),
    AVISIT = factor(AVISIT)
  )

Step 2: Define Imputation Model

vars <- set_vars(
  subjid = "USUBJID",
  visit = "AVISIT",
  group = "TRT",
  outcome = "CHG",
  covariates = c("BASE", "STRATA", "REGION")
)
method <- method_bayes(
  n_samples = 100,
  control = control_bayes(warmup = 200, thin = 2)
)
dat <- ADEFF %>%
  select(USUBJID, STRATA, REGION, REGIONC, TRT, BASE, CHG, AVISIT)

draws_obj <- draws(data = dat, vars = vars, method = method)

impute_obj <- impute(draws_obj, references = c("Placebo" = "Placebo", "Drug A" = "Placebo"))

ADMI <- get_imputed_data(impute_obj)

Step 3: Add Responder Variables

ADMI <- ADMI %>%
  mutate(
    CRIT1FLN = ifelse(CHG > 3, 1, 0),
    CRIT1FL = ifelse(CRIT1FLN == 1, "Y", "N"),
    CRIT = "CHG > 3"
  )

Step 4: Continuous Endpoint Analysis (CHG)

ana_obj_ancova <- analyse_mi_data(
  data = ADMI,
  vars = vars,
  method = method,
  fun = ancova
)
pool_obj_ancova <- pool(ana_obj_ancova)
print(pool_obj_ancova)
tidy_pool_obj(pool_obj_ancova)

Step 5: Responder Endpoint Analysis (CRIT1FLN)

Define Analysis Function

gcomp_responder <- function(data, ...) {
  model <- glm(CRIT1FLN ~ TRT + BASE + STRATA + REGION, data = data, family = binomial)

  marginal_fit <- get_marginal_effect(
    model,
    trt = "TRT",
    method = "Ge",
    type = "HC0",
    contrast = "diff",
    reference = "Placebo"
  )

  res <- marginal_fit$marginal_results
  list(
    trt = list(
      est = res[res$STAT == "diff", "STATVAL"][[1]],
      se = res[res$STAT == "diff_se", "STATVAL"][[1]],
      df = NA
    )
  )
}

Define Variables and Run Analysis

vars_binary <- set_vars(
  subjid = "USUBJID",
  visit = "AVISIT",
  group = "TRT",
  outcome = "CRIT1FLN",
  covariates = c("BASE", "STRATA", "REGION")
)
ana_obj_prop <- analyse_mi_data(
  data = ADMI,
  vars = vars_binary,
  method = method,
  fun = gcomp_responder
)
pool_obj_prop <- pool(ana_obj_prop)
print(pool_obj_prop)

Final Notes



Try the rbmiUtils package in your browser

Any scripts or data that you put into this service are public.

rbmiUtils documentation built on June 8, 2025, 11:07 a.m.