Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup, message = FALSE---------------------------------------------------
library(multibias)
library(dplyr)
## -----------------------------------------------------------------------------
nhanes <- read.csv("nhanes.csv")
## -----------------------------------------------------------------------------
nhanes_filtered <- nhanes |>
mutate(alcohol_extreme = case_when(
alcohol_day_total > 14 * 1.5 & gender_female == 1 ~ 1,
alcohol_day_total > 28 * 1.5 & gender_female == 0 ~ 1,
TRUE ~ 0
)) |>
filter(age >= 18) |>
filter(eligstat == 1) |>
filter(alcohol_12mo > 0)
## -----------------------------------------------------------------------------
exposure_outcome_table <- nhanes_filtered |>
group_by(alcohol_extreme, mortstat) |>
summarize(count = n()) |>
ungroup() |>
mutate(proportion = count / sum(count))
# Distribution of Exposure and Outcome:
print(exposure_outcome_table)
demographic_table <- nhanes_filtered |>
group_by(alcohol_extreme) |>
summarize(
n = n(),
age_mean = mean(age),
age_sd = sd(age),
gender_prop = mean(gender_female)
) |>
mutate(
alcohol_extreme = if_else(
alcohol_extreme == 1, "Extreme", "Non-extreme"
)
)
# Demographic Characteristics by Exposure Status:
print(demographic_table)
## -----------------------------------------------------------------------------
base_mod <- glm(
mortstat ~ alcohol_extreme + gender_female + age,
data = nhanes_filtered,
family = binomial(link = "logit")
)
base_results <- data.frame(
term = c("Extreme Alcohol Use", "Female Gender", "Age"),
estimate = round(exp(coef(base_mod)), 2)[-1],
ci_lower = round(exp(confint(base_mod)), 2)[-1, 1],
ci_upper = round(exp(confint(base_mod)), 2)[-1, 2]
)
# Base Model Results: Odds Ratios and 95% Confidence Intervals
print(base_results)
## -----------------------------------------------------------------------------
# Create observed data object with uncontrolled confounding bias
df_obs1 <- data_observed(
data = nhanes_filtered,
bias = "uc",
exposure = "alcohol_extreme",
outcome = "mortstat",
confounders = c("age", "gender_female")
)
# Create validation data with smoking information
df_temp1 <- nhanes_filtered |>
filter(!is.na(smoked_100cigs))
df_val1 <- data_validation(
data = df_temp1,
true_exposure = "alcohol_extreme",
true_outcome = "mortstat",
confounders = c("age", "gender_female", "smoked_100cigs")
)
# Perform bias adjustment
set.seed(1234)
uc_adjusted <- multibias_adjust(df_obs1, df_val1)
# Uncontrolled Confounding Adjusted Results:
print(uc_adjusted)
## -----------------------------------------------------------------------------
# Create observed data object with both biases
df_obs2 <- data_observed(
data = nhanes_filtered,
bias = c("em", "uc"),
exposure = "alcohol_extreme",
outcome = "mortstat",
confounders = c("age", "gender_female")
)
# Create validation data with adjusted alcohol consumption
df_temp2 <- nhanes_filtered |>
filter(!is.na(smoked_100cigs)) |>
mutate(
# Assume 50% higher consumption for high SES individuals
alcohol_adj = if_else(
income == "$100,000 and Over" | education == "College graduate or above",
alcohol_day_total * 1.5,
alcohol_day_total
)
) |>
mutate(alcohol_extreme_adj = case_when(
alcohol_adj > 14 * 1.5 & gender_female == 1 ~ 1,
alcohol_adj > 28 * 1.5 & gender_female == 0 ~ 1,
TRUE ~ 0
))
df_val2 <- data_validation(
data = df_temp2,
true_exposure = "alcohol_extreme_adj",
true_outcome = "mortstat",
confounders = c("age", "gender_female", "smoked_100cigs"),
misclassified_exposure = "alcohol_extreme"
)
# Perform bias adjustment
set.seed(1234)
uc_em_adjusted <- multibias_adjust(df_obs2, df_val2)
# Exposure Misclassification & Confounding Adjusted Results:
print(uc_em_adjusted)
## -----------------------------------------------------------------------------
# Create observed data object with all three biases
df_obs3 <- data_observed(
data = nhanes_filtered,
bias = c("em", "uc", "sel"),
exposure = "alcohol_extreme",
outcome = "mortstat",
confounders = c("age", "gender_female")
)
# Prepare data for selection bias adjustment
df_temp3a <- nhanes_filtered |>
filter(!is.na(smoked_100cigs)) |>
mutate(
alcohol_adj = if_else(
income == "$100,000 and Over" | education == "College graduate or above",
alcohol_day_total * 1.5,
alcohol_day_total
),
alcohol_extreme_adj = case_when(
alcohol_adj > 14 * 1.5 & gender_female == 1 ~ 1,
alcohol_adj > 28 * 1.5 & gender_female == 0 ~ 1,
TRUE ~ 0
),
# Combine survey weights
weight = if_else(weight_day2 == 0, weight_day1, weight_day2)
)
# Create selection indicator based on sampling weights
set.seed(1234)
selected_sample <- sample(
x = df_temp3a$seqn,
size = nrow(df_temp3a),
replace = TRUE,
prob = df_temp3a$weight
)
df_selected_sample <- data.frame()
for (id in selected_sample) {
df_selected_sample <- rbind(
df_selected_sample,
df_temp3a[df_temp3a$seqn == id, ]
)
}
df_selected_sample$selection <- 1
df_not_selected_sample <- df_temp3a[!(df_temp3a$seqn %in% selected_sample), ]
df_not_selected_sample$selection <- 0
df_temp3b <- rbind(df_selected_sample, df_not_selected_sample)
# Create validation data with selection information
df_val3 <- data_validation(
data = df_temp3b,
true_exposure = "alcohol_extreme_adj",
true_outcome = "mortstat",
confounders = c("age", "gender_female", "smoked_100cigs"),
misclassified_exposure = "alcohol_extreme",
selection = "selection"
)
# Perform final bias adjustment
set.seed(1234)
final_adjusted <- multibias_adjust(df_obs3, df_val3)
# Triple Bias Adjusted Results:
print(final_adjusted)
## -----------------------------------------------------------------------------
# Create comparison table
comparison_table <- data.frame(
Model = c(
"Base Model",
"Adjusted - Single Bias",
"Adjusted - Double Biases",
"Adjusted - Triple Biases"
),
OR = c(
round(exp(coef(base_mod))["alcohol_extreme"], 2),
round(uc_adjusted$estimate, 2),
round(uc_em_adjusted$estimate, 2),
round(final_adjusted$estimate, 2)
),
CI_lower = c(
round(exp(confint(base_mod))["alcohol_extreme", 1], 2),
round(uc_adjusted$ci[1], 2),
round(uc_em_adjusted$ci[1], 2),
round(final_adjusted$ci[1], 2)
),
CI_upper = c(
round(exp(confint(base_mod))["alcohol_extreme", 2], 2),
round(uc_adjusted$ci[2], 2),
round(uc_em_adjusted$ci[2], 2),
round(final_adjusted$ci[2], 2)
)
)
# Comparison of Bias-Adjusted Estimates:
print(comparison_table)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.