Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
if (!requireNamespace("mgcv", quietly = TRUE)) {
stop("Package 'mgcv' is required to run this vignette.
Please install it with install.packages('mgcv').")
}
## ----setup--------------------------------------------------------------------
library(RegCalReliab)
## -----------------------------------------------------------------------------
library(mgcv)
set.seed(123)
# Helper for measurement error
add_err = function(v, sd = sqrt(0.4)) v + rnorm(length(v), 0, sd)
# True coefficient (on log-odds scale) and OR
beta = log(1.5)
OR_true = 1.5
## -----------------------------------------------------------------------------
simulate_once = function() {
# ---- True covariates ----
x = mgcv::rmvn(3000, c(0,0), matrix(c(1,0.3,0.3,1), 2))
# Error-free covariates (W1 = continuous, W2 = binary)
W1_main = rnorm(1500)
W2_main = rbinom(1500, 1, 0.5)
W1_rep = rnorm(1500)
W2_rep = rbinom(1500, 1, 0.5)
# ---- Main study error-prone Z ----
z.main = x[1:1500, 1:2] + matrix(rnorm(1500*2, 0, sqrt(0.4)), 1500, 2)
colnames(z.main) = c("z1","z2")
# ---- External replicates for Z ----
z1_rep = rbind(
cbind(add_err(x[1501:2000, 1]), add_err(x[1501:2000, 1]), NA, NA),
cbind(add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), NA),
cbind(add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]))
)
colnames(z1_rep) = paste0("z1_", 1:4)
z2_rep = rbind(
cbind(add_err(x[1501:2000, 2]), add_err(x[1501:2000, 2]), NA, NA),
cbind(add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), NA),
cbind(add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]))
)
colnames(z2_rep) = paste0("z2_", 1:4)
# ---- Outcome ----
p = plogis(-2.3 + beta*rowSums(x[1:1500, ]) + 0.5*W1_main - 0.3*W2_main)
Y = rbinom(1500, 1, p)
main_data = data.frame(
Y = Y,
z1 = z.main[, "z1"],
z2 = z.main[, "z2"],
W1 = W1_main,
W2 = W2_main
)
rep_data = data.frame(z1_rep, z2_rep, W1 = W1_rep, W2 = W2_rep, check.names = FALSE)
# ---- Regression Calibration ----
res = RC_ExReliab(
formula = Y ~ z1(z1_1, z1_2, z1_3, z1_4) + z2(z2_1, z2_2, z2_3, z2_4) + W1 + W2,
main_data = main_data,
rep_data = rep_data,
link = "logistic"
)
return(res)
}
## -----------------------------------------------------------------------------
B = 100
results_list = replicate(B, simulate_once(), simplify = FALSE)
# Extract naive + corrected tables
naive_tabs = lapply(results_list, function(x) x$uncorrected)
corrected_tabs = lapply(results_list, function(x) x$corrected)
## -----------------------------------------------------------------------------
avg_naive = Reduce("+", naive_tabs) / B
avg_corrected = Reduce("+", corrected_tabs) / B
cat("\nAverage Naive Logistic Estimates (B = ", B, "):\n", sep = "")
print(round(avg_naive, 5))
cat("\nAverage Corrected Logistic Estimates (B = ", B, "):\n", sep = "")
print(round(avg_corrected, 5))
## -----------------------------------------------------------------------------
inside_ci = function(tab, i, truth = OR_true) {
ci = tab[i , c("CI.low", "CI.high")]
ci[1] <= truth && truth <= ci[2]
}
row_z1 = which(rownames(avg_naive) == "z1")
row_z2 = which(rownames(avg_naive) == "z2")
coverage = function(tab_list, row) {
mean(sapply(tab_list, function(tab) inside_ci(tab, row))) * 100
}
cov_z1_naive = coverage(naive_tabs, row_z1)
cov_z1_corr = coverage(corrected_tabs, row_z1)
cov_z2_naive = coverage(naive_tabs, row_z2)
cov_z2_corr = coverage(corrected_tabs, row_z2)
cat("\nCoverage of TRUE OR = 1.5 for error-prone exposure z1:\n")
cat(sprintf(" • Naive : %5.1f %%\n", cov_z1_naive))
cat(sprintf(" • Regression Calibration: %5.1f %%\n", cov_z1_corr))
cat("\nCoverage of TRUE OR = 1.5 for error-prone exposure z2:\n")
cat(sprintf(" • Naive : %5.1f %%\n", cov_z2_naive))
cat(sprintf(" • Regression Calibration: %5.1f %%\n", cov_z2_corr))
## -----------------------------------------------------------------------------
library(mgcv)
set.seed(123)
# Helper for measurement error
add_err = function(v, sd = sqrt(0.4)) v + rnorm(length(v), 0, sd)
# True slope and OR
beta = log(1.5)
OR_true = 1.5
## -----------------------------------------------------------------------------
simulate_once = function() {
# ---- True covariates ----
x = mgcv::rmvn(3000, c(0,0,0),
matrix(c(1,0.3,0.2,
0.3,1,0.5,
0.2,0.5,1), nrow = 3))
# Binary W2 depends on x1
w2 = sapply(x[,1], function(t) {
if (t > median(x[,1])) rbinom(1,1,0.5) else rbinom(1,1,0.3)
})
# Error-free covariates
W = cbind(x[,3], w2)
colnames(W) = c("W1", "W2")
# ---- Replicate design ----
r = c(rep(1,1500), rep(2,500), rep(3,400), rep(4,600))
# Replicates for z1
z1 = rbind(
cbind(add_err(x[1:1500, 1]), NA, NA, NA),
cbind(add_err(x[1501:2000, 1]), add_err(x[1501:2000, 1]), NA, NA),
cbind(add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), NA),
cbind(add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]))
)
colnames(z1) = paste0("z1_",1:4)
# Replicates for z2
z2 = rbind(
cbind(add_err(x[1:1500, 2]), NA, NA, NA),
cbind(add_err(x[1501:2000, 2]), add_err(x[1501:2000, 2]), NA, NA),
cbind(add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), NA),
cbind(add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]))
)
colnames(z2) = paste0("z2_",1:4)
# ---- Outcome ----
p = plogis(-2.65 + beta*rowSums(x[,1:3]) + beta*w2)
Y = rbinom(3000, 1, p)
# ---- Main data with outcome, replicates, covariates ----
main_data = data.frame(Y, z1, z2, W)
# ---- Regression calibration ----
res = RC_InReliab(
formula = Y ~ myz1(z1_1, z1_2, z1_3, z1_4) +
myz2(z2_1, z2_2, z2_3, z2_4) +
W1 + W2,
main_data = main_data,
link = "logistic"
)
return(res)
}
## -----------------------------------------------------------------------------
B = 100
results_list = replicate(B, simulate_once(), simplify = FALSE)
# Collect naïve + corrected tables
naive_tabs = lapply(results_list, function(x) x$uncorrected)
corrected_tabs = lapply(results_list, function(x) x$corrected)
## -----------------------------------------------------------------------------
avg_naive = Reduce("+", naive_tabs) / B
avg_corrected = Reduce("+", corrected_tabs) / B
cat("\nAverage Naive Logistic Estimates (B = ", B, "):\n", sep = "")
print(round(avg_naive, 5))
cat("\nAverage Corrected Logistic Estimates (B = ", B, "):\n", sep = "")
print(round(avg_corrected, 5))
## -----------------------------------------------------------------------------
inside_ci = function(tab, i, truth = OR_true) {
ci = tab[i , c("CI.low", "CI.high")]
ci[1] <= truth && truth <= ci[2]
}
row_z1 = which(rownames(avg_naive) == "myz1")
row_z2 = which(rownames(avg_naive) == "myz2")
coverage = function(tab_list, row) {
mean(sapply(tab_list, function(tab) inside_ci(tab, row))) * 100
}
cov_z1_naive = coverage(naive_tabs, row_z1)
cov_z1_corr = coverage(corrected_tabs, row_z1)
cov_z2_naive = coverage(naive_tabs, row_z2)
cov_z2_corr = coverage(corrected_tabs, row_z2)
cat("\nCoverage of TRUE OR = 1.5 for error-prone exposure z1:\n")
cat(sprintf(" • Naive : %5.1f %%\n", cov_z1_naive))
cat(sprintf(" • Regression Calibration: %5.1f %%\n", cov_z1_corr))
cat("\nCoverage of TRUE OR = 1.5 for error-prone exposure z2:\n")
cat(sprintf(" • Naive : %5.1f %%\n", cov_z2_naive))
cat(sprintf(" • Regression Calibration: %5.1f %%\n", cov_z2_corr))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.