Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.width = 7,
fig.height = 5
)
## ----load-packages, message=FALSE---------------------------------------------
# Libraries
library(NlsyLinks)
library(discord)
library(utils)
library(tidyverse)
library(ggplot2)
# Set random seed for reproducibility
set.seed(1492)
# Disable scientific notation for clarity
options(scipen = 999)
conditions <- expand.grid(
total_pairs = c(100, 250, 500, 750, 1000),
relatedness = c(1, .5),
cov_a = c(0, 0.25),
cov_c = c(0, 0.25),
cov_e = c(0, 0.25),
ace_a = c(1),
ace_c = c(1),
ace_e = c(1)
)
## -----------------------------------------------------------------------------
set.seed(1492) # Set seed for reproducibility
n_trials <- 100
FAST <- FALSE # Set to FALSE for slower, more detailed analysis
results_list <- list()
name.results <- c("coef_xdiff", "p_xdiff", "r.squared")
for (cond in seq_along(conditions)) {
current <- conditions[cond, ]
temp_results <- matrix(NA, nrow = n_trials, ncol = length(name.results))
colnames(temp_results) <- name.results
for (i in 1:n_trials) {
trial <- kinsim(
r_vector = rep(current$relatedness, each = current$total_pairs),
npg_all = current$total_pairs,
ace_all = c(current$ace_a, current$ace_c, current$ace_e),
cov_a = current$cov_a,
cov_c = current$cov_c,
cov_e = current$cov_e,
variables = 2
)
extract <- data.frame(
id = trial$id, r = trial$r,
y_s1 = trial$y1_1, y_s2 = trial$y1_2,
x_s1 = trial$y2_1, x_s2 = trial$y2_2
)
if (FAST == TRUE) {
# faster
# double enter the data
extract2 <- rbind(
transform(extract,
y_s1 = y_s2, y_s2 = y_s1,
x_s1 = x_s2, x_s2 = x_s1
),
extract
)
extract2$y_diff <- extract2$y_s1 - extract2$y_s2
extract2$x_diff <- extract2$x_s1 - extract2$x_s2
extract2$x_bar <- (extract2$x_s1 + extract2$x_s2) / 2
extract2$y_bar <- (extract2$y_s1 + extract2$y_s2) / 2
# select pair with ydiff > 0
extract3 <- extract2[extract2$y_diff > 0, ]
fit <- tryCatch(
lm(y_diff ~ x_bar + y_bar + x_diff, data = extract3),
error = function(e) {
return(NULL)
}
)
}
# slower
if (FAST == FALSE) {
fit <- tryCatch(
discord_regression(
data = extract, outcome = "y", predictors = "x",
id = "id",
sex = NULL,
race = NULL,
fast = TRUE
),
error = function(e) {
return(NULL)
}
)
}
if (!is.null(fit)) {
sm <- summary(fit)
temp_results[i, "coef_xdiff"] <- coef(sm)["x_diff", "Estimate"]
temp_results[i, "p_xdiff"] <- coef(sm)["x_diff", "Pr(>|t|)"]
temp_results[i, "r.squared"] <- sm$r.squared
}
}
results_list[[cond]] <- as.data.frame(temp_results)
}
## ----summarize-power----------------------------------------------------------
power_summary <- lapply(results_list, function(res) {
data.frame(
power_xdiff = mean(res$p_xdiff < 0.05, na.rm = TRUE),
median_r2 = median(res$r.squared, na.rm = TRUE)
)
})
final_results <- cbind(conditions, do.call(rbind, power_summary))
## ----echo=FALSE, message=FALSE------------------------------------------------
# Define custom labels for each facet variable
facet_labels <- list(
cov_a = c("0" = "No Cov_A", "1" = "With Cov_A"),
cov_c = c("0" = "No Cov_C", "1" = "With Cov_C"),
cov_e = c("0" = "No Cov_E", "1" = "With Cov_E")
)
# Ensure factors with exact string levels
final_results$cov_a <- factor(final_results$cov_a,
levels = c(0, 0.25),
labels = c(
"No Covariate A",
"Covariate A (0.25)"
)
)
final_results$cov_c <- factor(final_results$cov_c,
levels = c(0, 0.25),
labels = c(
"No Covariate C",
"Covariate C (0.25)"
)
)
final_results$cov_e <- factor(final_results$cov_e,
levels = c(0, 0.25),
labels = c(
"No Covariate E",
"Covariate E (0.25)"
)
)
## ----plot-power, echo=FALSE---------------------------------------------------
final_results_noc <- final_results[final_results$cov_c == "No Covariate C", ]
p_noc <- ggplot(final_results_noc, aes(
x = total_pairs, y = power_xdiff,
fill = factor(relatedness),
color = factor(relatedness)
)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
title = "Power Analysis for Discordant Sibling Designs",
x = "Total Pairs",
y = "Power (p < 0.05)",
color = "Relatedness",
fill = "Relatedness"
) +
theme_minimal() +
geom_text(
x = 600, y = 0.55, label = "False Positive Rate",
data = data.frame(cov_a = "No Covariate A", cov_e = "No Covariate E"),
fontface = "italic", size = 3.5, inherit.aes = FALSE
) +
geom_text(
x = 600, y = 0.85, label = "Genetic Confounding",
data = data.frame(cov_a = "Covariate A (0.25)", cov_e = "No Covariate E"),
fontface = "italic", size = 3.5, inherit.aes = FALSE
) +
facet_grid(cov_e ~ cov_a, labeller = labeller(facet_labels))
p_noc
final_results_noa <- final_results[final_results$cov_a == "No Covariate A", ]
p_noa <- ggplot(final_results_noa, aes(
x = total_pairs, y = power_xdiff,
fill = factor(relatedness),
color = factor(relatedness)
)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
title = "Power Analysis for Discordant Sibling Designs",
x = "Total Pairs",
y = "Power (p < 0.05)",
color = "Relatedness",
fill = "Relatedness"
) +
theme_minimal() +
geom_text(
x = 600, y = 0.55, label = "False Positive Rate",
data = data.frame(cov_c = "No Covariate C", cov_e = "No Covariate E"),
fontface = "italic", size = 3.5, inherit.aes = FALSE
) +
geom_text(
x = 600, y = 0.85, label = "Shared-Environmental Confounding",
data = data.frame(cov_c = "Covariate C (0.25)", cov_e = "No Covariate E"),
fontface = "italic", size = 3.5, inherit.aes = FALSE
) +
facet_grid(cov_e ~ cov_c, labeller = labeller(facet_labels))
p_noa
## ----echo=FALSE, message=FALSE------------------------------------------------
knitr::kable(final_results)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.