library(sreg) library(dplyr) library(tidyr) library(extraDistr) library(tidyverse) rm(list = ls()) source("/Users/trifonovjuri/Desktop/sreg/R/2.0(dev)/fun_new.r")
Here we provide the results for the case with multiple treatments but without clusters (one covariate so far).
nsim <- 2000 tau_vec <- c(0.5, 1.5) n.treat <- 2 k <- 3 n <- 1200 treat_sizes <- c(1, 1, 1) theta_hat_mtrx <- matrix(ncol = n.treat, nrow = nsim) v_hat_mtrx <- matrix(ncol = n.treat, nrow = nsim) hit <- matrix(ncol = n.treat, nrow = nsim) for (s in 1:nsim) { data_raw <- dgp_po(n = n, theta.vec = tau_vec, n.treat = n.treat, gamma.vec = c(0.4, 0.2, 1)) # Checked that now it returns several X's data_df <- run_experiment_multi(data_raw, n.treat = n.treat, k = k, treat_sizes = treat_sizes) # 1 gets treatment 0, 1 gets treatment 1, 1 gets treatment 2 in each triplet data_full <- data.frame("Y" = data_df$y, "D" = data_df$A, "X" = data_df$X, "S" = data_df$block) #data_full <- data.frame("Y" = data_df$y, "D" = data_df$A, "X" = data.frame(data_df$X, "S" = data_df$block) N <- nrow(data_full) theta_hat_vec <- theta_hat_mult(data_full$Y, data_full$D, data_full$X, data_full$S)$tau.hat v_hat_vec <- var_hat_mult(data_full) / N theta_hat_mtrx[s, ] <- theta_hat_vec v_hat_mtrx[s, ] <- v_hat_vec upper <- theta_hat_vec + qnorm(0.975) * sqrt(v_hat_vec) lower <- theta_hat_vec - qnorm(0.975) * sqrt(v_hat_vec) hit[s, ] <- (tau_vec < upper) & (tau_vec > lower) print(s) } result_table <- data.frame( `True tau` = tau_vec, `E[tau_n]` = round(colMeans(theta_hat_mtrx), 4), `True Variance` = round(colMeans(v_hat_mtrx), 4), `E[V_n]` = round(apply(theta_hat_mtrx, 2, var), 4), `Coverage` = round(colMeans(hit), 4), check.names = FALSE ) print(result_table)
Here we provide the results for the case of cluster level treatment assignment with multiple treatments (one covariate so far).
n_treat <- 3 nsim <- 10000 tau_vec <- c(0.2, 1.2, 0.7) gamma_vec <- c(0.4, 0.2, 1) tau_hat_mtrx <- matrix(ncol = n_treat, nrow = nsim) var_hat_mtrx <- matrix(ncol = n_treat, nrow = nsim) hit_mtrx <- matrix(ncol = n_treat, nrow = nsim) G <- 500 Nmax <- 50 k <- 5 treat_sizes <- c(2, 1, 1, 1) # treatment 0 - p_1 individuals, treatment 1 - p_2 individuals, ... max.support <- Nmax / 10 - 1 # Ng <- rep(Nmax, G) Ng <- gen.cluster.sizes(G, max.support) for (s in 1:nsim) { data_pot <- dgp.po.creg(Ng = Ng, tau.vec = tau_vec, G = G, gamma.vec = gamma_vec, n.treat = n_treat) proba <- dgp.obs.creg(baseline = data_pot, n.treat = n_treat, k = k, treat_sizes = treat_sizes) data_cl <- data.frame("Y" = proba$Y, "D" = proba$D, proba$X, "S" = proba$S, "Ng" = proba$Ng, "G.id" = proba$G.id) tau_hat <- theta_hat_mult_cl_old(Y = data_cl$Y, D = data_cl$D, X = data_cl$x_1, S = data_cl$S, G.id = data_cl$G.id, Ng = data_cl$Ng) tau_hat_mtrx[s, ] <- tau_hat$tau.hat var_hat <- var_hat_mult_cl(Y = data_cl$Y, D = data_cl$D, X = data_cl$x_1, S = data_cl$S, G.id = data_cl$G.id, Ng = data_cl$Ng, tau.hat = tau_hat) / G var_hat_mtrx[s, ] <- var_hat upper <- tau_hat$tau.hat + qnorm(0.975) * sqrt(var_hat) lower <- tau_hat$tau.hat - qnorm(0.975) * sqrt(var_hat) hit_mtrx[s, ] <- (tau_vec < upper) & (tau_vec > lower) print(s) } result_table <- data.frame( `True tau` = tau_vec, `E[tau_n]` = round(colMeans(tau_hat_mtrx), 4), `E[V_n]` = round(colMeans(var_hat_mtrx), 4), `True Variance` = round(apply(tau_hat_mtrx, 2, var), 4), `Coverage` = round(colMeans(hit_mtrx), 4), check.names = FALSE ) print(result_table)
tau_vec <- c(0.5, 1.5) n.treat <- 2 k <- 3 n <- 900 treat_sizes <- c(1, 1, 1) datim <- sreg.rgen(n = 1200, cluster = TRUE, small.strata = TRUE, tau.vec = c(1.2, 0.8)) X <- data.frame(datim$x_1, datim$x_2, datim$Ng) sreg(Y = datim$Y, S = datim$S, D = datim$D, G.id = datim$G.id, Ng = datim$Ng, X = X, HC1 = TRUE, small.strata = TRUE) data_raw <- dgp_po(n = n, theta.vec = tau_vec, n.treat = n.treat, gamma.vec = c(0.4, 0.2, 1)) # Checked that now it returns several X's data_df <- run_experiment_multi(data_raw, n.treat = n.treat, k = k, treat_sizes = treat_sizes) # 1 gets treatment 0, 1 gets treatment 1, 1 gets treatment 2 in each triplet Y <- data_df$y D <- data_df$A X <- data.frame("x_1" = data_df$x_1, "x_2" = data_df$x_2)#, 'bor' = rnorm(24, 2, 1), 'god' = rnorm(24, 3, 1), 'bro'= rnorm(24, 3, 1), 'bro1'= rnorm(24, 3, 1), 'bro2'= rnorm(24, 3, 1)) S <- data_df$block data_full <- data.frame(Y, D, X, S) N <- nrow(data_full) theta_hat_vec <- theta_hat_mult(Y, D, X, S)#$tau.hat var_hat_vec <- var_hat_mult(Y, D, X, S, theta_hat_vec) / N # Using the aggregated master functions to test: tau_hat <- tau.hat.sreg.ss(Y, D, X, S) var_hat <- as.var.sreg.ss(Y, D, X, S, fit = tau_hat) / N var_hat <- as.var.sreg.ss(Y, D, X, S, fit = tau_hat, HC1 = TRUE) / N sqrt(var_hat) tau_hat <- tau.hat.sreg.ss(Y, D, X = NULL, S) var_hat <- as.var.sreg.ss(Y, D, X = NULL, S, fit = NULL, HC1 = TRUE) / N X <- data.frame("age" = data_df$x_1)#, "x_2" = data_df$x_2)#, 'bor' = rnorm(24, 2, 1), 'god' = rnorm(24, 3, 1), 'bro'= rnorm(24, 3, 1), 'bro1'= rnorm(24, 3, 1), 'bro2'= rnorm(24, 3, 1), 'bro3' = rnorm(24, 5, 2)) agg_result <- res.sreg.ss(Y, S, D, X, HC1 = TRUE) agg_result$data any(sapply(agg_result$beta.hat, function(x) any(is.na(x)))) any(sapply(agg_result$se.rob, function(x) any(is.infinite(x)))) abju <- sreg(Y, S = S, D, G.id = NULL, Ng = NULL, X = NULL, HC1 = TRUE, small.strata = TRUE) abju$lin.adj nsim <- 1000 n <- 1200 k <- 3 theta_vec <- c(0.5, 1.5) n.treat <- 2 treat_sizes <- c(1, 1, 1) theta_hat_mtrx <- matrix(nrow = nsim, ncol = n.treat) var_hat_mtrx <- matrix(nrow = nsim, ncol = n.treat) hit_mtrx <- matrix(nrow = nsim, ncol = n.treat) for (s in 1:nsim) { data_raw <- dgp_po(n = n, theta.vec = theta_vec, n.treat = n.treat, gamma.vec = c(0.4, 0.2, 1)) # Checked that now it returns several X's data_df <- run_experiment_multi(data_raw, n.treat = n.treat, k = k, treat_sizes = treat_sizes) # 1 gets treatment 0, 1 gets treatment 1, 1 gets treatment 2 in each triplet Y <- data_df$y D <- data_df$A X <- data.frame("x_1" = data_df$x_1, "x_2" = data_df$x_2) S <- data_df$block data_full <- data.frame(Y, D, X, S) N <- nrow(data_full) fit <- theta_hat_mult(Y, D, X, S) theta_hat_vec <- fit$tau.hat var_hat_vec <- var_hat_mult(Y, D, X, S, fit) / N theta_hat_mtrx[s, ] <- theta_hat_vec var_hat_mtrx[s, ] <- var_hat_vec upper <- theta_hat_vec + qnorm(0.975) * sqrt(var_hat_vec) lower <- theta_hat_vec - qnorm(0.975) * sqrt(var_hat_vec) hit_mtrx[s, ] <- (theta_vec < upper) & (theta_vec > lower) print(s) } colMeans(theta_hat_mtrx) colMeans(var_hat_mtrx) apply(theta_hat_mtrx, 2, var) colMeans(hit_mtrx) data_full %>% group_by(S) %>% summarize( Y_treated = mean(Y[D == 1], na.rm = TRUE), Y_control = mean(Y[D == 0], na.rm = TRUE), X_treated = list(colMeans(X[D == 1, , drop = FALSE], na.rm = TRUE)), X_control = list(colMeans(X[D == 0, , drop = FALSE], na.rm = TRUE)), k = n(), l = sum(D == 1), q = sum(D == 0), .groups = "drop" ) agg_data_old <- data_full %>% group_by(S) %>% summarize( Y_treated = mean(Y[D == 1], na.rm = TRUE), Y_control = mean(Y[D == 0], na.rm = TRUE), X_treated = list(colMeans(X[D == 1, , drop = FALSE], na.rm = TRUE)), X_control = list(colMeans(X[D == 0, , drop = FALSE], na.rm = TRUE)), k = n(), l = sum(D == 1), q = sum(D == 0), .groups = "drop" ) # Turn list columns into two separate columns per covariate agg_data_old <- agg_data_old %>% mutate( X_treated_1 = map_dbl(X_treated, 1), X_treated_2 = map_dbl(X_treated, 2), X_control_1 = map_dbl(X_control, 1), X_control_2 = map_dbl(X_control, 2) ) # Now view the first few rows agg_data_old agg_data <- data_full %>% group_split(S) %>% map_dfr(~ { df <- . list( S = df$S[1], Y_treated = mean(df$Y[df$D == 1], na.rm = TRUE), Y_control = mean(df$Y[df$D == 0], na.rm = TRUE), X_treated = list(colMeans(df[df$D == 1, grep("^x_", names(df)), drop = FALSE])), X_control = list(colMeans(df[df$D == 0, grep("^x_", names(df)), drop = FALSE])), k = nrow(df), l = sum(df$D == 1), q = sum(df$D == 0) ) }) # Turn list columns into two separate columns per covariate agg_data <- agg_data %>% mutate( X_treated_1 = map_dbl(X_treated, 1), X_treated_2 = map_dbl(X_treated, 2), X_control_1 = map_dbl(X_control, 1), X_control_2 = map_dbl(X_control, 2) )
n_treat <- 3 nsim <- 10 tau_vec <- c(0.2, 1.2, 0.7) gamma_vec <- c(0.4, 0.2, 1) # n.treat = 2 k <- 5 treat_sizes <- c(2, 1, 1, 1) tau_hat_mtrx <- matrix(ncol = n_treat, nrow = nsim) tau_hat_mtrx_old <- matrix(ncol = n_treat, nrow = nsim) var_hat_mtrx <- matrix(ncol = n_treat, nrow = nsim) var_hat_old_mtrx <- matrix(ncol = n_treat, nrow = nsim) hit_mtrx <- matrix(ncol = n_treat, nrow = nsim) G <- 500 Nmax <- 50 # treat_sizes = c(2, 1, 1, 1) # treatment 0 - p_1 individuals, treatment 1 - p_2 individuals, ... max.support <- Nmax / 10 - 1 # Ng <- rep(Nmax, G) Ng <- gen.cluster.sizes(G, max.support) for (s in 1:nsim) { Ng <- gen.cluster.sizes(G, max.support) data_pot <- dgp.po.creg(Ng = Ng, tau.vec = tau_vec, G = G, gamma.vec = gamma_vec, n.treat = n_treat) proba <- dgp.obs.creg(baseline = data_pot, n.treat = n_treat, k = k, treat_sizes = treat_sizes) data_cl <- data.frame("Y" = proba$Y, "D" = proba$D, proba$X, "S" = proba$S, "Ng" = proba$Ng, "G.id" = proba$G.id) Y <- data_cl$Y D <- data_cl$D X <- data.frame("x_1" = data_cl$x_1, 'x_2' = data_cl$x_2) S <- data_cl$S G.id <- data_cl$G.id Ng <- data_cl$Ng data_full <- data.frame(Y, D, X, S, G.id, Ng) #X <- data.frame("x_1" = data_full$x_1, 'x_2' = data_full$x_2) tau_hat <- theta_hat_mult_cl(Y, D, X, S, G.id, Ng) var_hat <- var_hat_mult_cl(Y, D, X, S, G.id, Ng, fit = tau_hat) / G tau_hat_1 <- tau.hat.creg.ss(Y, D, X, S, G.id, Ng) var_hat_1 <- as.var.creg.ss(Y, D, X, S, G.id, Ng, tau_hat_1, HC1 = FALSE) / G tau_hat_1 <- tau.hat.creg.ss(Y, D, X = NULL, S, G.id, Ng) var_hat_1 <- as.var.creg.ss(Y, D, X = NULL, S, G.id, Ng) / G agg_result <- res.creg.ss(Y, S, D, G.id, Ng = NULL, X = X, HC1 = TRUE) tau_hat_1 = tau.hat.creg.ss(Y, D, X = X, S, G.id, Ng = NULL) as.var.creg.ss(Y, D, X, S, G.id, Ng = NULL, tau_hat_1, HC1 = FALSE) / G head(agg_result$lin.adj) !check.cluster(data.frame("G.id" = agg_result$data$G.id, agg_result$lin.adj)) X <- data.frame('Ng' = data_cl$Ng, 'x_2' = data_cl$x_2) result <- sreg(Y, S = S, D, G.id, Ng = NULL, X = NULL, HC1 = TRUE, small.strata = TRUE) result result$small.strata # tau_hat_old <- theta_hat_mult_cl_old(Y, D, X, S, G.id, Ng) # var_hat_old <- var_hat_mult_cl_old(Y, D, X, S, G.id, Ng, tau.hat = tau_hat_old) / G tau_hat_mtrx[s, ] <- tau_hat$tau.hat # tau_hat_mtrx_old[s, ] <- tau_hat_old$tau.hat var_hat_mtrx[s, ] <- var_hat # var_hat_old_mtrx[s, ] <- var_hat_old upper <- tau_hat$tau.hat + qnorm(0.975) * sqrt(var_hat) lower <- tau_hat$tau.hat - qnorm(0.975) * sqrt(var_hat) hit_mtrx[s, ] <- (tau_vec < upper) & (tau_vec > lower) # var_hat <- var_hat_mult_cl(Y = data_cl$Y, D = data_cl$D, X = data_cl$x_1, S = data_cl$S, G.id = data_cl$G.id, Ng = data_cl$Ng, tau.hat = tau_hat) / G # var_hat_mtrx[s, ] <- var_hat # upper <- tau_hat$tau.hat + qnorm(0.975) * sqrt(var_hat) # lower <- tau_hat$tau.hat - qnorm(0.975) * sqrt(var_hat) # hit_mtrx[s, ] <- (tau_vec < upper) & (tau_vec > lower) print(s) } colMeans(tau_hat_mtrx) colMeans(tau_hat_mtrx_old) apply(tau_hat_mtrx, 2, var) colMeans(var_hat_mtrx) apply(tau_hat_mtrx_old, 2, var) colMeans(hit_mtrx)
theta_hat_mult <- function(Y, D, X, S) { tau.hat <- numeric(max(D)) beta.hat <- matrix(ncol = ncol(X), nrow = max(D)) data_full <- data.frame(Y, D, X, S) for (d in 1:max(D)) { # Compute averages for treated and control within each stratum
agg_data <- data_full %>% group_split(S) %>% map_dfr(~{ df <- . list( S = df$S[1], Y_treated = mean(df$Y[df$D == d], na.rm = TRUE), Y_control = mean(df$Y[df$D == 0], na.rm = TRUE), X_treated = list(colMeans(df[df$D == d, grep("^x_", names(df)), drop = FALSE])), # mean value among treated in each stratum X_control = list(colMeans(df[df$D == 0, grep("^x_", names(df)), drop = FALSE])), # mean value among control in each stratum k = nrow(df), l = sum(df$D == d), q = sum(df$D == 0) ) })
Y_diff <- with(agg_data, Y_treated - Y_control)
X_diff_mat <- map2(agg_data$X_treated, agg_data$X_control, ~ .x - .y) %>% do.call(rbind, .)
data_decomp <- as.data.frame(agg_data)
X_treated_mat <- do.call(rbind, data_decomp$X_treated)
X_control_mat <- do.call(rbind, data_decomp$X_control)
lm_model <- lm(Y_diff ~ ., data = as.data.frame(cbind(Y_diff, X_diff_mat)))
beta_hat <- unname(lm_model$coefficients[-1])
X_mat <- as.matrix(data_full[, grepl("^x_", names(data_full))]) print(X_mat) X_bar <- colMeans(X_mat) X_dem <- sweep(X_mat, 2, X_bar)
theta_hat_adj <- sum((data_full$Y * (data_full$D == d))) / sum((data_full$D == d)) - sum((data_full$Y) * (data_full$D == 0)) / sum(data_full$D == 0) - as.numeric(t(colMeans(X_dem[data_full$D == d, , drop = FALSE]) - colMeans(X_dem[data_full$D == 0, , drop = FALSE])) %*% beta_hat) tau.hat[d] <- theta_hat_adj beta.hat[d, ] <- beta_hat }
ret_list <- list( tau.hat = tau.hat, beta.hat = beta.hat ) return(ret_list) }
theta_hat_mult_cl_old <- function(Y, D, X, S, G.id, Ng)
{
tau.hat <- numeric(max(D))
beta.hat <- numeric(max(D))
working.df <- data.frame(Y, S, D, G.id, Ng, X) Y.bar.g <- aggregate(Y ~ G.id, working.df, mean) #print(Y.bar.g) cl.lvl.data <- unique(working.df[, c("G.id", "D", "S", "Ng", names(working.df)[6:ncol(working.df)])]) cl.lvl.data <- data.frame("Y.bar" = Y.bar.g$Y, cl.lvl.data) data <- cl.lvl.data N.bar.G <- mean(data$Ng) #??? Why is this so weird? #N.bar.G <- Ng #print(N.bar.g) #print(data) #print(head(working.df, 200)) #print(data) for (d in 1:max(D)) { # Compute averages for treated and control within each stratum agg_data <- data %>% group_by(S) %>% summarise( Y_treated = mean(Y.bar[D == d] * N.bar.G, na.rm = TRUE), Y_control = mean(Y.bar[D == 0] * N.bar.G, na.rm = TRUE), X_treated = mean(x_1[D == d], na.rm = TRUE), X_control = mean(x_1[D == 0], na.rm = TRUE), k = n(), # Total units in stratum (should be 2) l = sum(D == d), # Number of treated units (should be 1) q = sum(D == 0) ) %>%
mutate( Y_diff = (1/l) * Y_treated - (1/q) * Y_control, X_diff = (1/l) * X_treated - (1/q) * X_control, k = k, l = l, q = q )
data_decomp <- as.data.frame(agg_data)
lm_model <- lm(Y_diff ~ X_diff, data = data_decomp) print(summary(lm_model)) beta_hat <- coef(lm_model)[2] X_bar <- mean(data$x_1) X_dem <- data$x_1 - X_bar
theta_hat_adj <- sum((data$Y.bar * data$Ng * (data$D == d))) / sum((data$D == d) * data$Ng) - sum(data$Y.bar * data$Ng * (data$D == 0)) / sum((data$D == 0) * data$Ng) - (sum(X_dem * (data$D == d)) / sum((data$D == d) * data$Ng) - sum(X_dem * (data$D == 0)) / sum((data$D == 0) * data$Ng)) * beta_hat tau.hat[d] <- theta_hat_adj beta.hat[d] <- beta_hat }
ret_list <- list( tau.hat = tau.hat, beta.hat = beta.hat ) return(ret_list) }
library(devtools) # install devtools library(sreg) library(sandwich) library(lubridate) library(compiler) library(extraDistr) library(VGAM) library(Matrix) library(parallel) library(progress) library(purrr) library(SimDesign) library(pbapply)
rm(list = ls())
options(scipen = 999) # disable scientific notation
num_cores <- detectCores() cl <- makeCluster(num_cores, outfile = "")
clusterEvalQ(cl, { library(lubridate) library(compiler) library(extraDistr) library(Matrix) library(progress) library(parallel) library(sreg) library(pbapply) library(dplyr) library(purrr) library(tidyverse) source("/Users/trifonovjuri/Desktop/sreg/R/2.0(dev)/fun_new.r") })
sim.func <- function(sim.id) { seed <- 1000 + sim.id set.seed(seed)
# --- DGP settings --- n.treat <- 2 tau.vec <- c(0.2, 1.2) # true ATEs gamma.vec <- c(0.4, 0.2, 1) k <- 4 treat_sizes <- c(1, 1, 2) G <- 12000 Nmax <- 50 max.support <- Nmax / 10 - 1
# --- Generate data --- Ng <- gen.cluster.sizes(G, max.support) data_pot <- dgp.po.creg(Ng = Ng, tau.vec = tau.vec, G = G, gamma.vec = gamma.vec, n.treat = n.treat) proba <- dgp.obs.creg(baseline = data_pot, n.treat = n.treat, k = k, treat_sizes = treat_sizes)
data_cl <- data.frame( Y = proba$Y, D = proba$D, proba$X, S = proba$S, Ng = proba$Ng, G.id = proba$G.id )
Y <- data_cl$Y D <- data_cl$D X <- data.frame("x_1" = data_cl$x_1) # or include other covariates as needed S <- data_cl$S G.id <- data_cl$G.id Ng <- data_cl$Ng
result <- tryCatch({ # Estimate ATEs and variances theta_res <- theta_hat_mult_cl(Y, D, X, S, G.id, Ng) tau_hat <- theta_res$tau.hat var_hat <- var_hat_mult_cl(Y, D, X, S, G.id, Ng, fit = theta_res) / G se_hat <- sqrt(var_hat)
# Confidence intervals and coverage CI.left <- tau_hat - qnorm(0.975) * se_hat CI.right <- tau_hat + qnorm(0.975) * se_hat tstat <- tau_hat / se_hat ci.hit <- as.numeric(tau.vec >= CI.left & tau.vec <= CI.right) list( tau = tau_hat, se = se_hat, tstat = tstat, CI.left = CI.left, CI.right = CI.right, ci.hit = ci.hit )
}, error = function(e) { cat("Simulation", sim.id, "encountered an error:", conditionMessage(e), "\n") list( tau = NA, se = NA, tstat = NA, CI.left = NA, CI.right = NA, ci.hit = NA ) })
return(result) }
simres <- pblapply(1:50000, sim.func, cl=cl) stopCluster(cl) tau <- na.omit(as.matrix(sapply(simres, function(simres) simres$tau))) se <- na.omit(as.matrix(sapply(simres, function(simres) simres$se))) ci.hit <- na.omit(as.matrix(sapply(simres, function(simres) simres$ci.hit))) rowMeans(tau) apply(tau, 1, sd) rowMeans(se) rowMeans(ci.hit)
library("sreg") library("dplyr") library("haven")
data <- sreg.rgen(n = 1200, tau.vec = c(0.2, 0.5, 0.8), n.strata = 4, cluster = FALSE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) Y <- data$Y head(data)
S <- data$S D <- data$D X <- data.frame("age" = data$x_1, "sex" = data$x_2) G.id <- data$G.id Ng <- data$Ng result <- sreg(Y, S, D, G.id = G.id, Ng = Ng, X) sreg(Y, S = S, D, G.id = NULL, Ng = NULL, X = X) print(result) plot(result)
dtapm <- sreg.rgen(n = 100, tau.vec = c(1.2), cluster = TRUE, small.strata = TRUE, k = 2, treat.sizes = c(1, 1)) result <- sreg(Y = dtapm$Y, S = dtapm$S, D = dtapm$D, X = dtapm$X, small.strata = TRUE, G.id = dtapm$G.id, Ng = dtapm$Ng) result head(dtapm) data <- sreg.rgen(n = 300, tau.vec = c(1.2, 0.8), cluster = FALSE, small.strata = TRUE, k = 3, treat.sizes = c(1, 1, 1)) head(data)
result <- sreg(Y = data$Y, S = data$S, D = data$D, X = data.frame('x_1' = data$x_1, 'x_2' = data$x_2), small.strata = TRUE) print(result)
max(result$data$S)
length(result$data$Y) / max(result$data$S)
data("AEJapp") data <- AEJapp Y <- data$gradesq34 D <- data$treatment S <- data$class_level data.clean <- data.frame(Y, D, S) data.clean <- data.clean %>% mutate(D = ifelse(D == 3, 0, D)) Y <- data.clean$Y D <- data.clean$D S <- data.clean$S head(data.clean)
result <- sreg::sreg(Y = Y, S = S, D = D) print(result)
pills <- data$pills_taken age <- data$age_months data.clean <- data.frame(Y, D, S, pills, age) data.clean <- data.clean %>% mutate(D = ifelse(D == 3, 0, D)) Y <- data.clean$Y D <- data.clean$D S <- data.clean$S X <- data.frame("pills" = data.clean$pills, "age" = data.clean$age) result <- sreg::sreg(Y, S, D, G.id = NULL, X = X) print(result)
result
plot_sreg_results <- function(result) { # Load ggplot2 library(ggplot2)
# Construct a data frame from result df <- data.frame( treatment = factor(seq_along(result$tau.hat)), # factor for x-axis tau = result$tau.hat, CI.lower = result$CI.left, CI.upper = result$CI.right )
# Create the plot ggplot(df, aes(x = treatment, y = tau)) + geom_point(size = 3) + geom_errorbar(aes(ymin = CI.lower, ymax = CI.upper), width = 0.1) + labs( x = "Treatment Group", y = "Estimated ATE", title = "Point Estimates and Confidence Intervals for ATE" ) + theme_minimal() }
plot_sreg_results_fancy <- function(result) { library(ggplot2)
df <- data.frame( treatment = factor(seq_along(result$tau.hat)), tau = result$tau.hat, CI.lower = result$CI.left, CI.upper = result$CI.right )
# Add numeric version of treatment for proper ribbon plotting df$treatment_num <- as.numeric(df$treatment)
ggplot(df, aes(x = treatment_num, y = tau)) + # Confidence interval ribbon with transparency geom_ribbon(aes(ymin = CI.lower, ymax = CI.upper), fill = "#1f78b4", alpha = 0.2) + # ATE point estimates geom_point(size = 4, color = "#1f78b4") + # Optional dashed line connecting points geom_line(color = "#1f78b4", linetype = "dashed") + # Horizontal line at 0 geom_hline(yintercept = 0, linetype = "dotted", color = "gray40") + # Set treatment labels on x-axis scale_x_continuous( breaks = df$treatment_num, labels = df$treatment ) + labs( x = "Treatment Group", y = "Estimated ATE", title = "Estimated ATEs with Confidence Intervals" ) + theme_minimal(base_size = 14) + theme( axis.title = element_text(face = "bold"), panel.grid.major.y = element_line(color = "gray90") ) } plot_sreg_results(result) plot_sreg_results_fancy(result) plot_sreg_horizontal(result) plot_sreg_horizontal <- function(result, treatment_labels = NULL) { library(ggplot2)
df <- data.frame( treatment = seq_along(result$tau.hat), tau = result$tau.hat, CI.lower = result$CI.left, CI.upper = result$CI.right, SE = round((result$CI.right - result$tau.hat) / 1.96, 2) )
if (!is.null(treatment_labels)) { df$label <- factor(treatment_labels, levels = rev(treatment_labels)) } else { df$label <- factor(paste("Treatment", df$treatment), levels = rev(paste("Treatment", df$treatment))) }
ggplot(df, aes(y = label)) + # Horizontal CI bar geom_rect(aes(xmin = CI.lower, xmax = CI.upper, ymin = as.numeric(label) - 0.01, ymax = as.numeric(label) + 0.01), fill = "#2c3e50", alpha = 0.8) + # Diamond-shaped point geom_point(aes(x = tau), shape = 23, size = 3, fill = "white", stroke = 1.2, color = "black") + # Text label with tau (SE) geom_text(aes(x = tau, label = sprintf("%.2f (%.2f)", tau, SE)), vjust = -1.5, color = "black", size = 4) + # Vertical line at 0 geom_vline(xintercept = 0, linetype = "dashed", color = "gray40") + labs( x = "Treatment Effect", y = NULL, title = "Estimated ATEs with Confidence Intervals" ) + theme_minimal(base_size = 14) + theme( axis.text.y = element_text(hjust = 1), axis.title.x = element_text(face = "bold") ) }
result
plot_sreg_horizontal <- function(result, treatment_labels = NULL) { library(ggplot2)
df <- data.frame( treatment = seq_along(result$tau.hat), tau = result$tau.hat, CI.lower = result$CI.left, CI.upper = result$CI.right, SE = result$se.rob )
if (!is.null(treatment_labels)) { df$label <- factor(treatment_labels, levels = rev(treatment_labels)) } else { df$label <- factor(paste("Treatment", df$treatment), levels = rev(paste("Treatment", df$treatment))) }
ggplot(df, aes(y = label)) + geom_rect(aes( xmin = CI.lower, xmax = CI.upper, ymin = as.numeric(label) - 0.01, ymax = as.numeric(label) + 0.01, fill = tau ), alpha = 0.8) + geom_point(aes(x = tau), shape = 23, size = 3, fill = "white", stroke = 1.2, color = "black") + geom_text(aes(x = tau, label = sprintf("%.2f (%.2f)", tau, SE)), vjust = -1.5, color = "black", size = 4) + geom_vline(xintercept = 0, linetype = "dashed", color = "gray40") + scale_fill_viridis_c(option = "viridis", name = "Effect Size") + #scale_fill_gradient(low = "#bdd7e7", high = "#08519c", name = "Effect Size") + labs( x = "Treatment Effect", y = NULL, title = "Estimated ATEs with Confidence Intervals" ) + theme_minimal(base_size = 14) + theme( axis.text.y = element_text(hjust = 1), axis.title.x = element_text(face = "bold"), legend.position = "right" ) }
library(viridis) plot_sreg_horizontal(result)
result plot(result)
class(result)
# ------------------------------------------------------------------ # Classify strata as "small" or "big" in a mixed design # ------------------------------------------------------------------ # design.classifier <- function(data, S, G.id = NULL, keep.size = FALSE, warn = TRUE) { # ## 1. Capture column names (works with bare names or strings) # S_name <- if (is.character(substitute(S))) substitute(S) else deparse(substitute(S)) # G_name <- if (!missing(G.id)) { # if (is.character(substitute(G.id))) substitute(G.id) else deparse(substitute(G.id)) # } else { # NULL # } # ## 2. Compute stratum sizes # if (!is.null(G_name)) { # # -- clustered: count *clusters* per stratum # cluster_strata <- dplyr::distinct(data, .data[[S_name]], .data[[G_name]]) # strata_sizes <- dplyr::count(cluster_strata, .data[[S_name]], name = "size") # } else { # # -- individual: count *units* per stratum # strata_sizes <- dplyr::count(data, .data[[S_name]], name = "size") # } # ## 3. If every stratum has the same size, nothing to classify # if (length(unique(strata_sizes$size)) == 1) { # if (!keep.size) strata_sizes$size <- NULL # cosmetic # return(dplyr::left_join(data, strata_sizes, by = S_name)) # } # ## 4. Find the modal size and classify # modal_size <- strata_sizes %>% # dplyr::count(size) %>% # dplyr::arrange(dplyr::desc(n)) %>% # dplyr::pull(size) %>% # .[[1]] # strata_sizes <- strata_sizes %>% # dplyr::mutate(stratum_type = ifelse(size == modal_size, "small", "big")) # if (!keep.size) strata_sizes$size <- NULL # ## 5. Merge back and optionally warn # out <- dplyr::left_join(data, strata_sizes, by = S_name) # if (warn) # warning("Mixed design detected: strata of varying cluster/unit counts. ", # "Weighted estimators will be used.", call. = FALSE) # return(out) # }
data_s <- sreg.rgen(n = 3000, tau.vec = c(0.2, 0.8), n.strata = 4, cluster = TRUE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) Y <- data$Y S <- data$S
S <- data_s$S
data_b <- sreg.rgen(n = 50, tau.vec = c(0.2, 0.8), n.strata = 2, cluster = TRUE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2)
max_id <- max(data_s$S)
unique_b_strata <- sort(unique(data_b$S)) num_b_strata <- length(unique_b_strata)
new_ids <- seq(max_id + 1, max_id + num_b_strata) stratum_map <- setNames(new_ids, unique_b_strata)
data_b$S <- stratum_map[as.character(data_b$S)]
max_gid <- max(data_s$G.id) data_b$G.id <- data_b$G.id + max_gid length(intersect(data_s$G.id, data_b$G.id)) == 0
data <- rbind(data_s[, -ncol(data_s)], data_b) S <- data$S G.id <- data$G.id
design.classifier(data, S, G.id)
data_all <- design.classifier(data, S, G.id)
data_small <- dplyr::filter(data_all, stratum_type == "small") data_big <- dplyr::filter(data_all, stratum_type == "big")
res_small <- res.sreg.ss(Y = data_small$Y, D = data_small$D, S = data_small$S, X = data.frame(data_small$x_1, data_small$x_2), HC1 = TRUE) res_small <- res.creg.ss(Y = data_small$Y, D = data_small$D, S = data_small$S, G.id = data_small$G.id, Ng = data_small$Ng, X = data.frame(data_small$x_1, data_small$x_2), HC1 = FALSE) res_small$tau.hat res_small$se.rob
res_big <- res.sreg(Y = data_big$Y, D = data_big$D, S = data_big$S, X = NULL, HC1 = TRUE) res_big <- res.creg(Y = data_big$Y, D = data_big$D, S = data_big$S, X = NULL, G.id = data_big$G.id, Ng = data_big$Ng, HC1 = FALSE) res_big$tau.hat res_big$se.rob
N_small <- nrow(data_small) N_big <- nrow(data_big) N_total <- N_small + N_big
tau_hat <- (N_small / N_total) * res_small$tau.hat + (N_big / N_total) * res_big$tau.hat
se_combined <- sqrt((N_small / N_total)^2 * res_small$se.rob^2 + (N_big / N_total)^2 * res_big$se.rob^2)
## Simulations for the combined estimator (sreg) ```r nsim = 1000 tau.vec = c(0.2, 0.8) n.treat = length(tau.vec) n_1 = 3000 n_2 = 50 theta_hat_mtrx <- matrix(ncol = n.treat, nrow = nsim) se_hat_mtrx <- matrix(ncol = n.treat, nrow = nsim) hit <- matrix(ncol = n.treat, nrow = nsim) for (s in 1:nsim) { data_s <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = FALSE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) Y <- data$Y S <- data$S data_b <- sreg.rgen(n = n_2, tau.vec = tau.vec, n.strata = 2, cluster = FALSE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) # Step 1: Get the max stratum ID in data_s max_id <- max(data_s$S) # Step 2: Get unique strata in data_b and assign new IDs unique_b_strata <- sort(unique(data_b$S)) num_b_strata <- length(unique_b_strata) # Create a named mapping from old to new stratum IDs new_ids <- seq(max_id + 1, max_id + num_b_strata) stratum_map <- setNames(new_ids, unique_b_strata) # Step 3: Relabel data_b$S data_b$S <- stratum_map[as.character(data_b$S)] data <- rbind(data_s, data_b) ## ----- ## ----- ## ----- ## ---- ## ----- # # Step 1: Classify strata data_all <- design.classifier(data, S) # Step 2: Split the data data_small <- dplyr::filter(data_all, stratum_type == "small") data_big <- dplyr::filter(data_all, stratum_type == "big") # Step 3: Run the estimators #res_small <- res.sreg.ss(Y = data_small$Y, D = data_small$D, S = data_small$S, X = data_small[, grep("^x_", names(data_small)), drop = FALSE], HC1 = TRUE) res_small <- res.sreg.ss(Y = data_small$Y, D = data_small$D, S = data_small$S, X = NULL, HC1 = FALSE) #res_big <- res.sreg(Y = data_big$Y, D = data_big$D, S = data_big$S, X = data_big[, grep("^x_", names(data_big)), drop = FALSE], HC1 = TRUE) res_big <- res.sreg(Y = data_big$Y, D = data_big$D, S = data_big$S, X = NULL, HC1 = FALSE) # Step 4: Combine estimates N_small <- nrow(data_small) N_big <- nrow(data_big) N_total <- N_small + N_big tau_hat <- (N_small / N_total) * res_small$tau.hat + (N_big / N_total) * res_big$tau.hat se_combined <- sqrt((N_small / N_total)^2 * res_small$se.rob^2 + (N_big / N_total)^2 * res_big$se.rob^2) theta_hat_mtrx[s, ] <- tau_hat se_hat_mtrx[s, ] <- se_combined upper <- tau_hat + qnorm(0.975) * se_combined lower <- tau_hat - qnorm(0.975) * se_combined hit[s, ] <- (tau.vec < upper) & (tau.vec > lower) print(s) } result_table <- data.frame( `True tau` = tau.vec, `E[tau_n]` = round(colMeans(theta_hat_mtrx), 4), `True Variance` = round(colMeans(se_hat_mtrx), 4), `E[V_n]` = round(apply(theta_hat_mtrx, 2, sd), 4), `Coverage` = round(colMeans(hit), 4), check.names = FALSE ) print(result_table)
nsim = 1000 tau.vec = c(0.2, 0.8) n.treat = length(tau.vec) n_1 = 3000 n_2 = 50 theta_hat_mtrx <- matrix(ncol = n.treat, nrow = nsim) se_hat_mtrx <- matrix(ncol = n.treat, nrow = nsim) hit <- matrix(ncol = n.treat, nrow = nsim) for (s in 1:nsim) { data_s <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = TRUE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) Y <- data$Y S <- data$S data_b <- sreg.rgen(n = n_2, tau.vec = tau.vec, n.strata = 2, cluster = TRUE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) # Step 1: Get the max stratum ID in data_s max_id <- max(data_s$S) # Step 2: Get unique strata in data_b and assign new IDs unique_b_strata <- sort(unique(data_b$S)) num_b_strata <- length(unique_b_strata) # Create a named mapping from old to new stratum IDs new_ids <- seq(max_id + 1, max_id + num_b_strata) stratum_map <- setNames(new_ids, unique_b_strata) # Step 3: Relabel data_b$S data_b$S <- stratum_map[as.character(data_b$S)] # 🧠Step 4: Make cluster IDs unique across the two datasets!!!!! max_gid <- max(data_s$G.id) data_b$G.id <- data_b$G.id + max_gid length(intersect(data_s$G.id, data_b$G.id)) == 0 data <- rbind(data_s[, -ncol(data_s)], data_b) ## ----- ## ----- ## ----- ## ---- ## ----- # # Step 1: Classify strata data_all <- design.classifier(data, S, G.id) # Step 2: Split the data data_small <- dplyr::filter(data_all, stratum_type == "small") data_big <- dplyr::filter(data_all, stratum_type == "big") # Step 3: Run the estimators #res_small <- res.sreg.ss(Y = data_small$Y, D = data_small$D, S = data_small$S, X = data_small[, grep("^x_", names(data_small)), drop = FALSE], HC1 = TRUE) res_small <- res.creg.ss(Y = data_small$Y, D = data_small$D, S = data_small$S, X = data.frame(data_small$x_1, data_small$x_2), G.id = data_small$G.id, Ng = data_small$Ng, HC1 = TRUE) #res_big <- res.sreg(Y = data_big$Y, D = data_big$D, S = data_big$S, X = data_big[, grep("^x_", names(data_big)), drop = FALSE], HC1 = TRUE) res_big <- res.creg(Y = data_big$Y, D = data_big$D, S = data_big$S, X = NULL, G.id = data_big$G.id, Ng = data_big$Ng, HC1 = FALSE) #res_big <- res.creg(Y = data_big$Y, D = data_big$D, S = data_big$S, X = data.frame(data_big$x_1, data_big$x_2), G.id = data_big$G.id, Ng = data_big$Ng, HC1 = TRUE) res_big$tau.hat res_big$se.rob # Step 4: Combine estimates N_small <- nrow(data_small) N_big <- nrow(data_big) N_total <- N_small + N_big tau_hat <- (N_small / N_total) * res_small$tau.hat + (N_big / N_total) * res_big$tau.hat se_combined <- sqrt((N_small / N_total)^2 * res_small$se.rob^2 + (N_big / N_total)^2 * res_big$se.rob^2) theta_hat_mtrx[s, ] <- tau_hat se_hat_mtrx[s, ] <- se_combined upper <- tau_hat + qnorm(0.975) * se_combined lower <- tau_hat - qnorm(0.975) * se_combined hit[s, ] <- (tau.vec < upper) & (tau.vec > lower) print(s) } result_table <- data.frame( `True tau` = tau.vec, `E[tau_n]` = round(colMeans(theta_hat_mtrx), 4), `True Variance` = round(colMeans(se_hat_mtrx), 4), `E[V_n]` = round(apply(theta_hat_mtrx, 2, sd), 4), `Coverage` = round(colMeans(hit), 4), check.names = FALSE ) print(result_table)
data_s <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = TRUE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) Y <- data$Y S <- data$S data_b <- sreg.rgen(n = n_2, tau.vec = tau.vec, n.strata = 2, cluster = TRUE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) # Step 1: Get the max stratum ID in data_s max_id <- max(data_s$S) # Step 2: Get unique strata in data_b and assign new IDs unique_b_strata <- sort(unique(data_b$S)) num_b_strata <- length(unique_b_strata) # Create a named mapping from old to new stratum IDs new_ids <- seq(max_id + 1, max_id + num_b_strata) stratum_map <- setNames(new_ids, unique_b_strata) # Step 3: Relabel data_b$S data_b$S <- stratum_map[as.character(data_b$S)] # 🧠Step 4: Make cluster IDs unique across the two datasets!!!!! max_gid <- max(data_s$G.id) data_b$G.id <- data_b$G.id + max_gid length(intersect(data_s$G.id, data_b$G.id)) == 0 data <- rbind(data_s[, -ncol(data_s)], data_b) resultim <- res.sreg.mixed(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), HC1 = TRUE) resultim$tau.hat resultim$se.rob ##### no clusters ##### data_s <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = FALSE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) Y <- data$Y S <- data$S data_b <- sreg.rgen(n = n_2, tau.vec = tau.vec, n.strata = 2, cluster = FALSE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) # Step 1: Get the max stratum ID in data_s max_id <- max(data_s$S) # Step 2: Get unique strata in data_b and assign new IDs unique_b_strata <- sort(unique(data_b$S)) num_b_strata <- length(unique_b_strata) # Create a named mapping from old to new stratum IDs new_ids <- seq(max_id + 1, max_id + num_b_strata) stratum_map <- setNames(new_ids, unique_b_strata) # Step 3: Relabel data_b$S data_b$S <- stratum_map[as.character(data_b$S)] # 🧠Step 4: Make cluster IDs unique across the two datasets!!!!! max_gid <- max(data_s$G.id) data_b$G.id <- data_b$G.id + max_gid length(intersect(data_s$G.id, data_b$G.id)) == 0 data <- rbind(data_s, data_b) resultim <- res.sreg.mixed(Y = data$Y, S = data$S, D = data$D, X = NULL, HC1 = TRUE) resultim$tau.hat resultim$se.rob
##### no clusters ##### data_s <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = FALSE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) Y <- data$Y S <- data$S data_b <- sreg.rgen(n = n_2, tau.vec = tau.vec, n.strata = 2, cluster = FALSE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) # Step 1: Get the max stratum ID in data_s max_id <- max(data_s$S) # Step 2: Get unique strata in data_b and assign new IDs unique_b_strata <- sort(unique(data_b$S)) num_b_strata <- length(unique_b_strata) # Create a named mapping from old to new stratum IDs new_ids <- seq(max_id + 1, max_id + num_b_strata) stratum_map <- setNames(new_ids, unique_b_strata) # Step 3: Relabel data_b$S data_b$S <- stratum_map[as.character(data_b$S)] data <- rbind(data_s, data_b) resultim <- res.sreg.mixed(Y = data$Y, S = data$S, D = data$D, X = NULL, HC1 = TRUE) resultim$tau.hat resultim$se.rob res_sreg <- sreg(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), HC1 = FALSE, small.strata = TRUE) print(res_sreg) res_sreg$tau.hat res_sreg$se.rob data_s <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = TRUE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) Y <- data$Y S <- data$S data_b <- sreg.rgen(n = n_2, tau.vec = tau.vec, n.strata = 2, cluster = TRUE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) # Step 1: Get the max stratum ID in data_s max_id <- max(data_s$S) # Step 2: Get unique strata in data_b and assign new IDs unique_b_strata <- sort(unique(data_b$S)) num_b_strata <- length(unique_b_strata) # Create a named mapping from old to new stratum IDs new_ids <- seq(max_id + 1, max_id + num_b_strata) stratum_map <- setNames(new_ids, unique_b_strata) # Step 3: Relabel data_b$S data_b$S <- stratum_map[as.character(data_b$S)] # 🧠Step 4: Make cluster IDs unique across the two datasets!!!!! max_gid <- max(data_s$G.id) data_b$G.id <- data_b$G.id + max_gid length(intersect(data_s$G.id, data_b$G.id)) == 0 data <- rbind(data_s[, -ncol(data_s)], data_b) resultim <- res.creg.mixed(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = data$Ng, HC1 = TRUE) resultim$tau.hat resultim$se.rob result_creg <- sreg(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = data$Ng, HC1 = TRUE, small.strata = TRUE) promo <- res.creg.mixed(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = data$Ng, HC1 = TRUE) promo$tau.hat promo$se.rob length(promo$lin.adj[,1]) length(promo$data$G.id) print(result_creg)
# Test 1. data: small strata, option: small strata set.seed(123) tau.vec = c(0.2, 0.8) n.treat = length(tau.vec) n_1 = 300 data <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = FALSE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) result <- sreg(Y = data$Y, D = data$D, S = data$S, HC1 = TRUE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, HC1 = TRUE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), HC1 = TRUE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), HC1 = FALSE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1), HC1 = TRUE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = NULL, X = data.frame(data$x_1), HC1 = TRUE, small.strata = TRUE) print(result) result <- sreg(Y = NULL, D = NULL, S = data$S, X = data.frame(data$x_1), HC1 = TRUE, small.strata = TRUE) print(result) # Test 2. data: big strata, option: big strata tau.vec = c(0.2, 0.9, 1.5) n.treat = length(tau.vec) n_1 = 1000 data <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = FALSE, small.strata = FALSE, treat.sizes = c(1, 1, 1), k = 3) result <- sreg(Y = data$Y, D = data$D, S = data$S, HC1 = TRUE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, HC1 = TRUE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), HC1 = TRUE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), HC1 = FALSE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1), HC1 = TRUE, small.strata = FALSE) print(result) # Test 3. data: small strata, option: big strata set.seed(123) tau.vec = c(0.2, 0.8) n.treat = length(tau.vec) n_1 = 900 data <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = FALSE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) result <- sreg(Y = data$Y, D = data$D, S = data$S, HC1 = TRUE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, HC1 = TRUE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), HC1 = TRUE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), HC1 = FALSE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), HC1 = FALSE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1), HC1 = TRUE, small.strata = FALSE) print(result) nsim = 1000 tau_small <- matrix(nrow = nsim, ncol = length(tau.vec)) tau_big <- matrix(nrow = nsim, ncol = length(tau.vec)) se_small <- matrix(nrow = nsim, ncol = length(tau.vec)) se_big <- matrix(nrow = nsim, ncol = length(tau.vec)) for (s in 1:nsim){ data <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = FALSE, small.strata = TRUE, treat.sizes = c(1, 1), k = 2) result_big <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), HC1 = TRUE, small.strata = FALSE) result_small <- sreg(Y = data$Y, D = data$D, S = data$S, X = NULL, HC1 = FALSE, small.strata = TRUE) tau_small[s, ] <- result_small$tau.hat tau_big[s, ] <- result_big$tau.hat se_small[s, ] <- result_small$se.rob se_big[s, ] <- result_big$se.rob print(s) } apply(tau_small, 2, sd) colMeans(se_big) colMeans(se_small) # Test 4. data: big strata, option: small strata tau.vec = c(0.2, 0.9) n.treat = length(tau.vec) n_1 = 1000 data <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 20, cluster = FALSE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) head(data) sreg(Y = data$Y, D = data$D, S = data$S, small.strata = TRUE) # Test 5. data: mixed design, option: small strata (so we should have just a mixed estimator) tau.vec = c(0.2, 0.9) n.treat = length(tau.vec) n_1 = 3000 n_2 = 50 data_s <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = FALSE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) Y <- data$Y S <- data$S data_b <- sreg.rgen(n = n_2, tau.vec = tau.vec, n.strata = 2, cluster = FALSE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) # Step 1: Get the max stratum ID in data_s max_id <- max(data_s$S) # Step 2: Get unique strata in data_b and assign new IDs unique_b_strata <- sort(unique(data_b$S)) num_b_strata <- length(unique_b_strata) # Create a named mapping from old to new stratum IDs new_ids <- seq(max_id + 1, max_id + num_b_strata) stratum_map <- setNames(new_ids, unique_b_strata) # Step 3: Relabel data_b$S data_b$S <- stratum_map[as.character(data_b$S)] data <- rbind(data_s, data_b) result <- sreg(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), HC1 = TRUE, small.strata = TRUE) result result$res.big$tau.hat[1] result$res.small$tau.hat[1] result$se.rob[1] ^ 2 N_s <- n_1 N_b <- n_2 N_total <- N_s + N_b (N_s * N_b) / N_total^3 * (result$res.small$tau.hat[1] - result$res.big$tau.hat[1])^2 sqrt(result$se.rob[1] ^ 2 + ((N_s * N_b) / N_total^3 * (result$res.small$tau.hat[1] - result$res.big$tau.hat[1])^2)) tau_hat <- result$tau.hat[1] # Test 6. data: mixed design, option: big strata (just a big strata estimator, but should through a warning that estimators are invalid!) proba <- sreg(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), HC1 = FALSE, small.strata = FALSE) proba proba <- sreg(Y = data$Y, S = data$S, D = data$D, X = NULL, HC1 = FALSE, small.strata = FALSE) proba
# Test 1. data: small strata, option: small strata tau.vec = c(0.2, 0.8) n.treat = length(tau.vec) n_1 = 900 data <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = TRUE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) result <- sreg(Y = data$Y, D = data$D, S = data$S, G.id = data$G.id, Ng = data$Ng, HC1 = FALSE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = data$Ng, HC1 = TRUE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = data$Ng, HC1 = FALSE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = NULL, HC1 = FALSE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1), G.id = NULL, Ng = data$G.id, HC1 = FALSE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = NULL, G.id = data$G.id, Ng = data$G.id, HC1 = TRUE, small.strata = TRUE) print(result) # Test 2. data: big strata, option: big strata tau.vec = c(0.2, 0.9, 1.5) n.treat = length(tau.vec) n_1 = 1000 data <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = TRUE, small.strata = FALSE, treat.sizes = c(1, 1, 1), k = 3) result <- sreg(Y = data$Y, D = data$D, S = data$S, HC1 = TRUE, G.id = data$G.id, Ng = data$Ng, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, HC1 = FALSE, G.id = data$G.id, Ng = data$Ng, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), HC1 = TRUE, G.id = data$G.id, Ng = data$Ng, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), HC1 = FALSE, G.id = data$G.id, Ng = data$Ng, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1), G.id = data$G.id, Ng = data$Ng, HC1 = FALSE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1), G.id = data$G.id, Ng = NULL, HC1 = FALSE, small.strata = FALSE) print(result) # Test 3. data: small strata, option: big strata tau.vec = c(0.2, 0.8) n.treat = length(tau.vec) n_1 = 900 data <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = TRUE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) result <- sreg(Y = data$Y, D = data$D, S = data$S, G.id = data$G.id, Ng = data$Ng, HC1 = FALSE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, G.id = data$G.id, Ng = data$Ng, HC1 = FALSE, small.strata = TRUE) # should not match fith small.strata = FALSE unless we change the estimator print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = data$Ng, HC1 = FALSE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = data$Ng, HC1 = TRUE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = data$Ng, HC1 = TRUE, small.strata = TRUE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1), G.id = data$G.id, Ng = data$Ng, HC1 = TRUE, small.strata = FALSE) print(result) result <- sreg(Y = data$Y, D = data$D, S = data$S, X = data.frame(data$x_1), G.id = data$G.id, Ng = NULL, HC1 = TRUE, small.strata = FALSE) print(result) # Test 4. data: big strata, option: small strata (should not work and throw a warning) tau.vec = c(0.2, 0.9) n.treat = length(tau.vec) n_1 = 1000 data <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 20, cluster = TRUE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) head(data) sreg(Y = data$Y, D = data$D, S = data$S, G.id = data$G.id, Ng = data$Ng, small.strata = TRUE, HC1 = TRUE) sreg(Y = data$Y, D = data$D, S = data$S, G.id = data$G.id, Ng = data$Ng, small.strata = TRUE, HC1 = FALSE) sreg(Y = data$Y, D = data$D, S = data$S, G.id = data$G.id, Ng = data$Ng, X = data.frame(data$x_1), small.strata = TRUE, HC1 = TRUE) sreg(Y = data$Y, D = data$D, S = data$S, G.id = data$G.id, Ng = NULL, X = data.frame(data$x_1), small.strata = TRUE, HC1 = TRUE) # Test 4. data: big strata, option: small strata tau.vec = c(0.2, 0.9) n.treat = length(tau.vec) n_1 = 1000 data <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 20, cluster = FALSE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) head(data) sreg(Y = data$Y, D = data$D, S = data$S, small.strata = TRUE) # Test 5. data: mixed design, option: small strata (so we should have just a mixed estimator) tau.vec = c(0.2, 0.9) n.treat = length(tau.vec) n_1 = 3000 n_2 = 50 data_s <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = TRUE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) Y <- data$Y S <- data$S data_b <- sreg.rgen(n = n_2, tau.vec = tau.vec, n.strata = 2, cluster = TRUE, small.strata = FALSE, treat.sizes = c(1, 1), k = 2) # Step 1: Get the max stratum ID in data_s max_id <- max(data_s$S) # Step 2: Get unique strata in data_b and assign new IDs unique_b_strata <- sort(unique(data_b$S)) num_b_strata <- length(unique_b_strata) # Create a named mapping from old to new stratum IDs new_ids <- seq(max_id + 1, max_id + num_b_strata) stratum_map <- setNames(new_ids, unique_b_strata) # Step 3: Relabel data_b$S data_b$S <- stratum_map[as.character(data_b$S)] # 🧠Step 4: Make cluster IDs unique across the two datasets!!!!! max_gid <- max(data_s$G.id) data_b$G.id <- data_b$G.id + max_gid length(intersect(data_s$G.id, data_b$G.id)) == 0 data <- rbind(data_s[, -ncol(data_s)], data_b) Y <- data$Y S <- data$S G.id <- data$G.id Ng <- data$Ng D <- data$D data_tst <- data.frame(Y = Y, D = D, S = S, G.id = G.id, Ng = Ng) proba <-sreg(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2, data$Ng), G.id = data$G.id, Ng = data$Ng, HC1 = TRUE, small.strata = TRUE) proba proba <-sreg(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = data$Ng, HC1 = FALSE, small.strata = TRUE) proba proba <-sreg(Y = data$Y, S = data$S, D = data$D, X = NULL, G.id = data$G.id, Ng = data$Ng, HC1 = FALSE, small.strata = TRUE) proba proba <-sreg(Y = data$Y, S = data$S, D = data$D, X = NULL, G.id = data$G.id, Ng = data$Ng, HC1 = TRUE, small.strata = TRUE) proba proba <-sreg(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = NULL, HC1 = TRUE, small.strata = TRUE) proba proba <-sreg(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = NULL, HC1 = FALSE, small.strata = TRUE) proba proba <-sreg(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), G.id = NULL, Ng = NULL, HC1 = FALSE, small.strata = TRUE) proba # Test 6. data: mixed design, option: big strata (just a big strata estimator!) proba <- sreg(Y = data$Y, S = data$S, D = data$D, X = data.frame(data$x_1, data$x_2), G.id = data$G.id, Ng = data$Ng, HC1 = TRUE, small.strata = FALSE) proba proba <- sreg(Y = data$Y, S = data$S, D = data$D, X = NULL, G.id = data$G.id, Ng = data$Ng, HC1 = TRUE, small.strata = FALSE) proba proba <- sreg(Y = data$Y, S = data$S, D = data$D, X = NULL, G.id = data$G.id, Ng = data$Ng, HC1 = FALSE, small.strata = FALSE) proba proba <- sreg(Y = data$Y, S = data$S, D = data$D, X = NULL, G.id = data$G.id, Ng = NULL, HC1 = FALSE, small.strata = FALSE) proba #### Let's play around with out plot! ### tau.vec = c(0.2, 0.5) n.treat = length(tau.vec) n_1 = 900 data <- sreg.rgen(n = n_1, tau.vec = tau.vec, n.strata = 4, cluster = TRUE, small.strata = TRUE, treat.sizes = c(1, 1, 1), k = 3) result <- sreg(Y = data$Y, D = data$D, S = data$S, G.id = data$G.id, Ng = data$Ng, HC1 = FALSE, small.strata = TRUE) print(result) plot(result, treatment_labels = c('FIRST', 'SECOND'), bar_fill = c('blue', 'orange'), title = 'New Plot', point_shape = 23, point_size = 4, point_color = "green", point_fill = 'black', point_stroke = 1.2, label_color = 'purple', label_size = 10, zero_line = FALSE, grid = TRUE, bg_color = 'grey') plot(result, grid = FALSE, bg_color = 'white', y_axis_title = 'Effects', x_axis_title = 'effect size')
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.