Nothing
#-------------------------------------------------------------------
# %# Generates observed outcomes,
# %# by taking as input the potential outcomes,
# %# matrix of strata assignments, pi.vec, and
# %# number of treatments
#----------------------------------------------------------------------
dgp.obs.sreg <- function(baseline, I.S, pi.vec, n.treat, is.cov = TRUE)
#----------------------------------------------------------------------
{
if (n.treat != length(pi.vec)) {
stop("The number of treatments doesn't match the length of vector pi.vec.")
}
num.strata <- ncol(I.S)
n <- length(baseline$Y.0)
A <- cbind(rep(0, n))
l.seq <- num.strata / 2
pi.matr <- matrix(1, ncol = num.strata, nrow = n.treat)
pi.matr.w <- pi.matr * pi.vec
for (k in 1:num.strata)
{
index <- which(I.S[, k] == 1)
ns <- length(index)
A[index] <- gen.treat.sreg(pi.matr.w, ns, k)
}
for (a in 0:n.treat)
{
assign(paste("Y.", a, sep = ""), baseline[[paste("Y.", a, sep = "")]])
}
formula <- gen.rubin.formula.sreg(n.treat)
Y.obs <- eval(parse(text = formula))
if (is.cov == TRUE) {
ret.list <- list(
"Y" = Y.obs,
"D" = A,
"X" = baseline$X
)
} else {
ret.list <- list(
"Y" = Y.obs,
"D" = A
)
}
return(ret.list)
}
#-------------------------------------------------------------------
dgp.obs.creg <- function(baseline, I.S, pi.vec, n.treat)
#-------------------------------------------------------------------
{
if (n.treat != length(pi.vec)) {
stop("The number of treatments doesn't match the length of vector pi.vec.")
}
num.strata <- ncol(I.S)
n <- baseline$G
A <- cbind(rep(0, n))
l.seq <- num.strata / 2
pi.matr <- matrix(1, ncol = num.strata, nrow = n.treat)
pi.matr.w <- pi.matr * pi.vec
for (k in 1:num.strata)
{
index <- which(I.S[, k] == 1)
ns <- length(index)
A[index] <- gen.treat.creg(pi.matr.w, ns, k)
}
strata.set <- data.frame(I.S)
strata.set$S <- max.col(strata.set)
cluster.indicator <- baseline$cl.id
G.seq <- seq(c(1:baseline$G))
data.short <- data.frame(
"cl.id" = G.seq, A, S = strata.set$S, Ng = baseline$Ng,
baseline$X
)
data.long <- data.frame("cl.id" = cluster.indicator)
merged.data <- merge(data.long, data.short, by = "cl.id")
length(merged.data$A)
A <- merged.data$A
S <- merged.data$S
X <- merged.data[5:ncol(merged.data)]
Ng <- merged.data$Ng
for (a in 0:n.treat)
{
assign(paste("Y.", a, sep = ""), baseline[[paste("Yig.", a, sep = "")]])
}
formula <- gen.rubin.formula.creg(n.treat)
Y.obs <- eval(parse(text = formula))
ret.list <- list(
"Y" = Y.obs,
"D" = A,
"S" = S,
"Z.2" = baseline$Z.g.2,
"X" = X,
"Ng" = Ng,
"G.id" = cluster.indicator,
"cl.lvl.data" = data.short
)
return(ret.list)
}
dgp.obs.sreg.ss <- function(dgp_list,
n.treat,
k,
treat_sizes,
badmatch = FALSE,
poutcome = FALSE) {
# Basic checks
stopifnot(length(treat_sizes) == (n.treat + 1))
if (sum(treat_sizes) != k) {
stop("Sum of treat_sizes must equal k.")
}
# Number of total arms is n.treat + 1, labeled 0,1,...,n.treat
all_treat_levels <- 0:n.treat
# -- Convert the list from dgp_po into a workable data.frame --
# We assume that your dgp_list has elements named "Y.0","Y.1",...,"Y.n.treat", plus W, plus X (if is.cov=TRUE).
# Make a data.frame that has columns for each potential outcome and your stratification variable(s).
# First, figure out how many observations:
n <- length(dgp_list[[paste0("Y.", 0)]]) # length of Y.0
# Build a DF row-by-row:
# Potential Outcomes:
Y_mat <- sapply(all_treat_levels, function(a) dgp_list[[paste0("Y.", a)]])
colnames(Y_mat) <- paste0("Y.", all_treat_levels)
# For convenience, also extract W and (optionally) X
W <- dgp_list$W
# If you have a data.frame X, you can splice it in; or if it's just one vector x_1, do the same.
# Here, assume 'X' is either a single numeric or a data.frame. We’ll just keep it as-is.
X <- dgp_list$X
# Combine into one data.frame:
# We'll store W in a single column called "W". If X is a data.frame, you may want to cbind.
df <- data.frame(Y_mat, W = W)
# If X is a data.frame with multiple columns, do: df <- cbind(df, X).
# If it’s just a single vector, do:
if (is.data.frame(X)) {
df <- cbind(df, X)
} else {
# treat X as a single column
df[["X"]] <- X
}
# For matching, we need a single covariate (or a score) to order or "bad‐match" by.
# If you want to match by W, we’ll define `match_var = df[["W"]]`.
# If you want to match by X, define `match_var = df[["X"]]`.
# Adjust as you like.
match_var <- df[["W"]] # or df[["X"]], etc.
# Sort or rearrange for block creation
if (!badmatch) {
# Good-match approach: just sort ascending by 'match_var'
sort_index <- order(match_var)
df_sorted <- df[sort_index, ]
} else {
# "Bad matching": pair smallest with largest, etc. generalized to block size k
# The simplest approach for pairs is to reorder: first is smallest, second is largest, third is 2nd smallest, fourth is 2nd largest, etc.
# For block size > 2, you can do an analogous approach (though it gets trickier).
# Here is a simple generalization that reverses half of the data row‐wise:
o <- order(match_var) # ascending
df_asc <- df[o, ]
# Now we can build "bad" blocks by taking half from the front, half from the back, interleaving, etc.
# For simplicity, we can do something like: 1st block = 1, N, 2, N-1, ...
# The code below tries a standard “pair up extremes” approach, repeated for blocks of size k.
# If k=2, it just does the classical pairs. If k=3 or 4, you might want to adapt logic further.
# This skeleton tries to keep it straightforward:
N <- nrow(df_asc)
# vector of indices from front/back
half <- floor(N / 2)
front_inds <- 1:half
back_inds <- (N):(half + 1)
# interleave front and back
new_order <- as.vector(rbind(front_inds, back_inds))
# if N is not an even multiple of k, watch out for leftover
if (length(new_order) < N) {
leftover <- setdiff(seq_len(N), new_order)
new_order <- c(new_order, leftover)
}
df_sorted <- df_asc[new_order, ]
}
# Now we assign blocks of size k. We'll loop block by block.
N <- nrow(df_sorted)
n.blocks <- N %/% k # integer number of full blocks
if (n.blocks * k != N) {
stop("The total number of units (N) is not divisible by the block size (k). Please adjust the sample size or choose a different block size for the small strata design.")
}
# Prepare container for assigned treatments and block labels
A <- rep(NA, N) # the assigned treatment
block_id <- rep(NA, N)
# For each block, randomly permute the treat_sizes among the k units
# so that exactly treat_sizes[1] get treat=0, treat_sizes[2] get treat=1, etc.
# all_treat_levels = c(0,1,...,n.treat)
for (j in seq_len(n.blocks)) {
# indices for block j
these_inds <- ((j - 1) * k + 1):(j * k)
# We create a vector of length k with exactly treat_sizes[a] copies of 'a', for a in all_treat_levels
block_treat_vector <- unlist(mapply(function(tt, tsize) rep(tt, tsize),
tt = all_treat_levels,
tsize = treat_sizes
))
# Randomly permute that vector so we don't always assign in the same pattern
block_treat_vector <- sample(block_treat_vector)
# Assign them
A[these_inds] <- block_treat_vector
# Record the block
block_id[these_inds] <- j
}
# Now figure out the observed outcome, y_i, by picking from Y.0..Y.n.treat
# We'll do it row by row. For row i, the assigned treatment is A[i]. So the outcome is df_sorted[i, Y.(A[i])]
# We can do that with indexing or by a small loop:
y_obs <- numeric(N)
# column names for the potential outcomes:
pot_outcome_names <- paste0("Y.", all_treat_levels)
for (i in seq_len(N)) {
treat_i <- A[i]
# which column? "Y.0" => column 1, "Y.1" => column 2, etc.
col_idx <- which(all_treat_levels == treat_i)
y_obs[i] <- df_sorted[i, pot_outcome_names[col_idx]]
}
# Final data frame
# Minimal columns: (y, A, W, X, block)
out_df <- data.frame(
y = y_obs,
A = A,
W = df_sorted[["W"]],
block = block_id
)
# If your df_sorted includes multiple X columns (like x_1, x_2, etc.), you can add them:
out_df <- cbind(out_df, df_sorted[, c("x_1", "x_2")])
# if ("x_1" %in% names(df_sorted)) {
# out_df[["X"]] <- df_sorted[["x_1"]] # NB CORRECT FOR MULTIPLE COVARIATES!!! Working on it... No flexibility needed for dgp functions!
# }
# Optionally attach all potential outcomes for debugging/evaluation
if (poutcome) {
out_df <- cbind(
out_df,
df_sorted[, pot_outcome_names, drop = FALSE]
)
}
# Return
rownames(out_df) <- NULL
return(out_df)
}
#-------------------------------------------------------------------
dgp.obs.creg.ss <- function(baseline, n.treat, k, treat_sizes)
#-------------------------------------------------------------------
{
# num.strata <- ncol(I.S)
n <- baseline$G
A <- cbind(rep(0, n))
# Number of total arms is n.treat + 1, labeled 0,1,...,n.treat
all_treat_levels <- 0:n.treat
# l.seq <- num.strata / 2
# pi.matr <- matrix(1, ncol = num.strata, nrow = n.treat)
# pi.matr.w <- pi.matr * pi.vec
# for (k in 1:num.strata)
# {
# index <- which(I.S[, k] == 1)
# ns <- length(index)
# A[index] <- gen.treat.creg(pi.matr.w, ns, k)
# }
Y_mat <- sapply(all_treat_levels, function(a) baseline[[paste0("Yig.", a)]])
colnames(Y_mat) <- paste0("Y.", all_treat_levels)
cluster.indicator <- baseline$cl.id
W <- baseline$Z.g.2
X <- baseline$X
G.seq <- seq(c(1:baseline$G))
# Just on a cluster level cl.id, Ng, W, X
data.short <- data.frame(
"cl.id" = G.seq, Ng = baseline$Ng,
W, X
)
# print(data.short)
# Stratification. We use the cluster level data to first assign strata
match_var <- data.short[["W"]] # or df[["X"]], etc.
sort_index <- order(match_var)
data.short_sorted <- data.short[sort_index, ]
# Now we assign blocks of size k. We'll loop block by block.
N <- nrow(data.short_sorted)
n.blocks <- N %/% k # integer number of full blocks
if (n.blocks * k != N) {
stop("The total number of units (N) is not divisible by the block size (k). Please adjust the sample size or choose a different block size for the small strata design.")
}
# Prepare container for assigned treatments and block labels
A <- rep(NA, N) # the assigned treatment
block_id <- rep(NA, N)
# For each block, randomly permute the treat_sizes among the k units
# so that exactly treat_sizes[1] get treat=0, treat_sizes[2] get treat=1, etc.
# all_treat_levels = c(0,1,...,n.treat)
for (j in seq_len(n.blocks)) {
# indices for block j
these_inds <- ((j - 1) * k + 1):(j * k)
# We create a vector of length k with exactly treat_sizes[a] copies of 'a', for a in all_treat_levels
block_treat_vector <- unlist(mapply(function(tt, tsize) rep(tt, tsize),
tt = all_treat_levels,
tsize = treat_sizes
))
# Randomly permute that vector so we don't always assign in the same pattern
block_treat_vector <- sample(block_treat_vector)
# Assign them
A[these_inds] <- block_treat_vector
# Record the block
block_id[these_inds] <- j
}
data.short <- data.frame(
A,
S = block_id, data.short_sorted
)
# print(data.short)
# print(block_id)
# print(A)
# print(data.short_sorted)
# short data
# data.short <- data.frame(
# "cl.id" = G.seq, A, S = strata.set$S, Ng = baseline$Ng,
# baseline$X
# )
data.long <- data.frame("cl.id" = cluster.indicator)
# print(data.long)
# individual level data frame without Y! Just cl.id, Ng, W, X
merged.data <- merge(data.long, data.short, by = "cl.id")
# print(head(merged.data, 200)) # Works! But we need to append potential outcomes!
A <- merged.data$A
S <- merged.data$S
X <- merged.data[6:ncol(merged.data)]
Ng <- merged.data$Ng
for (a in 0:n.treat)
{
assign(paste("Y.", a, sep = ""), baseline[[paste("Yig.", a, sep = "")]])
}
# print(Y.1)
formula <- gen.rubin.formula.creg(n.treat)
Y.obs <- eval(parse(text = formula)) # observed outcomes for every individual
# print(Y.obs)
ret.list <- list(
"Y" = Y.obs,
"D" = A,
"S" = S,
"W" = baseline$Z.g.2,
"X" = X,
"Ng" = Ng,
"G.id" = cluster.indicator,
"cl.lvl.data" = data.short
)
return(ret.list)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.