# R/AME_estimate.R In factorEx: Design and Analysis for Factorial Experiments

#### Defines functions AME_estimate

## (Internal Function): Estimating pAMCE without regularization

AME_estimate <- function(formula,
data,
pair = FALSE, pair_id = NULL, cross_int = TRUE,
cluster,
marginal_dist,
marginal_type,
joint_dist = NULL,
boot, seed = 1234,
difference = FALSE, formula_three_c){

###########
## Check ##
###########
if(any(is.na(data))==TRUE){
stop("Remove NA before using this function.")
}
if(pair==TRUE & is.null(pair_id)==TRUE){
stop("When 'pair=TRUE', specify 'pair_id'.")
}
if(pair==TRUE & all(table(pair_id)==2)==FALSE){
stop("When 'pair=TRUE', each of 'pair_id' should have two observations")
}
if(pair == FALSE){
cross_int <- FALSE
}
if(difference==TRUE & length(marginal_type) < 2){
stop("if 'difference = TRUE', marginal_dist should contain more than one distribution.")
}
if(is.list(marginal_dist)==FALSE){
stop("marginal_dist should be 'list'.")
}
marginal_name_check <- lapply(marginal_dist, colnames)
marginal_name_check_all <- all(unlist(lapply(marginal_name_check,
function(x) all(x == c("factor", "levels", "prop")))))
if(marginal_name_check_all == FALSE){
stop(" 'colnames' of 'marginal_dist' should be c('factor', 'levels', 'prop') ")
}
if(length(marginal_type) > 1){
for(z in 2:length(marginal_type)){
if(all(marginal_dist[[1]][,1] == marginal_dist[[z]][,1]) == FALSE) stop("marginal_dist should have the same order for factor.")
if(all(marginal_dist[[1]][,2] == marginal_dist[[z]][,2]) == FALSE) stop("marginal_dist should have the same order for levels.")
}
}
all.fac <- all(unlist(lapply(model.frame(formula, data=data)[,-1],
FUN=function(x) is.element("factor",class(x)))))
if(all.fac == FALSE) stop("Design matrix should contain only factors."); rm(all.fac)

###########
## Setup ##
###########
# Create two-way interaction formula (allow for three-ways) ----------
factor_l <- length(all.vars(formula)[-1])
combMat <- combn(factor_l,2); intNames <- c()
for(k in 1:ncol(combMat)){
intNames[k] <- paste(all.vars(formula)[-1][combMat[1,k]], "*", all.vars(formula)[-1][combMat[2,k]], sep = "")
}
formula_full0 <- paste(all.vars(formula)[1], "~", paste(intNames, collapse = "+"), sep="")
if(is.null(formula_three_c) == TRUE){
formula_full <- as.formula(formula_full0)
}else{
formula_full <- as.formula(paste(formula_full0, formula_three_c, sep = "+"))
}

# Baseline ----------
baseline <- lapply(model.frame(formula, data=data)[,-1],
FUN = function(x) levels(x)[1])

# pair_id and cluster
if(missing(pair_id) == TRUE) pair_id <- NULL
if(missing(cluster) == TRUE) cluster <- seq(1:nrow(data))
data\$pair_id <- pair_id
data\$cluster <- cluster
# Differencing ----------
if(pair==TRUE){
data0 <- data[order(pair_id),]
side <- rep(c(1,0), times=nrow(data0)/2)
data1 <- data0[side==1,]
data2 <- data0[side==0,]
cluster_original <- cluster
cluster <- cluster[side==1]

X1 <- model.matrix(formula_full, data=data1)[,-1]
X2 <- model.matrix(formula_full, data=data2)[ ,-1]
X <- cbind(1, X1 - X2)

if(cross_int == TRUE){

base_fac <- all.vars(formula_full)[-1]
data2_u <- data2[,base_fac]; colnames(data2_u) <- paste(base_fac,"_rp",sep="")
data_c <- cbind(data1[,base_fac], data2_u)
for_cross <- paste("~", paste(paste(base_fac, paste(base_fac,"_rp",sep=""), sep="*"),
collapse = "+"))
for_cross0 <- paste("~", paste(base_fac, collapse = "+"))
data_cross0 <- model.matrix(as.formula(for_cross0), data = data_c)[,-1]
sing <- 2*ncol(data_cross0)
data_cross <- model.matrix(as.formula(for_cross), data = data_c)
data_cross <- data_cross[,-1]
X_cross <- data_cross[, c((sing + 1): ncol(data_cross))]

# modify X and ind_b
X <- cbind(X, X_cross)
}

y <- model.frame(formula_full,data=data1)[ ,1]
# base_name <- c("(Intercept)", colnames(X1))
}else{
cluster_original <- cluster
X <- model.matrix(formula_full, data=data)
y <- model.frame(formula_full, data=data)[,1]
# base_name <- colnames(X)
side <- NULL
}

# Check ----------
marginal_dist_u0 <- marginal_dist[[1]]
marginal_dist_u <- data.frame(matrix(NA, ncol=0, nrow=nrow(marginal_dist_u0)))
marginal_dist_u\$level <- paste(marginal_dist_u0[,1], marginal_dist_u0[,2],sep="")
marginal_dist_u\$prop  <- marginal_dist_u0[,3]
mar_name <- colnames(model.matrix(formula, data=data))[-1]
if(all(is.element(mar_name, marginal_dist_u[,1])) == FALSE){
stop("The first and second columns of `marginal.dist' should have correct names for 'factors' and 'levels'")
}
rm(marginal_dist_u)

# Fit the model ----------
main_lm <- lm(y ~ X - 1)
coefInt <- coef(main_lm)
coefInt <- coefInt[is.na(coefInt) == FALSE]
base_name <- sub("X", "", names(coefInt))
names(coefInt) <- base_name
vcovInt <- cluster_se_glm(main_lm, cluster = as.factor(cluster))
colnames(vcovInt) <- rownames(vcovInt) <- base_name

# Transform marginal_dist (for internal simplisity) ----------
marginal_dist_u_list <- list()
for(z in 1:length(marginal_dist)){
marginal_dist_u_list[[z]] <- data.frame(matrix(NA, ncol=0, nrow=nrow(marginal_dist[[z]])))
marginal_dist_u_list[[z]]\$level <- paste(marginal_dist[[z]][,1], marginal_dist[[z]][,2],sep="")
marginal_dist_u_list[[z]]\$prop  <- marginal_dist[[z]][,3]
}
marginal_dist_u_base <- marginal_dist_u_list[[1]]

# Incorporate Joint Distribution
if(is.null(joint_dist) == TRUE){
joint_dist_u_list <- NULL
}else{
joint_dist_u_list <- list()
for(z in 1:length(joint_dist)){
temp <- do.call("rbind", joint_dist[[z]])
joint_dist_u_list[[z]] <- data.frame(matrix(NA, ncol=0, nrow=nrow(temp)))
joint_dist_u_list[[z]]\$level_1 <- paste(temp[,1], temp[,3],sep="")
joint_dist_u_list[[z]]\$level_2 <- paste(temp[,2], temp[,4],sep="")
joint_dist_u_list[[z]]\$prop  <- temp[,5]
}
joint_dist_u_base <- joint_dist_u_list[[1]]
}

# Estimate AMEs ----------
## Estimate AMEs from two-ways
if(is.null(formula_three_c) == TRUE){
table_AME <- coefIntAME(coefInt = coefInt, vcovInt = vcovInt, SE = TRUE,
marginal_dist = marginal_dist, marginal_dist_u_list = marginal_dist_u_list,
marginal_dist_u_base = marginal_dist_u_base, marginal_type = marginal_type,
difference = difference, cross_int = cross_int)
}else if(is.null(formula_three_c) == FALSE){
table_AME <- coefIntAME(coefInt = coefInt, vcovInt = vcovInt, SE = TRUE,
marginal_dist = marginal_dist, marginal_dist_u_list = marginal_dist_u_list,
marginal_dist_u_base = marginal_dist_u_base, marginal_type = marginal_type,
difference = difference, cross_int = cross_int, three_way = TRUE,
joint_dist_u_list = joint_dist_u_list)
}

# table_AME <- c()
# for(m in 1:nrow(marginal_dist_u_base)){
#   coef_focus <- coefInt[grep(marginal_dist_u_base\$level[m], names(coefInt), fixed = T)]
#   vcov_focus <- vcovInt[grep(marginal_dist_u_base\$level[m], rownames(vcovInt), fixed = T),
#                         grep(marginal_dist_u_base\$level[m], colnames(vcovInt), fixed = T)]
#   if(length(coef_focus) > 0){
#     estNames <- gsub(paste(marginal_dist_u_base\$level[m], ":", sep = ""), "", names(coef_focus), fixed = T)
#     estNames <- gsub(paste(":", names(coef_focus)[1], sep = ""), "", estNames, fixed = T)
#
#     if(cross_int == TRUE){
#       estNames <- sub(paste(marginal_dist[[1]]\$factor[m],"_rp", sep = ""),
#                       marginal_dist[[1]]\$factor[m], estNames)
#     }
#
#     table_AME_m <- c()
#     # For each marginal distribution,
#     for(z in 1:length(marginal_dist)){
#       marginal_dist_u <- marginal_dist_u_list[[z]]
#       # Find weights
#       coef_prop <- c(1, as.numeric(as.character(marginal_dist_u[match(estNames, marginal_dist_u[, "level"]), "prop"]))[-1])
#       # Compute AMEs
#       coef_AME <- sum(coef_focus * coef_prop)
#       se_AME <- sqrt(coef_prop%*%vcov_focus%*%coef_prop)
#       AME <- data.frame(matrix(NA, ncol = 0, nrow=1))
#       AME\$type <- marginal_type[z]
#       AME\$factor   <- marginal_dist[[z]][m,1]; AME\$level <- marginal_dist[[z]][m,2]
#       AME\$estimate <- coef_AME; AME\$se <- se_AME
#       table_AME_m <- rbind(table_AME_m, AME)
#     }
#     if(difference == TRUE){
#       for(z in 2:length(marginal_dist)){
#         marginal_dist_u <- marginal_dist_u_list[[z]]
#         # Find weights
#         coef_prop <- c(1, as.numeric(as.character(marginal_dist_u[match(estNames, marginal_dist_u[, "level"]), "prop"]))[-1])
#         coef_prop0 <- c(1, as.numeric(as.character(marginal_dist_u_base[match(estNames, marginal_dist_u_base[, "level"]), "prop"]))[-1])
#         # Compute AMEs
#         coef_prop_d <- (coef_prop - coef_prop0)
#         coef_AME_dif <- sum(coef_focus * coef_prop_d)
#         se_AME_dif <- sqrt(coef_prop_d%*%vcov_focus%*%coef_prop_d)
#         AME_dif <- data.frame(matrix(NA, ncol = 0, nrow=1))
#         AME_dif\$type <- paste(marginal_type[z],"-",marginal_type[1],sep="")
#         AME_dif\$factor   <- marginal_dist[[z]][m,1]; AME_dif\$level <- marginal_dist[[z]][m,2]
#         AME_dif\$estimate <- coef_AME_dif; AME_dif\$se <- se_AME_dif
#         table_AME_m <- rbind(table_AME_m, AME_dif)
#       }
#     }
#     table_AME <- rbind(table_AME, table_AME_m)
#   }
# }
# colnames(table_AME) <- c("type", "factor", "level", "estimate", "se")

# Combine STD estimate
table_STD <- AME.fit.STD.se.sep(formula = formula,
data = data,
pair = pair,
marginal_dist = marginal_dist,
marginal_dist_u_base = marginal_dist_u_base)

uniq_fac <- unique(table_AME\$factor)
table_AME_full <- c()
for(i in 1:length(uniq_fac)){
table_AME_main <- table_AME[table_AME\$factor == uniq_fac[i], ]
uniq_level <- unique(table_AME_main\$level)
for(j in 1:length(uniq_level)){
table_AME_main_m <- table_AME_main[table_AME_main\$level == uniq_level[j], ]
STD_base <- table_STD[table_STD\$fac == uniq_fac[i] & table_STD\$level == uniq_level[j], ]
table_AME_main_m <- rbind(STD_base, table_AME_main_m)

if(difference == TRUE){
table_AME_add <- data.frame(matrix(NA, ncol = 0, nrow = length(marginal_type)))
dif_est  <- table_AME_main_m[2:(1+length(marginal_type)), "estimate"] - STD_base\$estimate
dif_se   <- sqrt((table_AME_main_m[2:(1+length(marginal_type)), "se"])^2 + STD_base\$se^2)
}
table_AME_full <- rbind(table_AME_full, table_AME_main_m)
}
}

##  bootstrap coefficients
set.seed(seed)
boot_coef <- rmvnorm(n = boot, mean = coefInt, sigma = vcovInt)

## For Each Factor
AME <- list()
for(g in 1:length(unique(table_AME_full\$factor))){
AME[[g]] <- table_AME_full[table_AME_full\$factor == unique(table_AME_full\$factor)[g], ]
AME[[g]]\$low.95ci <- AME[[g]]\$estimate - 1.96*AME[[g]]\$se
AME[[g]]\$high.95ci <- AME[[g]]\$estimate + 1.96*AME[[g]]\$se
}
names(AME) <- unique(table_AME_full\$factor)
type_all   <- unique(table_AME_full\$type)
type_difference   <- setdiff(unique(table_AME_full\$type), marginal_type)

input  <- list("formula" = formula, "data" = data,
"pair" = pair, "pair_id" = pair_id, "cross_int" = cross_int,
"cluster" = cluster_original, "marginal_dist" = marginal_dist,
"marginal_type" = marginal_type, "difference" = difference)

output <- list("AMCE" = AME, "baseline" = baseline, "coef" = coefInt,
"type_all" = type_all, "type_difference" = type_difference,
"boot_coef" = boot_coef,
"input" = input)
return(output)
}

## Try the factorEx package in your browser

Any scripts or data that you put into this service are public.

factorEx documentation built on July 2, 2020, 12:25 a.m.