set.seed(1242018)
make_wts <- function(ta, ps){
wts <- data.frame(t=(ta/ps) / sum(ta/ps),
c=((1-ta)/(1-ps)) / sum((1-ta)/(1-ps)))
wts <- ifelse(ta==1, wts$t, wts$c)
return(wts)
}
make_wts2 <- function(ta, ps){
wts <- data.frame(t=ta / sum(ta),
c=((1-ta) * ps /(1-ps)) / sum((1-ta) * ps/(1-ps)))
wts <- ifelse(ta==1, wts$t, wts$c)
return(wts)
}
lm_ps_robust <- function(Y, X, wts, true_val = NULL){
W <- diag(wts)
invXtWX <- solve(t(X) %*% W %*% X)
betas <- invXtWX %*% t(X) %*% W %*% Y
Yhat <- X %*% betas
resids <- Y - Yhat
sighat <- as.double(sum(resids^2) / (length(Y) - 1))
varmat <- invXtWX %*% t(X) %*% W %*% diag(as.vector(resids)^2) %*% W %*% X %*% invXtWX
std_errs <- sqrt(diag(varmat))
low_int <- betas - 1.96 * std_errs
upp_int <- betas + 1.96 * std_errs
res <- cbind(betas, std_errs, low_int, upp_int)
colnames(res) <- c('coef', 'stderrs', 'low95', 'upp95')
if(!is.null(true_val)){
cover_test <- res[2,3] < true_val & res[2,4] > true_val
return(list('ests' = data.frame(res),
'covers' = cover_test))
} else{
return(list('ests' = res))
}
}
lm_ps<- function(Y, X, wts, true_val = NULL){
W <- diag(wts)
invXtWX <- solve(t(X) %*% W %*% X)
betas <- invXtWX %*% t(X) %*% W %*% Y
Yhat <- X %*% betas
resids <- Y - Yhat
# sighat <- as.double(sum(resids^2) / (length(Y) - 1))
sighat <- as.double(sum(wts*resids^2) / sum(wts))
varmat <- sighat * invXtWX %*% t(X) %*% W %*% W %*% X %*% invXtWX
std_errs <- sqrt(diag(varmat))
low_int <- betas - 1.96 * std_errs
upp_int <- betas + 1.96 * std_errs
res <- cbind(betas, std_errs, low_int, upp_int)
colnames(res) <- c('coef', 'stderrs', 'low95', 'upp95')
if(!is.null(true_val)){
cover_test <- res[2,3] < true_val & res[2,4] > true_val
return(list('ests' = data.frame(res),
'covers' = cover_test))
} else{
return(list('ests' = res))
}
}
lo <- 100
test_rho <- seq(0.05,5,length.out = lo)
n_sims <- 100
n_obs <- 250
true_ate <- .25^2 + 2.5^2 + 2 - 2.5
mean_gp <- matrix(NA, nrow=n_sims, ncol=length(test_rho))
vars_gp <- matrix(NA, nrow=n_sims, ncol=length(test_rho))
var2_gp <- matrix(NA, nrow=n_sims, ncol=length(test_rho))
bals_gp <- matrix(NA, nrow=n_sims, ncol=length(test_rho))
ess_gp <- matrix(NA, nrow=n_sims, ncol=length(test_rho))
mean_glm <- matrix(NA, nrow=n_sims, ncol=length(test_rho))
vars_glm <- matrix(NA, nrow=n_sims, ncol=length(test_rho))
var2_glm <- matrix(NA, nrow=n_sims, ncol=length(test_rho))
bals_glm <- matrix(NA, nrow=n_sims, ncol=length(test_rho))
ess_glm <- matrix(NA, nrow=n_sims, ncol=length(test_rho))
for(i in 1:n_sims){
n_obs <- 500
Xt <- rnorm(n_obs/5, mean=2.5, sd=0.25)
Xc <- rnorm(n_obs - n_obs/5, mean=0, sd=2)
X <- c(Xt, Xc)
TA <- c(rep(1, n_obs/5), rep(0, n_obs - n_obs/5))
design_mat <- cbind(1, TA)
po_stdev <- 0.25
YT <- X^2 + 2 + rnorm(n_obs, 0, po_stdev)
YC <- X + rnorm(n_obs, 0, po_stdev)
YO <- TA * YT + (1-TA)*YC
for(r in 1:length(test_rho)){
cov_mat <- gpbalancer::par_sqexp(as.matrix(X), c(1, test_rho[r]))
est_obgpps <- gpbalancer::gpbal_fixed(TA, cov_mat, verbose=F)
wts_gp <- make_wts2(TA, est_obgpps$ps)
bal_gp <- gpbalancer::bal_stats(X, TA, wts=wts_gp)
res_gp <- lm_ps_robust(YO, design_mat, wts_gp, true_ate)
mean_gp[i,r] <- res_gp$ests$coef[2]
vars_gp[i,r] <- res_gp$ests$stderrs[2]^2
var2_gp[i,r] <- lm_ps(YO, design_mat, wts_gp, true_ate)$ests$stderrs[2]^2
bals_gp[i,r] <- bal_gp[7]
wts_gp_t <- wts_gp[TA==1]
wts_gp_c <- wts_gp[TA==0]
ess_gp[i,r] <- sum(wts_gp_t)^2 / sum(wts_gp_t^2) + sum(wts_gp_c)^2 / sum(wts_gp_c^2)
est_glm <- glm(TA ~ X + I(X^2), family='binomial')
wts_glm <- make_wts2(TA, fitted(est_glm))
bal_glm <- gpbalancer::bal_stats(X, TA, wts=wts_glm)
res_glm <- lm_ps_robust(YO, design_mat, wts_glm, true_ate)
mean_glm[i,r] <- res_glm$ests$coef[2]
vars_glm[i,r] <- res_glm$ests$stderrs[2]^2
var2_glm[i,r] <- lm_ps(YO, design_mat, wts_glm, true_ate)$ests$stderrs[2]^2
bals_glm[i,r] <- bal_glm[7]
wts_glm_t <- wts_glm[TA==1]
wts_glm_c <- wts_glm[TA==0]
ess_glm[i,r] <- sum(wts_glm_t)^2 / sum(wts_glm_t^2) + sum(wts_glm_c)^2 / sum(wts_glm_c^2)
}
message(paste(i,':',sep=""), appendLF = F)
}
lowers <- mean_gp - 1.96 * sqrt(vars_gp)
uppers <- mean_gp + 1.96 * sqrt(vars_gp)
covers <- apply((lowers < true_ate) & (uppers > true_ate),2, mean)
plot(test_rho, covers, ylim=c(0,1), col=rgb(0,0,0.75,0.5), lwd=5, type='l')
lowers1 <- mean_gp - 1.96 * sqrt(ess_gp * vars_gp / n_obs)
uppers1 <- mean_gp + 1.96 * sqrt(ess_gp * vars_gp / n_obs)
covers1 <- apply((lowers1 < true_ate) & (uppers1 > true_ate),2, mean)
lines(test_rho, covers1, ylim=c(0,1), col=rgb(0.75,0,0.75,0.5), lwd=5, type='l')
emp_var <- matrix(apply(mean_gp, 2, var), nrow=n_sims, ncol = length(test_rho), byrow = T)
lowers2 <- mean_gp - 1.96 * sqrt(emp_var)
uppers2 <- mean_gp + 1.96 * sqrt(emp_var)
covers2 <- apply((lowers2 < true_ate) & (uppers2 > true_ate),2, mean)
lines(test_rho, covers2, ylim=c(0,1), col=rgb(0,0.75,0,0.5), lwd=5, type='l')
abline(h=0.95)
lowers3 <- mean_gp - 1.96 * sqrt(var2_gp)
uppers3 <- mean_gp + 1.96 * sqrt(var2_gp)
covers3 <- apply((lowers3 < true_ate) & (uppers3 > true_ate),2, mean)
lines(test_rho, covers3, ylim=c(0,1), col=rgb(0.75,0.75,0.75,0.5), lwd=5, type='l')
lowers4 <- mean_gp - 1.96 * sqrt(ess_gp * var2_gp / n_obs)
uppers4 <- mean_gp + 1.96 * sqrt(ess_gp * var2_gp / n_obs)
covers4 <- apply((lowers4 < true_ate) & (uppers4 > true_ate),2, mean)
lines(test_rho, covers4, ylim=c(0,1), col=rgb(0,0,0,0.75), lwd=5, type='l')
# pdf('./experiments/simulation_figure_po-sd_three.pdf', height=5, width=8)
plot(0, xlim=range(test_rho), ylim=range(abs(bals_gp)),
pch=19, col=rgb(0,0,0,0),
xlab='Inverse-Length Scale', ylab='Mean Balance - Abs(Std Diff.)',
main='Covariate Balance')
for(i in 1:n_sims){
lines(test_rho, abs(bals_gp[i,]), lwd=1.5, col=rgb(0,0,0,0.15))
}
lines(test_rho, apply(bals_gp, 2, mean),
lwd=4, col=rgb(0.75,0,0,0.5))
plot(0, xlim=range(test_rho), ylim=range(mean_gp),
pch=19, col=rgb(0,0,0,0),
xlab='Inverse-Length Scale', ylab='ATE Estimate',
main='Treatment Effect Estimates')
for(i in 1:n_sims){
lines(test_rho, mean_gp[i,], lwd=1.5, col=rgb(0,0,0,0.15))
}
lines(test_rho, apply(mean_gp, 2, mean), lwd=4, col=rgb(0.75,0,0,0.5))
abline(h=true_ate, lwd=4, col=rgb(0,0.75,0,0.75))
legend('topright', 'True ATE', lwd=4, col=rgb(0,0.75,0,0.75))
plot(0, xlim=range(test_rho), ylim=c(0,max(vars_gp)),
pch=19, col=rgb(0,0,0,0),
xlab='Inverse-Length Scale', ylab='Estimated Variance',
main='Robust Variance Estimates')
for(i in 1:n_sims){
lines(test_rho, vars_gp[i,], lwd=1.5, col=rgb(0,0,0,0.15))
}
lines(test_rho, apply(vars_gp, 2, mean), lwd=4, col=rgb(0.75,0,0,0.75))
lines(test_rho, apply(mean_gp, 2, var), lwd=4, col=rgb(0,0,0.75,0.75))
lines(test_rho, apply(mean_gp, 2, var) / apply(vars_gp, 2, mean),
lwd=4, col=rgb(0.75,0,0.75,0.75))
legend('topright', 'Emp. Var.', lwd=4, col=rgb(0,0,0.75,0.75))
plot(0, xlim=range(test_rho), ylim=c(0,1),
pch=19, col=rgb(0,0,0,0),
xlab='Inverse-Length Scale', ylab='Coverage Proportion',
main="Coverage")
lines(test_rho, covers, lwd=3, col=rgb(0,0,0,0.5))
abline(h=0.95, lty=3)
abline(h=1, lty=3)
plot(test_rho, apply(mean_gp, 2, var) / apply(vars_gp, 2, mean),
type='l', lwd=4, col=rgb(0.75,0,0,0.5),
xlab='Inverse-Length Scale',
ylab='Ratio',
main='Ratio of Emp Var to Mean of Robust Var. Est')
abline(h=1, lwd=4, col=rgb(0,0,0,0.5))
plot(test_rho, apply(ess_gp, 2, mean),
type='l', lwd=4, col=rgb(0.75,0,0,0.5),
xlab='Inverse-Length Scale',
ylab='Ratio',
main='Ratio of Emp Var to Mean of Robust Var. Est')
abline(h=1, lwd=4, col=rgb(0,0,0,0.5))
# dev.off()
# GLM RESULTS ------------------------------------------------------------------
lowers <- mean_glm - 1.96 * sqrt(vars_glm)
uppers <- mean_glm + 1.96 * sqrt(vars_glm)
covers <- apply((lowers < true_ate) & (uppers > true_ate),2, mean)
plot(test_rho, covers, col=rgb(0,0.75,0.75,0.5), lwd=5)
abline(h=0.95)
plot(0, xlim=range(test_rho), ylim=range(abs(bals_glm)),
pch=19, col=rgb(0,0,0,0),
xlab='Inverse-Length Scale', ylab='Mean Balance - Abs(Std Diff.)',
main='Covariate Balance')
for(i in 1:n_sims){
lines(test_rho, abs(bals_glm[i,]), lwd=1.5, col=rgb(0,0,0,0.15))
}
lines(test_rho, apply(bals_glm, 2, mean),
lwd=4, col=rgb(0.75,0,0,0.5))
plot(0, xlim=range(test_rho), ylim=range(mean_glm),
pch=19, col=rgb(0,0,0,0),
xlab='Inverse-Length Scale', ylab='ATE Estimate',
main='Treatment Effect Estimates')
for(i in 1:n_sims){
lines(test_rho, mean_glm[i,], lwd=1.5, col=rgb(0,0,0,0.15))
}
lines(test_rho, apply(mean_glm, 2, mean), lwd=4, col=rgb(0.75,0,0,0.5))
abline(h=3, lwd=4, col=rgb(0,0.75,0,0.75))
legend('topright', 'True ATE', lwd=4, col=rgb(0,0.75,0,0.75))
plot(0, xlim=range(test_rho), ylim=c(0,max(vars_glm)),
pch=19, col=rgb(0,0,0,0),
xlab='Inverse-Length Scale', ylab='Estimated Variance',
main='Robust Variance Estimates')
for(i in 1:n_sims){
lines(test_rho, vars_glm[i,], lwd=1.5, col=rgb(0,0,0,0.15))
}
lines(test_rho, apply(vars_glm, 2, mean), lwd=4, col=rgb(0.75,0,0,0.75))
lines(test_rho, apply(mean_glm, 2, var), lwd=4, col=rgb(0,0,0.75,0.75))
lines(test_rho, apply(mean_glm, 2, var) / apply(vars_glm, 2, mean),
lwd=4, col=rgb(0.75,0,0.75,0.75))
legend('topright', 'Emp. Var.', lwd=4, col=rgb(0,0,0.75,0.75))
plot(0, xlim=range(test_rho), ylim=c(0,1),
pch=19, col=rgb(0,0,0,0),
xlab='Inverse-Length Scale', ylab='Coverage Proportion',
main="Coverage")
lines(test_rho, covers, lwd=3, col=rgb(0,0,0,0.5))
abline(h=0.95, lty=3)
abline(h=1, lty=3)
plot(test_rho, apply(mean_glm, 2, var) / apply(vars_glm, 2, mean),
type='l', lwd=4, col=rgb(0.75,0,0,0.5),
xlab='Inverse-Length Scale',
ylab='Ratio',
main='Ratio of Emp Var to Mean of Robust Var. Est')
abline(h=1, lwd=4, col=rgb(0,0,0,0.5))
lowers <- mean_glm - 1.96 * sqrt(vars_glm)
uppers <- mean_glm + 1.96 * sqrt(vars_glm)
covers <- apply((lowers < true_ate) & (uppers > true_ate),2, mean)
plot(test_rho, covers, ylim=c(0,1), col=rgb(0,0,0.75,0.5), lwd=5, type='l')
lowers1 <- mean_glm - 1.96 * sqrt(ess_glm * vars_glm / n_obs)
uppers1 <- mean_glm + 1.96 * sqrt(ess_glm * vars_glm / n_obs)
covers1 <- apply((lowers1 < true_ate) & (uppers1 > true_ate),2, mean)
lines(test_rho, covers1, ylim=c(0,1), col=rgb(0.75,0,0.75,0.5), lwd=5, type='l')
emp_var <- matrix(apply(mean_glm, 2, var), nrow=n_sims, ncol = length(test_rho), byrow = T)
lowers2 <- mean_glm - 1.96 * sqrt(emp_var)
uppers2 <- mean_glm + 1.96 * sqrt(emp_var)
covers2 <- apply((lowers2 < true_ate) & (uppers2 > true_ate),2, mean)
lines(test_rho, covers2, ylim=c(0,1), col=rgb(0,0.75,0,0.5), lwd=5, type='l')
abline(h=0.95)
# plot(apply(ess2_gp,2,mean)/n_obs, apply(mean_gp, 2, var) / apply(vars_gp, 2, mean))
# abline(h=1)
# #
# #
# # dset <- data.frame("YO" = yo, "TA" = t_test, "WTS" = wts_old)
# # design.ps <- survey::svydesign(ids=~1, weights=~WTS, data=dset)
# # glm1 <- survey::svyglm(YO ~ TA, design=design.ps)
# # summary(glm1)$coef[2,1:2]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.