interflex.linear <- function(data,
Y, # outcome
D, # treatment indicator
X, # moderator
treat.info,
diff.info,
Z = NULL, # covariates
weights = NULL, # weighting variable
full.moderate = FALSE, # fully moderated model
Z.X = NULL, # fully moderated terms
FE = NULL, # fixed effects
IV = NULL,
neval = 50,
X.eval = NULL,
method = "linear", ## "probit"; "logit"; "poisson"; "nbinom"
vartype = "simu", ## variance type "simu"; "bootstrap"; "delta"
vcov.type = "robust", ##"homoscedastic"; "robust"; "cluster"; "pcse"
time = NULL,
pairwise = TRUE,
nboots = 200,
nsimu = 1000,
parallel = TRUE,
cores = 4,
cl = NULL, # variable to be clustered on
#predict = FALSE,
Z.ref = NULL, # same length as Z, set the value of Z when estimating marginal effects/predicted value
figure = TRUE,
CI=CI,
order = NULL,
subtitles = NULL,
show.subtitles = NULL,
Xdistr = "histogram", # c("density","histogram","none")
main = NULL,
Ylabel = NULL,
Dlabel = NULL,
Xlabel = NULL,
xlab = NULL,
ylab = NULL,
xlim = NULL,
ylim = NULL,
theme.bw = FALSE,
show.grid = TRUE,
cex.main = NULL,
cex.sub = NULL,
cex.lab = NULL,
cex.axis = NULL,
interval = NULL,
file = NULL,
ncols = NULL,
pool = FALSE,
color = NULL,
legend.title = NULL,
show.all = FALSE,
scale = 1.1,
height = 7,
width = 10
){
WEIGHTS <- NULL
n <- dim(data)[1]
diff.values.plot <- diff.info[["diff.values.plot"]]
diff.values <- diff.info[["diff.values"]]
difference.name <- diff.info[["difference.name"]]
treat.type <- treat.info[["treat.type"]]
if(treat.type=='discrete'){
other.treat <- treat.info[["other.treat"]]
other.treat.origin <- names(other.treat)
names(other.treat.origin) <- other.treat
all.treat <- treat.info[["all.treat"]]
all.treat.origin <- names(all.treat)
names(all.treat.origin) <- all.treat
base <- treat.info[["base"]]
}
if(treat.type=='continuous'){
D.sample <- treat.info[["D.sample"]]
label.name <- names(D.sample)
#names(label.name) <- D.sample
}
ncols <- treat.info[["ncols"]]
if(is.null(cl)==FALSE){ ## find clusters
clusters<-unique(data[,cl])
id.list<-split(1:n,data[,cl])
}
if(is.null(FE)==TRUE){
use_fe <- 0
}else{
use_fe <- 1
}
#pcse
if(vcov.type=="pcse"){
data.cl <- data[,cl]
data.time <- data[,time]
}
# evaluation points
X.eval0 <- X.eval
X.eval <- seq(min(data[,X]), max(data[,X]), length.out = neval)
X.eval <- sort(c(X.eval,X.eval0))
neval <- length(X.eval)
# construct the formula
formula <- paste0(Y,"~",X)
if(treat.type=="discrete"){
for(char in other.treat){
data[,paste0("D",".",char)] <- as.numeric(data[,D]==char)
data[,paste0("DX",".",char)] <- as.numeric(data[,D]==char)*data[,X]
formula <- paste0(formula,"+",paste0("D",".",char),"+",paste0("DX",".",char))
}
}
else{
data[,"DX"] <- data[,D]*data[,X]
formula <- paste0(formula,"+",D,"+DX")
}
if(is.null(Z)==FALSE){
formula <- paste0(formula,"+",paste0(Z,collapse="+"))
if(full.moderate==TRUE){
formula <- paste0(formula,"+",paste0(Z.X,collapse="+"))
}
}
if (use_fe==1) {
formula <- paste0(formula, "|",paste0(FE, collapse = "+"))
if (vcov.type=="cluster") {
formula <- paste0(formula, "| 0 |",paste0(cl,collapse = "+"))
}
}
#iv formula
if(!is.null(IV)){
if(use_fe==0){
#mod2 <- ivreg(Y~D+X+D:X+Z1|W+X+W:X+Z1,data=s1)
formula.iv <- X
for(sub.iv in IV){
data[,paste0(X,".",sub.iv)] <- data[,sub.iv]*data[,X]
formula.iv <- paste0(formula.iv,"+",sub.iv)
formula.iv <- paste0(formula.iv,"+",paste0(X,".",sub.iv))
}
if(is.null(Z)==FALSE){
formula.iv <- paste0(formula.iv,"+",paste0(Z,collapse="+"))
if(full.moderate==TRUE){
formula.iv <- paste0(formula.iv,"+",paste0(Z.X,collapse="+"))
}
}
formula <- paste0(formula,"|",formula.iv)
}
if(use_fe==1){
#mod2 <- felm(Y~X+Z1+Z2|unit+year|(D|DX ~ W+WX)|0,data = s1)
formula <- paste0(Y,"~",X)
formula.iv <- ""
name.update <- c(X)
if(is.null(Z)==FALSE){
formula <- paste0(formula,"+",paste0(Z,collapse="+"))
name.update <- c(name.update,Z)
if(full.moderate==TRUE){
formula <- paste0(formula,"+",paste0(Z.X,collapse="+"))
name.update <- c(name.update,Z.X)
}
}
if(treat.type=="discrete"){
for(char in other.treat){
formula.iv <- paste0(formula.iv,"|",paste0("D",".",char),"|",paste0("DX",".",char))
name.update <- c(name.update,paste0("D",".",char),paste0("DX",".",char))
}
}
else{
formula.iv <- paste0(formula.iv,"|",D,"|DX")
name.update <- c(name.update,D,"DX")
}
formula.iv <- substring(formula.iv, 2)
formula.iv <- paste0(formula.iv," ~ ")
for(sub.iv in IV){
data[,paste0(X,".",sub.iv)] <- data[,sub.iv]*data[,X]
formula.iv <- paste0(formula.iv,"+",sub.iv)
formula.iv <- paste0(formula.iv,"+",paste0(X,".",sub.iv))
}
formula <- paste0(formula, "|",paste0(FE, collapse = "+"),"|")
formula.iv <- paste0("(",formula.iv,")")
formula <- paste0(formula,formula.iv)
if (vcov.type=="cluster") {
formula <- paste0(formula, "|",paste0(cl,collapse = "+"))
}
}
}
formula <- as.formula(formula)
if(is.null(weights)==TRUE){
w <- rep(1,n)
}
else{
w <- data[,weights]
}
data[['WEIGHTS']] <- w
# model fit
if(method=='linear'){
if(is.null(IV)){
if(use_fe==0){
suppressWarnings(
model <- glm(formula,data=data,weights=WEIGHTS)
)
}
if(use_fe==1){
suppressWarnings(
model <- felm(formula,data=data,weights=w)
)
model$converged <- TRUE
}
}
if(!is.null(IV)){
if(use_fe==0){
suppressWarnings(
model <- ivreg(formula,data=data,weights=WEIGHTS)
)
model$converged <- TRUE
}
if(use_fe==1){
suppressWarnings(
model <- felm(formula,data=data,weights=w)
)
model$converged <- TRUE
}
}
}
if(method=='logit'){
suppressWarnings(
model <- glm(formula,data=data,family=binomial(link = 'logit'),weights=WEIGHTS)
)
}
if(method=='probit'){
suppressWarnings(
model <- glm(formula,data=data,family=binomial(link = 'probit'),weights=WEIGHTS)
)
}
if(method=='poisson'){
suppressWarnings(
model <- glm(formula,data=data,family=poisson,weights=WEIGHTS)
)
}
if(method=='nbinom'){
suppressWarnings(
model <- glm.nb(formula,data=data,weights=WEIGHTS)
)
}
if(use_fe==0){
if(model$converged==FALSE){
stop("Linear estimator can't converge.")
}
}
model.coef <- coef(model)
if(!is.null(IV)){
if(use_fe==1){
names(model.coef) <- name.update
}
}
if(use_fe==0){
if(vcov.type=="homoscedastic"){
model.vcov <- vcov(model)
}
if(vcov.type=="robust"){
model.vcov <- vcovHC(model,type="HC1")
}
if(vcov.type=="cluster"){
model.vcov <- vcovCluster(model,cluster = data[,cl])
}
if(vcov.type=='pcse'){
model.vcov <- pcse(model,pairwise=pairwise,groupN=data.cl,groupT=data.time)$vcov
rownames(model.vcov)[1] <- "(Intercept)"
colnames(model.vcov)[1] <- "(Intercept)"
}
}
else{ # fe vcov
if (vcov.type=="homoscedastic") {
model.vcov <- vcov(model, type = "iid")
}
if (vcov.type=="robust") {
model.vcov <- vcov(model, type = "robust")
}
if (vcov.type=="cluster") {
model.vcov <- vcov(model, type = "cluster")
}
}
if(!is.null(IV)){
if(use_fe==1){
rownames(model.vcov) <- colnames(model.vcov) <- name.update
}
}
model.df <- model$df.residual
model.coef[which(is.na(model.coef))] <- 0
model.vcov[which(is.na(model.vcov))] <- 0
model.vcov.all <- matrix(0,nrow=length(model.coef),ncol=length(model.coef))
colnames(model.vcov.all) <- names(model.coef)
rownames(model.vcov.all) <- names(model.coef)
for(a1 in rownames(model.vcov)){
for(a2 in colnames(model.vcov)){
model.vcov.all[a1,a2] <- model.vcov[a1,a2]
}
}
if(isSymmetric.matrix(model.vcov.all,tol = 1e-6)==FALSE){
warning(paste0("Option vcov.type==",vcov.type,"leads to unstable standard error in linear estimator, by default use homoscedastic standard error as an alternative.\n"))
model.vcov.all <- vcov(model)
if(!is.null(IV)){
if(use_fe==1){
rownames(model.vcov.all) <- colnames(model.vcov.all) <- name.update
}
}
model.vcov.all[which(is.na(model.vcov.all))] <- 0
}
model.vcov <- model.vcov.all
##Function A
#1, estimate treatment effects/marginal effects given model.coef
#2, estimate difference of treatment effects/marginal effects at different values of the moderator
#3, input: model.coef;X.eval;char(discrete)/D.ref(continuous);diff.values(default to NULL);
#4, output: marginal effects/treatment effects/E.pred/E.base/diff.estimate
gen.general.TE <- function(model.coef,char=NULL,D.ref=NULL,diff.values=NULL){
if(is.null(char)==TRUE){
treat.type='continuous'
}
if(is.null(D.ref)==TRUE){
treat.type='discrete'
}
gen.TE <- function(model.coef,X.eval){
neval <- length(X.eval)
if(treat.type=='discrete'){
link.1 <- model.coef['(Intercept)'] + X.eval*model.coef[X] + 1*model.coef[paste0('D.',char)] + X.eval*model.coef[paste0('DX.',char)]
link.0 <- model.coef['(Intercept)'] + X.eval*model.coef[X]
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link.1 <- link.1 + target.Z*model.coef[a]
link.0 <- link.0 +target.Z*model.coef[a]
if(full.moderate==TRUE){
link.1 <- link.1 + target.Z*model.coef[paste0(a,'.X')]*X.eval
link.0 <- link.0 + target.Z*model.coef[paste0(a,'.X')]*X.eval
}
}
}
if(method=='linear'){
TE <- link.1-link.0
E.pred <- link.1
E.base <- link.0
}
if(method=='logit'){
E.pred <- E.prob.1 <- exp(link.1)/(1+exp(link.1))
E.base <- E.prob.0 <- exp(link.0)/(1+exp(link.0))
TE <- E.prob.1-E.prob.0
}
if(method=='probit'){
E.pred <- E.prob.1 <- pnorm(link.1,0,1)
E.base <- E.prob.0 <- pnorm(link.0,0,1)
TE <- E.prob.1-E.prob.0
}
if(method=='poisson' | method=="nbinom"){
E.pred <- E.y.1 <- exp(link.1)
E.base <- E.y.0 <- exp(link.0)
TE <- E.y.1-E.y.0
}
names(link.1) <- rep(paste0("link.1.",char),neval)
names(link.0) <- rep(paste0("link.0.",base),neval)
names(TE) <- rep(paste0("TE.",char),neval)
names(E.pred) <- rep(paste0("Predict.",char),neval)
names(E.base) <- rep(paste0("Predict.",base),neval)
gen.TE.output <- list(TE=TE,E.pred=E.pred,E.base=E.base,
link.1=link.1,link.0=link.0)
}
if(treat.type=='continuous'){
link <- model.coef["(Intercept)"] + X.eval*model.coef[X] + model.coef[D]*D.ref + model.coef["DX"]*X.eval*D.ref
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link <- link + target.Z*model.coef[a]
if(full.moderate==TRUE){
link <- link + target.Z*model.coef[paste0(a,'.X')]*X.eval
}
}
}
if(method=='logit'){
ME <- exp(link)/(1+exp(link))^2*(model.coef[D]+model.coef["DX"]*X.eval)
E.pred <- exp(link)/(1+exp(link))
}
if(method=='probit'){
ME <- (model.coef[D]+model.coef["DX"]*X.eval)*dnorm(link)
E.pred <- pnorm(link,0,1)
}
if(method=='linear'){
ME <- model.coef[D]+model.coef["DX"]*X.eval
E.pred <- link
}
if(method=='poisson'|method=='nbinom'){
ME <- exp(link)*(model.coef[D]+model.coef["DX"]*X.eval)
E.pred <- exp(link)
}
names(link) <- rep("link",neval)
names(ME) <- rep(paste0("ME.",names(D.sample)[D.sample == D.ref]),neval)
names(E.pred) <- rep(paste0("Predict.",names(D.sample)[D.sample == D.ref]),neval)
gen.TE.output <- list(ME=ME,E.pred=E.pred,link=link)
}
return(gen.TE.output)
}
gen.TE.fe <- function(model.coef,X.eval){
neval <- length(X.eval)
if(treat.type=='discrete'){
TE <- model.coef[paste0('D.',char)] + X.eval*model.coef[paste0('DX.',char)]
names(TE) <- rep(paste0("TE.",char),neval)
E.pred <- rep(0,neval) #doesn't return prediction value for fixed effects
E.base <- rep(0,neval)
gen.TE.output <- list(TE=TE,E.pred=E.pred,E.base=E.base,link.1=E.pred,link.0=E.base)
}
if(treat.type=='continuous'){
ME <- model.coef[D] + model.coef["DX"]*X.eval
names(ME) <- rep(paste0("ME.",names(D.sample)[D.sample == D.ref]),neval)
E.pred <- rep(0,neval)
gen.TE.output <- list(ME=ME,E.pred=E.pred,link=E.pred)
}
return(gen.TE.output)
}
if(use_fe==0){
gen.TE.output <- gen.TE(model.coef=model.coef,X.eval=X.eval)
}
if(use_fe==1){
gen.TE.output <- gen.TE.fe(model.coef=model.coef,X.eval=X.eval)
}
if(is.null(diff.values)==FALSE){
if(use_fe==0){
gen.TE.diff <- gen.TE(model.coef=model.coef,X.eval=diff.values)
}
if(use_fe==1){
gen.TE.diff <- gen.TE.fe(model.coef=model.coef,X.eval=diff.values)
}
if(treat.type=='discrete'){
target <- gen.TE.diff$TE
}
if(treat.type=='continuous'){
target <- gen.TE.diff$ME
}
if(length(diff.values)==2){
difference.temp <- c(target[2]-target[1])
}
if(length(diff.values)==3){
difference.temp <- c(target[2]-target[1],
target[3]-target[2],
target[3]-target[1])
}
if(treat.type=='discrete'){
names(difference.temp) <- paste0(char,'.',difference.name)
}
if(treat.type=='continuous'){
names(difference.temp) <- paste0(names(D.sample)[D.sample == D.ref],'.',difference.name)
}
}
else{
difference.temp <- NULL
}
if(treat.type=='discrete'){
return(list(TE=gen.TE.output$TE,
E.pred=gen.TE.output$E.pred,
E.base=gen.TE.output$E.base,
link.1=gen.TE.output$link.1,
link.0=gen.TE.output$link.0,
diff.estimate=difference.temp))
}
if(treat.type=='continuous'){
return(list(ME=gen.TE.output$ME,
E.pred=gen.TE.output$E.pred,
link=gen.TE.output$link,
diff.estimate=difference.temp))
}
}
##Function B delta method
#1, estimate sd of treatment effects/marginal effects using delta method
#2, estimate sd of predicted values using delta method
#3, estimate sd of diff.estimate using delta method
#4, estimate vcov of ME/TE using delta method
#5, estimate sd of linear predictor using delta method
#6, input: model.coef; model.vcov; char(discrete)/D.ref(continuous);diff.values
#7, output: sd of TE/ME; sd of diff.values; vcov of ME/TE
gen.delta.TE <- function(model.coef,model.vcov,char=NULL,D.ref=NULL,diff.values=NULL,vcov=FALSE){
if(is.null(char)==TRUE){
treat.type='continuous'
flag=1
}
if(is.null(D.ref)==TRUE){
treat.type='discrete'
if(char==base){
flag=0
}else{
flag=1
}
}
#sd for TE/ME
gen.sd.fe <- function(x,to.diff=FALSE){
if(treat.type=='discrete'){
target.slice <- c(paste0('D.',char),paste0('DX.',char))
vec.1 <- c(1,x)
vec.0 <- c(0,0)
vec <- vec.1-vec.0
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
if(to.diff==TRUE){
return(list(vec=vec,temp.vcov.matrix=temp.vcov.matrix))
}
delta.sd <- sqrt((t(vec)%*%temp.vcov.matrix%*%vec)[1,1])
return(delta.sd)
}
if(treat.type=='continuous'){
target.slice <- c(D,'DX')
vec <- c(1,x)
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
if(to.diff==TRUE){
return(list(vec=vec,temp.vcov.matrix=temp.vcov.matrix))
}
delta.sd <- sqrt((t(vec)%*%temp.vcov.matrix%*%vec)[1,1])
return(delta.sd)
}
}
gen.sd <- function(x,to.diff=FALSE){
if(use_fe==TRUE){
return(gen.sd.fe(x=x,to.diff=to.diff))
}
if(treat.type=='discrete'){
link.1 <- model.coef['(Intercept)'] + x*model.coef[X] + 1*model.coef[paste0('D.',char)] + x*model.coef[paste0('DX.',char)]
link.0 <- model.coef['(Intercept)'] + x*model.coef[X]
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link.1 <- link.1 + target.Z*model.coef[a]
link.0 <- link.0 +target.Z*model.coef[a]
if(full.moderate==TRUE){
link.1 <- link.1 + target.Z*model.coef[paste0(a,'.X')]*x
link.0 <- link.0 + target.Z*model.coef[paste0(a,'.X')]*x
}
}
}
if(is.null(Z)==FALSE){
if(full.moderate==FALSE){
vec.1 <- c(1,x,1,x,Z.ref)
vec.0 <- c(1,x,0,0,Z.ref)
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char),Z)
}
if(full.moderate==TRUE){
vec.1 <- c(1,x,1,x,Z.ref,x*Z.ref)
vec.0 <- c(1,x,0,0,Z.ref,x*Z.ref)
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char),Z,Z.X)
}
}
else{
vec.1 <- c(1,x,1,x)
vec.0 <- c(1,x,0,0)
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char))
}
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
if(method=='logit'){
vec <- vec.1*exp(link.1)/(1+exp(link.1))^2 - vec.0*exp(link.0)/(1+exp(link.0))^2
}
if(method=='probit'){
vec <- vec.1*dnorm(link.1) - vec.0*dnorm(link.0)
}
if(method=='poisson' | method=='nbinom'){
vec <- vec.1*exp(link.1) - vec.0*exp(link.0)
}
if(method=='linear'){
vec <- vec.1-vec.0
}
if(to.diff==TRUE){
return(list(vec=vec,temp.vcov.matrix=temp.vcov.matrix))
}
delta.sd <- sqrt((t(vec)%*%temp.vcov.matrix%*%vec)[1,1])
return(delta.sd)
}
if(treat.type=='continuous'){
link <- model.coef["(Intercept)"] + x*model.coef[X] + model.coef[D]*D.ref + model.coef["DX"]*x*D.ref
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link <- link + target.Z*model.coef[a]
if(full.moderate==TRUE){
link <- link + x*target.Z*model.coef[paste0(a,'.X')]
}
}
}
if(is.null(Z)==FALSE){
if(full.moderate==FALSE){
vec1 <- c(1,x,D.ref,D.ref*x,Z.ref)
vec0 <- c(0,0,1,x,rep(0,length(Z)))
target.slice <- c('(Intercept)',X,D,'DX',Z)
}
if(full.moderate==TRUE){
vec1 <- c(1,x,D.ref,D.ref*x,Z.ref,Z.ref*x)
vec0 <- c(0,0,1,x,rep(0,2*length(Z)))
target.slice <- c('(Intercept)',X,D,'DX',Z,Z.X)
}
}
else{
vec1 <- c(1,x,D.ref,D.ref*x)
vec0 <- c(0,0,1,x)
target.slice <- c('(Intercept)',X,D,'DX')
}
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
if(method=='logit'){
vec <- -(model.coef[D]+x*model.coef['DX'])*(exp(link)-exp(-link))/(2+exp(link)+exp(-link))^2*vec1 + exp(link)/(1+exp(link))^2*vec0
}
if(method=='probit'){
vec <- dnorm(link)*vec0-(model.coef[D]+x*model.coef['DX'])*link*dnorm(link)*vec1
}
if(method=='poisson'|method=='nbinom'){
vec <- (model.coef[D]+x*model.coef['DX'])*exp(link)*vec1+exp(link)*vec0
}
if(method=='linear'){
vec <- vec0
}
if(to.diff==TRUE){
return(list(vec=vec,temp.vcov.matrix=temp.vcov.matrix))
}
delta.sd <- sqrt((t(vec)%*%temp.vcov.matrix%*%vec)[1,1])
return(delta.sd)
}
}
if(flag==1){
TE.sd <- c(sapply(X.eval,function(x) gen.sd(x)))
}
else{
TE.sd <- NULL
}
if(treat.type=='discrete' & is.null(TE.sd)==FALSE){
names(TE.sd) <- rep(paste0("sd.",char),neval)
}
if(treat.type=='continuous'){
names(TE.sd) <- rep(paste0("sd.",names(D.sample)[D.sample == D.ref]),neval)
}
#sd for linear predictor
gen.link.sd <- function(x,base=FALSE){
if(treat.type=='discrete'){
if(is.null(Z)==FALSE){
if(base==FALSE){
vec <- c(1,x,1,x,Z.ref)
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char),Z)
}
else{
vec <- c(1,x,Z.ref)
target.slice <- c('(Intercept)',X,Z)
}
if(full.moderate==TRUE){
vec <- c(vec,Z.ref*x)
target.slice <- c(target.slice,Z.X)
}
}
else{
if(base==FALSE){
vec <- c(1,x,1,x)
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char))
}
else{
vec <- c(1,x)
target.slice <- c('(Intercept)',X)
}
}
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
link.sd <- sqrt((t(vec)%*%temp.vcov.matrix%*%vec)[1,1])
return(link.sd)
}
if(treat.type=='continuous'){
if(is.null(Z)==FALSE){
vec <- c(1,x,D.ref,D.ref*x,Z.ref)
target.slice <- c('(Intercept)',X,D,'DX',Z)
if(full.moderate==TRUE){
vec <- c(vec,Z.ref*x)
target.slice <- c(target.slice,Z.X)
}
}
else{
vec <- c(1,x,D.ref,D.ref*x)
target.slice <- c('(Intercept)',X,D,'DX')
}
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
link.sd <- sqrt((t(vec)%*%temp.vcov.matrix%*%vec)[1,1])
return(link.sd)
}
}
gen.link.sd.fe <- function(x,base=FALSE){
return(0)
}
if(use_fe==FALSE){
if(treat.type=='discrete'){
if(char==base){
link.sd <- c(sapply(X.eval,function(x) gen.link.sd(x,base=TRUE)))
}
else{
link.sd <- c(sapply(X.eval,function(x) gen.link.sd(x)))
}
}
else{
link.sd <- c(sapply(X.eval,function(x) gen.link.sd(x)))
}
}
else{
link.sd <- rep(0,length(X.eval))
}
#sd for predicted value
gen.predict.sd <- function(x,base=FALSE){
if(treat.type=='discrete'){
if(base==FALSE){
link <- model.coef['(Intercept)'] + x*model.coef[X] + 1*model.coef[paste0('D.',char)] + x*model.coef[paste0('DX.',char)]
}
if(base==TRUE){
link <- model.coef['(Intercept)'] + x*model.coef[X]
}
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link <- link + target.Z*model.coef[a]
if(full.moderate==TRUE){
link <- link + target.Z*model.coef[paste0(a,'.X')]*x
}
}
}
if(is.null(Z)==FALSE){
if(base==FALSE){
vec <- c(1,x,1,x,Z.ref)
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char),Z)
}
else{
vec <- c(1,x,Z.ref)
target.slice <- c('(Intercept)',X,Z)
}
if(full.moderate==TRUE){
vec <- c(vec,Z.ref*x)
target.slice <- c(target.slice,Z.X)
}
}
else{
if(base==FALSE){
vec <- c(1,x,1,x)
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char))
}
else{
vec <- c(1,x)
target.slice <- c('(Intercept)',X)
}
}
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
if(method=='logit'){
vec <- vec*exp(link)/(1+exp(link))^2
}
if(method=='probit'){
vec <- vec*dnorm(link)
}
if(method=='poisson' | method=='nbinom'){
vec <- vec*exp(link)
}
if(method=='linear'){
vec <- vec
}
predict.sd <- sqrt((t(vec)%*%temp.vcov.matrix%*%vec)[1,1])
return(predict.sd)
}
if(treat.type=='continuous'){
link <- model.coef["(Intercept)"] + x*model.coef[X] + model.coef[D]*D.ref + model.coef["DX"]*x*D.ref
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link <- link + target.Z*model.coef[a]
if(full.moderate==TRUE){
link <- link + target.Z*model.coef[paste0(a,'.X')]*x
}
}
}
if(is.null(Z)==FALSE){
vec <- c(1,x,D.ref,D.ref*x,Z.ref)
target.slice <- c('(Intercept)',X,D,'DX',Z)
if(full.moderate==TRUE){
vec <- c(vec,Z.ref*x)
target.slice <- c(target.slice,Z.X)
}
}
else{
vec <- c(1,x,D.ref,D.ref*x)
target.slice <- c('(Intercept)',X,D,'DX')
}
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
if(method=='logit'){
vec <- vec*exp(link)/(1+exp(link))^2
}
if(method=='probit'){
vec <- vec*dnorm(link)
}
if(method=='poisson' | method=='nbinom'){
vec <- vec*exp(link)
}
if(method=='linear'){
vec <- vec
}
predict.sd <- sqrt((t(vec)%*%temp.vcov.matrix%*%vec)[1,1])
return(predict.sd)
}
}
gen.predict.sd.fe <- function(x,base=FALSE){
return(0)
}
if(use_fe==FALSE){
if(treat.type=='discrete'){
if(char==base){
predict.sd <- c(sapply(X.eval,function(x) gen.predict.sd(x,base=TRUE)))
}
else{
predict.sd <- c(sapply(X.eval,function(x) gen.predict.sd(x)))
}
}
else{
predict.sd <- c(sapply(X.eval,function(x) gen.predict.sd(x)))
}
}
else{
predict.sd <- rep(0,length(X.eval))
}
if(treat.type=='discrete'){
names(predict.sd) <- rep(paste0("predict.sd.",char),neval)
}
if(treat.type=='continuous'){
names(predict.sd) <- rep(paste0("predict.sd.",names(D.sample)[D.sample == D.ref]),neval)
}
#sd for diff.estimates
if(is.null(diff.values)==FALSE & flag==1){
if(length(diff.values)==2){
vec.list2 <- gen.sd(x=diff.values[2],to.diff=TRUE)
vec.list1 <- gen.sd(x=diff.values[1],to.diff=TRUE)
vec1 <- vec.list1$vec
vec2 <- vec.list2$vec
vec <- vec2-vec1
temp.vcov.matrix <- vec.list1$temp.vcov.matrix
delta.diff.sd <- c(sqrt((t(vec)%*%temp.vcov.matrix%*%vec)[1,1]))
}
if(length(diff.values)==3){
vec.list3 <- gen.sd(x=diff.values[3],to.diff=TRUE)
vec.list2 <- gen.sd(x=diff.values[2],to.diff=TRUE)
vec.list1 <- gen.sd(x=diff.values[1],to.diff=TRUE)
vec1 <- vec.list1$vec
vec2 <- vec.list2$vec
vec3 <- vec.list3$vec
vec21 <- vec2-vec1
vec32 <- vec3-vec2
vec31 <- vec3-vec1
temp.vcov.matrix <- vec.list1$temp.vcov.matrix
delta.diff.sd <- c(sqrt((t(vec21)%*%temp.vcov.matrix%*%vec21)[1,1]),
sqrt((t(vec32)%*%temp.vcov.matrix%*%vec32)[1,1]),
sqrt((t(vec31)%*%temp.vcov.matrix%*%vec31)[1,1]))
}
if(treat.type=='discrete'){
names(delta.diff.sd) <- paste0("sd.",char,'.',difference.name)
}
if(treat.type=='continuous'){
names(delta.diff.sd) <- paste0("sd.",names(D.sample)[D.sample == D.ref],'.',difference.name)
}
}
else{
delta.diff.sd <- NULL
}
#vcov matrix
if(flag==1 & vcov==TRUE){
gen.cov.element <- function(x1,x2){
vec.list1 <- gen.sd(x1,to.diff=TRUE)
vec.list2 <- gen.sd(x2,to.diff=TRUE)
vec1 <- vec.list1$vec
vec2 <- vec.list2$vec
temp.vcov.matrix <- vec.list1$temp.vcov.matrix
cov.element <- (t(vec1)%*%temp.vcov.matrix%*%vec2)[1,1]
return(cov.element)
}
gen.cov.vec <- function(colvec,x0){
output <- sapply(colvec,function(xx){gen.cov.element(xx,x0)})
return(output)
}
TE.sd.vcov <- as.matrix(sapply(X.eval,function(xx){gen.cov.vec(X.eval,xx)}))
}
else{
TE.sd.vcov <- NULL
}
#output
if(treat.type=='discrete'){
return(list(TE.sd=TE.sd,
predict.sd=predict.sd,
link.sd=link.sd,
sd.diff.estimate=delta.diff.sd,
TE.vcov=TE.sd.vcov))
}
if(treat.type=='continuous'){
return(list(ME.sd=TE.sd,
predict.sd=predict.sd,
link.sd=link.sd,
sd.diff.estimate=delta.diff.sd,
ME.vcov=TE.sd.vcov))
}
}
##Function C ATE(weighted average)
#1, estimate average treatment effects(ATE)/average marginal effects(AME)
#2, estimate sd of ATE/AME using delta method
#3, input: data(weights); model.coef; model.vcov; char(discrete); delta(Default to FALSE)
#4, output: ATE for char/AME; sd for ATE/AME
gen.ATE.fe <- function(data,model.coef,model.vcov,char=NULL,delta=FALSE){
if(treat.type=='discrete'){
sub.data <- data[data[,D]==char,]
weights <- data[data[,D]==char,'WEIGHTS']
TE <- model.coef[paste0('D.',char)] + sub.data[,X]*model.coef[paste0('DX.',char)]
ATE <- weighted.mean(TE,weights,na.rm=TRUE)
if(delta==FALSE){
return(ATE)
}
}
if(treat.type=='continuous'){
weights <- data[,'WEIGHTS']
ME <- model.coef[D]+model.coef["DX"]*data[,X]
AME <- weighted.mean(ME,weights,na.rm=TRUE)
if(delta==FALSE){
return(AME)
}
}
if(delta==TRUE){
gen.ATE.sd.vec <- function(index){
if(treat.type=='discrete'){
x <- sub.data[index,X]
vec.1 <- c(1,x)
vec.0 <- c(0,0)
vec <- vec.1-vec.0
target.slice <- c(paste0('D.',char),paste0('DX.',char))
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
return(vec)
}
if(treat.type=='continuous'){
vec <- vec0 <- c(1,data[index,X])
target.slice <- c(D,'DX')
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
return(vec)
}
}
if(treat.type=='discrete'){
index.all <- c(1:dim(sub.data)[1])
vec.all <- sapply(index.all,function(x) gen.ATE.sd.vec(x))
vec.mean <- apply(vec.all,1,function(x) weighted.mean(x,weights))
target.slice <- c(paste0('D.',char),paste0('DX.',char))
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
ATE.sd <- sqrt((t(vec.mean)%*%temp.vcov.matrix%*%vec.mean)[1,1])
return(list(ATE=ATE,sd=ATE.sd))
}
if(treat.type=='continuous'){
index.all <- c(1:dim(data)[1])
vec.all <- sapply(index.all,function(x) gen.ATE.sd.vec(x))
vec.mean <- apply(vec.all,1,function(x) weighted.mean(x,weights))
target.slice <- c(D,'DX')
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
AME.sd <- sqrt((t(vec.mean)%*%temp.vcov.matrix%*%vec.mean)[1,1])
return(list(AME=AME,sd=AME.sd))
}
}
}
gen.ATE <- function(data, model.coef,model.vcov,char=NULL,delta=FALSE){
if(use_fe==TRUE){
return(gen.ATE.fe(data=data,model.coef=model.coef,model.vcov=model.vcov,char=char,delta=delta))
}
if(treat.type=='discrete'){
sub.data <- data[data[,D]==char,]
weights <- data[data[,D]==char,'WEIGHTS']
link.1 <- model.coef['(Intercept)'] + sub.data[,X]*model.coef[X] +
model.coef[paste0('D.',char)] + sub.data[,X]*model.coef[paste0('DX.',char)]
link.0 <- model.coef['(Intercept)'] + sub.data[,X]*model.coef[X]
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- sub.data[,a]
link.1 <- link.1 + target.Z*model.coef[a]
link.0 <- link.0 +target.Z*model.coef[a]
if(full.moderate==TRUE){
link.1 <- link.1 + target.Z*model.coef[paste0(a,'.X')]*sub.data[,X]
link.0 <- link.0 + target.Z*model.coef[paste0(a,'.X')]*sub.data[,X]
}
}
}
if(method=='linear'){
TE <- link.1-link.0
}
if(method=='logit'){
E.pred <- E.prob.1 <- exp(link.1)/(1+exp(link.1))
E.base <- E.prob.0 <- exp(link.0)/(1+exp(link.0))
TE <- E.prob.1-E.prob.0
}
if(method=='probit'){
E.pred <- E.prob.1 <- pnorm(link.1,0,1)
E.base <- E.prob.0 <- pnorm(link.0,0,1)
TE <- E.prob.1-E.prob.0
}
if(method=='poisson' | method=="nbinom"){
E.pred <- E.y.1 <- exp(link.1)
E.base <- E.y.0 <- exp(link.0)
TE <- E.y.1-E.y.0
}
ATE <- weighted.mean(TE,weights,na.rm=TRUE)
if(delta==FALSE){
return(ATE)
}
}
if(treat.type=='continuous'){
weights <- data[,'WEIGHTS']
link <- model.coef["(Intercept)"] + data[,X]*model.coef[X] + model.coef[D]*data[,D] + model.coef["DX"]*data[,X]*data[,D]
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- data[,a]
link <- link + target.Z*model.coef[a]
if(full.moderate==TRUE){
link <- link + target.Z*model.coef[paste0(a,'.X')]*data[,X]
}
}
}
#return(link)
if(method=='logit'){
ME <- exp(link)/(1+exp(link))^2*(model.coef[D]+model.coef["DX"]*data[,X])
}
if(method=='probit'){
ME <- (model.coef[D]+model.coef["DX"]*data[,X])*dnorm(link)
}
if(method=='linear'){
ME <- model.coef[D]+model.coef["DX"]*data[,X]
}
if(method=='poisson'|method=='nbinom'){
ME <- exp(link)*(model.coef[D]+model.coef["DX"]*data[,X])
}
AME <- weighted.mean(ME,weights,na.rm=TRUE)
if(delta==FALSE){
return(AME)
}
}
if(delta==TRUE){
gen.ATE.sd.vec <- function(index){
if(treat.type=='discrete'){
x <- sub.data[index,X]
link.1 <- model.coef['(Intercept)'] + x*model.coef[X] + 1*model.coef[paste0('D.',char)] + x*model.coef[X]*model.coef[paste0('DX.',char)]
link.0 <- model.coef['(Intercept)'] + x*model.coef[X]
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- sub.data[index,a]
link.1 <- link.1 + target.Z*model.coef[a]
link.0 <- link.0 + target.Z*model.coef[a]
if(full.moderate==TRUE){
link.1 <- link.1 + target.Z*model.coef[paste0(a,'.X')]*x
link.0 <- link.0 + target.Z*model.coef[paste0(a,'.X')]*x
}
}
}
if(is.null(Z)==FALSE){
vec.1 <- c(1,x,1,x,as.matrix(sub.data[index,Z]))
vec.0 <- c(1,x,0,0,as.matrix(sub.data[index,Z]))
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char),Z)
if(full.moderate==TRUE){
vec.1 <- c(vec.1,as.matrix(sub.data[index,Z]*x))
vec.0 <- c(vec.0,as.matrix(sub.data[index,Z]*x))
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char),Z,Z.X)
}
}
else{
vec.1 <- c(1,x,1,x)
vec.0 <- c(1,x,0,0)
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char))
}
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
if(method=='logit'){
vec <- vec.1*exp(link.1)/(1+exp(link.1))^2 - vec.0*exp(link.0)/(1+exp(link.0))^2
}
if(method=='probit'){
vec <- vec.1*dnorm(link.1) - vec.0*dnorm(link.0)
}
if(method=='poisson' | method=='nbinom'){
vec <- vec.1*exp(link.1) - vec.0*exp(link.0)
}
if(method=='linear'){
vec <- vec.1-vec.0
}
return(vec)
}
if(treat.type=='continuous'){
link <- model.coef["(Intercept)"] + data[index,X]*model.coef[X] + model.coef[D]*data[index,D] + model.coef["DX"]*data[index,X]*data[index,D]
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- data[index,a]
link <- link + target.Z*model.coef[a]
if(full.moderate==TRUE){
link <- link + target.Z*model.coef[paste0(a,'.X')]*data[index,X]
}
}
}
if(is.null(Z)==FALSE){
vec1 <- c(1,data[index,X],data[index,D],data[index,D]*data[index,X],as.matrix(data[index,Z]))
vec0 <- c(0,0,1,data[index,X],rep(0,length(Z)))
target.slice <- c('(Intercept)',X,D,'DX',Z)
if(full.moderate==TRUE){
vec1 <- c(vec1,as.matrix(data[index,Z]*data[index,X]))
vec0 <- c(vec0,rep(0,length(Z)))
target.slice <- c(target.slice,Z.X)
}
}
else{
vec1 <- c(1,data[index,X],data[index,D],data[index,D]*data[index,X])
vec0 <- c(0,0,1,data[index,X])
target.slice <- c('(Intercept)',X,D,'DX')
}
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
if(method=='logit'){
vec <- -(model.coef[D]+data[index,X]*model.coef['DX'])*(exp(link)-exp(-link))/(2+exp(link)+exp(-link))^2*vec1 + exp(link)/(1+exp(link))^2*vec0
}
if(method=='probit'){
vec <- dnorm(link)*vec0-(model.coef[D]+data[index,X]*model.coef['DX'])*link*dnorm(link)*vec1
}
if(method=='poisson'|method=='nbinom'){
vec <- (model.coef[D]+data[index,X]*model.coef['DX'])*exp(link)*vec1+exp(link)*vec0
}
if(method=='linear'){
vec <- vec0
}
return(vec)
}
}
if(treat.type=='discrete'){
index.all <- c(1:dim(sub.data)[1])
vec.all <- sapply(index.all,function(x) gen.ATE.sd.vec(x))
vec.mean <- apply(vec.all,1,function(x) weighted.mean(x,weights))
if(is.null(Z)==FALSE){
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char),Z)
if(full.moderate==TRUE){
target.slice <- c(target.slice,Z.X)
}
}
else{
target.slice <- c('(Intercept)',X,paste0('D.',char),paste0('DX.',char))
}
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
ATE.sd <- sqrt((t(vec.mean)%*%temp.vcov.matrix%*%vec.mean)[1,1])
return(list(ATE=ATE,sd=ATE.sd))
}
if(treat.type=='continuous'){
index.all <- c(1:dim(data)[1])
vec.all <- sapply(index.all,function(x) gen.ATE.sd.vec(x))
vec.mean <- apply(vec.all,1,function(x) weighted.mean(x,weights))
if(is.null(Z)==FALSE){
target.slice <- c('(Intercept)',X,D,'DX',Z)
if(full.moderate==TRUE){
target.slice <- c(target.slice,Z.X)
}
}
else{
target.slice <- c('(Intercept)',X,D,'DX')
}
temp.vcov.matrix <- model.vcov[target.slice,target.slice]
AME.sd <- sqrt((t(vec.mean)%*%temp.vcov.matrix%*%vec.mean)[1,1])
return(list(AME=AME,sd=AME.sd))
}
}
}
#Part 1. Simu Vartype
#generate TE/ME(x,mean,sd,0.025,0.975); matrix form; by group/D.ref
#generate predict(x,mean,sd,0.025,0.975); matrix form; by group/D.ref
#generate vcov of TE/ME; matrix form; by group/D.ref
#generate diff.values of TE/ME(mean,sd,0.025,0.975); matrix form; by group/D.ref
if(vartype=='simu'){
#simu
M <- nsimu
simu.coef <- rmvnorm(M, model.coef, model.vcov)
if(treat.type=='discrete'){
TE.output.all.list <- list()
pred.output.all.list <- list()
link.output.all.list <- list()
diff.output.all.list <- list()
TE.vcov.list <- list()
ATE.output.list <- list()
for(char in other.treat){
gen.general.TE.output <- gen.general.TE(model.coef=model.coef,char=char,diff.values=diff.values)
TE.output <- gen.general.TE.output$TE
E.pred.output <- gen.general.TE.output$E.pred
E.base.output <- gen.general.TE.output$E.base
link.1.output <- gen.general.TE.output$link.1
link.0.output <- gen.general.TE.output$link.0
diff.estimate.output <- gen.general.TE.output$diff.estimate
ATE.estimate <- gen.ATE(data=data,model.coef=model.coef,model.vcov=model.vcov,char=char)
#simu
one.simu.output <- function(model.coef,char=char,diff.values=diff.values){
one.simu.TE.output <- gen.general.TE(model.coef=model.coef,char=char,diff.values=diff.values)
output1 <- one.simu.TE.output$TE
names(output1) <- rep("TE",length(output1))
output2 <- one.simu.TE.output$E.pred
names(output2) <- rep("pred",length(output2))
output3 <- one.simu.TE.output$E.base
names(output3) <- rep("base",length(output3))
output3a <- one.simu.TE.output$link.1
names(output3a) <- rep("link.1",length(output3a))
output3b <- one.simu.TE.output$link.0
names(output3b) <- rep("link.0",length(output3b))
output4 <- one.simu.TE.output$diff.estimate
names(output4) <- rep("diff",length(output4))
output5 <- gen.ATE(data=data,model.coef=model.coef,model.vcov=model.vcov,char=char)
output5 <- c(output5)
names(output5) <- c("ATE")
output.all <- c(output1,output2,output3,output3a,output3b,output4,output5)
return(output.all)
}
all.simu.matrix <- apply(simu.coef, 1, function(x) one.simu.output(model.coef=x,char=char,diff.values=diff.values))
TE.simu.matrix <- all.simu.matrix[rownames(all.simu.matrix)=="TE",]
pred.simu.matrix <- all.simu.matrix[rownames(all.simu.matrix)=="pred",]
base.simu.matrix <- all.simu.matrix[rownames(all.simu.matrix)=="base",]
link.1.simu.matrix <- all.simu.matrix[rownames(all.simu.matrix)=="link.1",]
link.0.simu.matrix <- all.simu.matrix[rownames(all.simu.matrix)=="link.0",]
diff.simu.matrix <- all.simu.matrix[rownames(all.simu.matrix)=="diff",]
ATE.simu.matrix <- matrix(all.simu.matrix[rownames(all.simu.matrix)=="ATE",],nrow=1)
if(length(diff.values)==2){
diff.simu.matrix <- matrix(diff.simu.matrix,nrow=1)
}
if(length(diff.values)==3){
diff.simu.matrix <- as.matrix(diff.simu.matrix)
}
TE.simu.sd <- apply(TE.simu.matrix, 1, sd, na.rm=TRUE)
pred.simu.sd <- apply(pred.simu.matrix, 1, sd, na.rm=TRUE)
base.simu.sd <- apply(base.simu.matrix, 1, sd, na.rm=TRUE)
link.1.simu.sd <- apply(link.1.simu.matrix, 1, sd, na.rm=TRUE)
link.0.simu.sd <- apply(link.0.simu.matrix, 1, sd, na.rm=TRUE)
diff.simu.sd <- apply(diff.simu.matrix, 1, sd, na.rm=TRUE)
ATE.simu.sd <- apply(ATE.simu.matrix, 1, sd, na.rm=TRUE)
TE.simu.CI <- t(apply(TE.simu.matrix, 1, quantile, c(0.025,0.975), na.rm=TRUE))
pred.simu.CI <- t(apply(pred.simu.matrix, 1, quantile, c(0.025,0.975), na.rm=TRUE))
base.simu.CI <- t(apply(base.simu.matrix, 1, quantile, c(0.025,0.975), na.rm=TRUE))
link.1.simu.CI <- t(apply(link.1.simu.matrix, 1, quantile, c(0.025,0.975), na.rm=TRUE))
link.0.simu.CI <- t(apply(link.0.simu.matrix, 1, quantile, c(0.025,0.975), na.rm=TRUE))
diff.simu.CI <- t(apply(diff.simu.matrix, 1, quantile, c(0.025,0.975), na.rm=TRUE))
ATE.simu.CI <- matrix(t(apply(ATE.simu.matrix, 1, quantile, c(0.025,0.975), na.rm=TRUE)),nrow=1)
if(length(diff.values)==2){
diff.simu.CI <- matrix(diff.simu.CI,nrow=1)
}
if(length(diff.values)==3){
diff.simu.CI <- as.matrix(diff.simu.CI)
}
TE.simu.vcov <- cov(t(TE.simu.matrix),use="na.or.complete")
rownames(TE.simu.vcov) <- NULL
colnames(TE.simu.vcov) <- NULL
TE.output.all <- cbind(X.eval,TE.output,TE.simu.sd,TE.simu.CI[,1],TE.simu.CI[,2])
colnames(TE.output.all) <- c("X","TE","sd","lower CI(95%)","upper CI(95%)")
rownames(TE.output.all) <- NULL
TE.output.all.list[[other.treat.origin[char]]] <- TE.output.all
pred.output.all <- cbind(X.eval,E.pred.output,pred.simu.sd,pred.simu.CI[,1],pred.simu.CI[,2])
colnames(pred.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(pred.output.all) <- NULL
pred.output.all.list[[other.treat.origin[char]]] <- pred.output.all
link.1.output.all <- cbind(X.eval,link.1.output,link.1.simu.sd,link.1.simu.CI[,1],link.1.simu.CI[,2])
colnames(link.1.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link.1.output.all) <- NULL
link.output.all.list[[other.treat.origin[char]]] <- link.1.output.all
TE.vcov.list[[other.treat.origin[char]]] <- TE.simu.vcov
z.value <- diff.estimate.output/diff.simu.sd
p.value <- 2*pnorm(-abs(z.value))
diff.output.all <- cbind(diff.estimate.output,diff.simu.sd,z.value,p.value,diff.simu.CI[,1],diff.simu.CI[,2])
colnames(diff.output.all) <- c("diff.estimate","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(diff.output.all) <- difference.name
diff.output.all.list[[other.treat.origin[char]]] <- diff.output.all
ATE.z.value <- ATE.estimate/ATE.simu.sd
ATE.p.value <- 2*pnorm(-abs(ATE.z.value))
ATE.output <- c(ATE.estimate,ATE.simu.sd,ATE.z.value,ATE.p.value,ATE.simu.CI[,1],ATE.simu.CI[,2])
names(ATE.output) <- c("ATE","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
ATE.output.list[[other.treat.origin[char]]] <- ATE.output
}
#base
base.output.all <- cbind(X.eval,E.base.output,base.simu.sd,base.simu.CI[,1],base.simu.CI[,2])
colnames(base.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(base.output.all) <- NULL
pred.output.all.list[[all.treat.origin[base]]] <- base.output.all
link.0.output.all <- cbind(X.eval,link.0.output,link.0.simu.sd,link.0.simu.CI[,1],link.0.simu.CI[,2])
colnames(link.0.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link.0.output.all) <- NULL
link.output.all.list[[all.treat.origin[base]]] <- link.0.output.all
}
if(treat.type=='continuous'){
ME.output.all.list <- list()
pred.output.all.list <- list()
link.output.all.list <- list()
diff.output.all.list <- list()
ME.vcov.list <- list()
AME.output <- list()
k <- 1
for(D.ref in D.sample){
gen.general.ME.output <- gen.general.TE(model.coef=model.coef,D.ref=D.ref,diff.values=diff.values)
ME.output <- gen.general.ME.output$ME
E.pred.output <- gen.general.ME.output$E.pred
link.output <- gen.general.ME.output$link
diff.estimate.output <- gen.general.ME.output$diff.estimate
#AME.estimate <- gen.ATE(data=data,model.coef=model.coef,model.vcov=model.vcov)
#simu
one.simu.output2 <- function(model.coef,D.ref=D.ref,diff.values=diff.values){
one.simu.ME.output <- gen.general.TE(model.coef=model.coef,D.ref=D.ref,diff.values=diff.values)
output1 <- one.simu.ME.output$ME
names(output1) <- rep("ME",length(output1))
output2 <- one.simu.ME.output$E.pred
names(output2) <- rep("pred",length(output2))
output3 <- one.simu.ME.output$link
names(output3) <- rep("link",length(output3))
output4 <- one.simu.ME.output$diff.estimate
names(output4) <- rep("diff",length(output4))
#output5 <- gen.ATE(data=data,model.coef=model.coef,model.vcov=model.vcov)
#output5 <- c(output5)
#names(output5) <- c("AME")
#output.all <- c(output1,output2,output4,output5)
output.all <- c(output1,output2,output3,output4)
return(output.all)
}
all.simu.matrix <- apply(simu.coef, 1, function(x) one.simu.output2(model.coef=x,D.ref=D.ref,diff.values=diff.values))
ME.simu.matrix <- all.simu.matrix[rownames(all.simu.matrix)=="ME",]
pred.simu.matrix <- all.simu.matrix[rownames(all.simu.matrix)=="pred",]
link.simu.matrix <- all.simu.matrix[rownames(all.simu.matrix)=="link",]
diff.simu.matrix <- all.simu.matrix[rownames(all.simu.matrix)=="diff",]
#AME.simu.matrix <- matrix(all.simu.matrix[rownames(all.simu.matrix)=="AME",],nrow=1)
if(length(diff.values)==2){
diff.simu.matrix <- matrix(diff.simu.matrix,nrow=1)
}
if(length(diff.values)==3){
diff.simu.matrix <- as.matrix(diff.simu.matrix)
}
ME.simu.sd <- apply(ME.simu.matrix, 1, sd)
pred.simu.sd <- apply(pred.simu.matrix, 1, sd)
link.simu.sd <- apply(link.simu.matrix, 1, sd)
diff.simu.sd <- apply(diff.simu.matrix, 1, sd)
#AME.simu.sd <- apply(AME.simu.matrix, 1, sd)
ME.simu.CI <- t(apply(ME.simu.matrix, 1, quantile, c(0.025,0.975)))
pred.simu.CI <- t(apply(pred.simu.matrix, 1, quantile, c(0.025,0.975)))
link.simu.CI <- t(apply(link.simu.matrix, 1, quantile, c(0.025,0.975)))
diff.simu.CI <- t(apply(diff.simu.matrix, 1, quantile, c(0.025,0.975)))
#AME.simu.CI <- matrix(t(apply(AME.simu.matrix, 1, quantile, c(0.025,0.975))),nrow=1)
if(length(diff.values)==2){
diff.simu.CI <- matrix(diff.simu.CI,nrow=1)
}
if(length(diff.values)==3){
diff.simu.CI <- as.matrix(diff.simu.CI)
}
ME.simu.vcov <- cov(t(ME.simu.matrix),use="na.or.complete")
rownames(ME.simu.vcov) <- NULL
colnames(ME.simu.vcov) <- NULL
ME.output.all <- cbind(X.eval,ME.output,ME.simu.sd,ME.simu.CI[,1],ME.simu.CI[,2])
colnames(ME.output.all) <- c("X","ME","sd","lower CI(95%)","upper CI(95%)")
rownames(ME.output.all) <- NULL
ME.output.all.list[[label.name[k]]] <- ME.output.all
pred.output.all <- cbind(X.eval,E.pred.output,pred.simu.sd,pred.simu.CI[,1],pred.simu.CI[,2])
colnames(pred.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(pred.output.all) <- NULL
pred.output.all.list[[label.name[k]]] <- pred.output.all
link.output.all <- cbind(X.eval,link.output,link.simu.sd,link.simu.CI[,1],link.simu.CI[,2])
colnames(link.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link.output.all) <- NULL
link.output.all.list[[label.name[k]]] <- link.output.all
ME.vcov.list[[label.name[k]]] <- ME.simu.vcov
z.value <- diff.estimate.output/diff.simu.sd
p.value <- 2*pnorm(-abs(z.value))
diff.output.all <- cbind(diff.estimate.output,diff.simu.sd,z.value,p.value,diff.simu.CI[,1],diff.simu.CI[,2])
colnames(diff.output.all) <- c("diff.estimate","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(diff.output.all) <- difference.name
diff.output.all.list[[label.name[k]]] <- diff.output.all
#AME.z.value <- AME.estimate/AME.simu.sd
#AME.p.value <- 2*pnorm(-abs(AME.z.value))
#AME.output <- c(AME.estimate,AME.simu.sd,AME.z.value,AME.p.value,AME.simu.CI[,1],AME.simu.CI[,2])
#names(AME.output) <- c("AME","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
k <- k+1
}
AME.estimate <- gen.ATE(data=data,model.coef=model.coef,model.vcov=model.vcov)
all.simu.AME <- apply(simu.coef, 1, function(x) gen.ATE(model.coef=x,data=data,model.vcov=model.vcov))
AME.simu.CI <- quantile(all.simu.AME,c(0.025,0.975))
AME.simu.sd <- sd(all.simu.AME)
AME.z.value <- AME.estimate/AME.simu.sd
AME.p.value <- 2*pnorm(-abs(AME.z.value))
AME.output <- c(AME.estimate,AME.simu.sd,AME.z.value,AME.p.value,AME.simu.CI[1],AME.simu.CI[2])
names(AME.output) <- c("AME","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
}
}
#Part 2. Delta Vartype
#generate TE/ME(x,mean,sd,0.025,0.975); matrix form; by group/D.ref
#generate predict(x,mean,sd,0.025,0.975); matrix form; by group/D.ref
#generate vcov of TE/ME; matrix form; by group/D.ref
#generate diff.values of TE/ME(mean,sd,0.025,0.975); matrix form; by group/D.ref
if(vartype=='delta'){
crit <- abs(qt(.025, df=model.df))
if(treat.type=='discrete'){
TE.output.all.list <- list()
pred.output.all.list <- list()
link.output.all.list <- list()
diff.output.all.list <- list()
TE.vcov.list <- list()
ATE.output.list <- list()
for(char in other.treat){
gen.general.TE.output <- gen.general.TE(model.coef=model.coef,char=char,diff.values=diff.values)
TE.output <- gen.general.TE.output$TE
E.pred.output <- gen.general.TE.output$E.pred
E.base.output <- gen.general.TE.output$E.base
link.1.output <- gen.general.TE.output$link.1
link.0.output <- gen.general.TE.output$link.0
diff.estimate.output <- gen.general.TE.output$diff.estimate
ATE.estimate.list <- gen.ATE(data=data,model.coef=model.coef,model.vcov=model.vcov,char=char,delta=TRUE)
ATE.estimate <- ATE.estimate.list$ATE
ATE.estimate.sd <- ATE.estimate.list$sd
#delta
gen.delta.TE.output <- gen.delta.TE(model.coef=model.coef,model.vcov=model.vcov,
char=char,diff.values=diff.values,vcov=TRUE)
TE.output.sd <- gen.delta.TE.output$TE.sd
E.pred.sd <- gen.delta.TE.output$predict.sd
link.1.sd <- gen.delta.TE.output$link.sd
diff.estimate.sd <- gen.delta.TE.output$sd.diff.estimate
TE.delta.vcov <- gen.delta.TE.output$TE.vcov
colnames(TE.delta.vcov) <- NULL
rownames(TE.delta.vcov) <- NULL
#save
TE.vcov.list[[other.treat.origin[char]]] <- TE.delta.vcov
TE.output.all <- cbind(X.eval,TE.output,TE.output.sd,TE.output-crit*TE.output.sd,TE.output+crit*TE.output.sd)
colnames(TE.output.all) <- c("X","ME","sd","lower CI(95%)","upper CI(95%)")
rownames(TE.output.all) <- NULL
TE.output.all.list[[other.treat.origin[char]]] <- TE.output.all
pred.output.all <- cbind(X.eval, E.pred.output, E.pred.sd, E.pred.output-crit*E.pred.sd, E.pred.output+crit*E.pred.sd)
colnames(pred.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(pred.output.all) <- NULL
pred.output.all.list[[other.treat.origin[char]]] <- pred.output.all
link.output.all <- cbind(X.eval, link.1.output, link.1.sd, link.1.output-crit*link.1.sd, link.1.output+crit*link.1.sd)
colnames(link.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link.output.all) <- NULL
link.output.all.list[[other.treat.origin[char]]] <- link.output.all
z.value <- diff.estimate.output/diff.estimate.sd
p.value <- 2*pnorm(-abs(z.value))
diff.output.all <- cbind(diff.estimate.output, diff.estimate.sd,z.value, p.value, diff.estimate.output-crit*diff.estimate.sd, diff.estimate.output+crit*diff.estimate.sd)
colnames(diff.output.all) <- c("diff.estimate","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(diff.output.all) <- difference.name
diff.output.all.list[[other.treat.origin[char]]] <- diff.output.all
ATE.z.value <- ATE.estimate/ATE.estimate.sd
ATE.p.value <- 2*pnorm(-abs(ATE.z.value))
ATE.output <- c(ATE.estimate,ATE.estimate.sd,ATE.z.value,ATE.p.value,ATE.estimate-crit*ATE.estimate.sd,ATE.estimate+crit*ATE.estimate.sd)
names(ATE.output) <- c("ATE","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
ATE.output.list[[other.treat.origin[char]]] <- ATE.output
}
#base
gen.delta.TE.base <- gen.delta.TE(model.coef=model.coef,model.vcov=model.vcov,
char=base,diff.values=diff.values)
E.pred.sd <- gen.delta.TE.base$predict.sd
base.output.all <- cbind(X.eval,E.base.output,E.pred.sd,E.base.output-crit*E.pred.sd,E.base.output+crit*E.pred.sd)
colnames(base.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(base.output.all) <- NULL
pred.output.all.list[[all.treat.origin[base]]] <- base.output.all
link.0.sd <- gen.delta.TE.base$link.sd
link.output.all <- cbind(X.eval, link.0.output, link.0.sd, link.0.output-crit*link.0.sd, link.0.output+crit*link.0.sd)
colnames(link.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link.output.all) <- NULL
link.output.all.list[[all.treat.origin[base]]] <- link.output.all
}
if(treat.type=='continuous'){
ME.output.all.list <- list()
pred.output.all.list <- list()
link.output.all.list <- list()
diff.output.all.list <- list()
ME.vcov.list <- list()
AME.output <- list()
k <- 1
for(D.ref in D.sample){
gen.general.ME.output <- gen.general.TE(model.coef=model.coef,D.ref=D.ref,diff.values=diff.values)
ME.output <- gen.general.ME.output$ME
E.pred.output <- gen.general.ME.output$E.pred
link.output <- gen.general.ME.output$link
diff.estimate.output <- gen.general.ME.output$diff.estimate
#delta
gen.delta.ME.output <- gen.delta.TE(model.coef=model.coef,model.vcov=model.vcov,
D.ref=D.ref,diff.values=diff.values,vcov=TRUE)
ME.output.sd <- gen.delta.ME.output$ME.sd
E.pred.sd <- gen.delta.ME.output$predict.sd
link.sd <- gen.delta.ME.output$link.sd
diff.estimate.sd <- gen.delta.ME.output$sd.diff.estimate
ME.delta.vcov <- gen.delta.ME.output$ME.vcov
colnames(ME.delta.vcov) <- NULL
rownames(ME.delta.vcov) <- NULL
#save
ME.vcov.list[[label.name[k]]] <- ME.delta.vcov
ME.output.all <- cbind(X.eval,ME.output,ME.output.sd,ME.output-crit*ME.output.sd,ME.output+crit*ME.output.sd)
colnames(ME.output.all) <- c("X","ME","sd","lower CI(95%)","upper CI(95%)")
rownames(ME.output.all) <- NULL
ME.output.all.list[[label.name[k]]] <- ME.output.all
pred.output.all <- cbind(X.eval, E.pred.output, E.pred.sd, E.pred.output-crit*E.pred.sd, E.pred.output+crit*E.pred.sd)
colnames(pred.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(pred.output.all) <- NULL
pred.output.all.list[[label.name[k]]] <- pred.output.all
link.output.all <- cbind(X.eval, link.output, link.sd, link.output-crit*link.sd, link.output+crit*link.sd)
colnames(link.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link.output.all) <- NULL
link.output.all.list[[label.name[k]]] <- link.output.all
z.value <- diff.estimate.output/diff.estimate.sd
p.value <- 2*pnorm(-abs(z.value))
diff.output.all <- cbind(diff.estimate.output, diff.estimate.sd,z.value, p.value, diff.estimate.output-crit*diff.estimate.sd, diff.estimate.output+crit*diff.estimate.sd)
colnames(diff.output.all) <- c("diff.estimate","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(diff.output.all) <- difference.name
diff.output.all.list[[label.name[k]]] <- diff.output.all
k <- k+1
}
AME.estimate.list <- gen.ATE(data=data,model.coef=model.coef,model.vcov=model.vcov,delta=TRUE)
AME.estimate <- AME.estimate.list$AME
AME.estimate.sd <- AME.estimate.list$sd
AME.z.value <- AME.estimate/AME.estimate.sd
AME.p.value <- 2*pnorm(-abs(AME.z.value))
AME.output <- c(AME.estimate, AME.estimate.sd, AME.z.value, AME.p.value, AME.estimate-crit*AME.estimate.sd, AME.estimate+crit*AME.estimate.sd)
names(AME.output) <- c("AME","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
}
}
#Part 3. Bootstrap
#generate TE/ME(x,mean,sd,0.025,0.975); matrix form; by group/D.ref
#generate predict(x,mean,sd,0.025,0.975); matrix form; by group/D.ref
#generate vcov of TE/ME; matrix form; by group/D.ref
#generate diff.values of TE/ME(mean,sd,0.025,0.975); matrix form; by group/D.ref
if(vartype=='bootstrap'){
if(treat.type=='discrete'){
all.length <- neval*length(other.treat) + #TE
neval*length(all.treat) + #pred
neval*length(all.treat) + #link
length(other.treat)*length(difference.name) + #diff
length(other.treat) #ATE
}
if(treat.type=='continuous'){
all.length <- neval*length(label.name) + #ME
neval*length(label.name) + #pred
neval*length(label.name) + #link
length(label.name)*length(difference.name) + 1 # diff&AME
}
one.boot <- function(){
if (is.null(cl)==TRUE){
smp <- sample(1:n,n,replace=TRUE)
} else{ ## block bootstrap
cluster.boot<-sample(clusters,length(clusters),replace=TRUE)
smp<-unlist(id.list[match(cluster.boot,clusters)])
}
data.boot <- data[smp,]
boot.out <- matrix(NA,nrow=all.length,ncol=0)
## check input...
if(treat.type=='discrete'){
if(length(unique(data.boot[,D]))!=length(unique(data[,D]))){
return(boot.out)
}
}
if(is.null(IV)){
if(use_fe==0){
if(method=='linear'){
suppressWarnings(
model.boot <- glm(formula,data=data.boot,weights=WEIGHTS)
)
}
if(method=='logit'){
suppressWarnings(
model.boot <- glm(formula,data=data.boot,family=binomial(link = 'logit'),weights=WEIGHTS)
)
}
if(method=='probit'){
suppressWarnings(
model.boot <- glm(formula,data=data.boot,family=binomial(link = 'probit'),weights=WEIGHTS)
)
}
if(method=='poisson'){
suppressWarnings(
model.boot <- glm(formula,data=data.boot,family=poisson,weights=WEIGHTS)
)
}
if(method=='nbinom'){
suppressWarnings(
model.boot <- glm.nb(formula,data=data.boot,weights=WEIGHTS)
)
}
## check converge...
if(model.boot$converged==FALSE){
return(boot.out)
}
}
if(use_fe==TRUE){
w.boot <- data.boot[,'WEIGHTS']
model.boot <- felm(formula,data=data.boot,weights=w.boot)
}
}
if(!is.null(IV)){
if(use_fe==0){
suppressWarnings(
model.boot <- ivreg(formula,data=data.boot,weights=WEIGHTS)
)
model.boot$converged <- TRUE
}
if(use_fe==1){
w.boot <- data.boot[,'WEIGHTS']
suppressWarnings(
model.boot <- felm(formula,data=data.boot,weights=w.boot)
)
model.boot$converged <- TRUE
}
}
coef.boot <- coef(model.boot)
if(!is.null(IV)){
if(use_fe==1){
names(coef.boot) <- name.update
}
}
boot.one.round <- c()
if(treat.type=='discrete'){
for(char in other.treat){
gen.general.TE.output <- gen.general.TE(model.coef=coef.boot,char=char,diff.values=diff.values)
TE.output <- gen.general.TE.output$TE
names(TE.output) <- rep(paste0("TE.",char),neval)
E.pred.output <- gen.general.TE.output$E.pred
names(E.pred.output) <- rep(paste0("pred.",char),neval)
E.base.output <- gen.general.TE.output$E.base
link.1.output <- gen.general.TE.output$link.1
names(link.1.output) <- rep(paste0("link.",char),neval)
link.0.output <- gen.general.TE.output$link.0
diff.estimate.output <- c(gen.general.TE.output$diff.estimate)
names(diff.estimate.output) <- rep(paste0("diff.",char),length(difference.name))
ATE.estimate <- c(gen.ATE(data=data.boot,model.coef=coef.boot,model.vcov=NULL,char=char))
names(ATE.estimate) <- paste0("ATE.",char)
boot.one.round <- c(boot.one.round,TE.output,E.pred.output,link.1.output,diff.estimate.output,ATE.estimate)
}
names(E.base.output) <- rep(paste0("pred.",base),neval)
names(link.0.output) <- rep(paste0("link.",base),neval)
boot.one.round <- c(boot.one.round,E.base.output,link.0.output)
}
if(treat.type=='continuous'){
k <- 1
for(D.ref in D.sample){
gen.general.ME.output <- gen.general.TE(model.coef=coef.boot,D.ref=D.ref,diff.values=diff.values)
ME.output <- gen.general.ME.output$ME
names(ME.output) <- rep(paste0("ME.",label.name[k]),neval)
E.pred.output <- gen.general.ME.output$E.pred
names(E.pred.output) <- rep(paste0("pred.",label.name[k]),neval)
link.output <- gen.general.ME.output$link
names(link.output) <- rep(paste0("link.",label.name[k]),neval)
diff.estimate.output <- c(gen.general.ME.output$diff.estimate)
names(diff.estimate.output) <- rep(paste0("diff.",label.name[k]),length(difference.name))
boot.one.round <- c(boot.one.round,ME.output,E.pred.output,link.output,diff.estimate.output)
k <- k + 1
}
AME.estimate <- c(gen.ATE(data=data.boot,model.coef=coef.boot,model.vcov=NULL))
names(AME.estimate) <- c("AME")
boot.one.round <- c(boot.one.round,AME.estimate)
}
boot.out <- cbind(boot.out,boot.one.round)
rownames(boot.out) <- names(boot.one.round)
return(boot.out)
}
if(parallel==TRUE){
requireNamespace("doParallel")
## require(iterators)
maxcores <- detectCores()
cores <- min(maxcores, cores)
pcl<-future::makeClusterPSOCK(cores)
doParallel::registerDoParallel(pcl)
cat("Parallel computing with", cores,"cores...\n")
suppressWarnings(
bootout <- foreach (i=1:nboots, .combine=cbind,
.export=c("one.boot"),.packages=c('lfe','AER'),
.inorder=FALSE) %dopar% {one.boot()}
)
suppressWarnings(stopCluster(pcl))
cat("\r")
}
else{
bootout<-matrix(NA,all.length,0)
for(i in 1:nboots){
tempdata <- one.boot()
if(is.null(tempdata)==FALSE){
bootout<- cbind(bootout,tempdata)
}
if (i%%50==0) cat(i) else cat(".")
}
cat("\r")
}
if(treat.type=='discrete'){
TE.output.all.list <- list()
pred.output.all.list <- list()
link.output.all.list <- list()
diff.output.all.list <- list()
TE.vcov.list <- list()
ATE.output.list <- list()
for(char in other.treat){
gen.general.TE.output <- gen.general.TE(model.coef=model.coef,char=char,diff.values=diff.values)
TE.output <- gen.general.TE.output$TE
E.pred.output <- gen.general.TE.output$E.pred
E.base.output <- gen.general.TE.output$E.base
link.1.output <- gen.general.TE.output$link.1
link.0.output <- gen.general.TE.output$link.0
diff.estimate.output <- gen.general.TE.output$diff.estimate
ATE.estimate <- gen.ATE(data=data,model.coef=model.coef,model.vcov=model.vcov,char=char)
TE.boot.matrix <- bootout[rownames(bootout)==paste0("TE.",char),]
pred.boot.matrix <- bootout[rownames(bootout)==paste0("pred.",char),]
base.boot.matrix <- bootout[rownames(bootout)==paste0("pred.",base),]
link.1.boot.matrix <- bootout[rownames(bootout)==paste0("link.",char),]
link.0.boot.matrix <- bootout[rownames(bootout)==paste0("link.",base),]
diff.boot.matrix <- bootout[rownames(bootout)==paste0("diff.",char),]
ATE.boot.matrix <- matrix(bootout[rownames(bootout)==paste0("ATE.",char),],nrow=1)
if(length(diff.values)==2){
diff.boot.matrix <- matrix(diff.boot.matrix,nrow=1)
}
if(length(diff.values)==3){
diff.boot.matrix <- as.matrix(diff.boot.matrix)
}
TE.boot.sd <- apply(TE.boot.matrix, 1, sd ,na.rm=TRUE)
pred.boot.sd <- apply(pred.boot.matrix, 1, sd, na.rm=TRUE)
base.boot.sd <- apply(base.boot.matrix, 1, sd,na.rm=TRUE)
link.1.boot.sd <- apply(link.1.boot.matrix, 1, sd,na.rm=TRUE)
link.0.boot.sd <- apply(link.0.boot.matrix, 1, sd,na.rm=TRUE)
diff.boot.sd <- apply(diff.boot.matrix, 1, sd,na.rm=TRUE)
ATE.boot.sd <- apply(ATE.boot.matrix, 1, sd,na.rm=TRUE)
TE.boot.CI <- t(apply(TE.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
pred.boot.CI <- t(apply(pred.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
base.boot.CI <- t(apply(base.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
link.1.boot.CI <- t(apply(link.1.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
link.0.boot.CI <- t(apply(link.0.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
diff.boot.CI <- t(apply(diff.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
ATE.boot.CI <- matrix(t(apply(ATE.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE)),nrow=1)
if(length(diff.values)==2){
diff.boot.CI <- matrix(diff.boot.CI,nrow=1)
}
if(length(diff.values)==3){
diff.boot.CI <- as.matrix(diff.boot.CI)
}
TE.boot.vcov <- cov(t(TE.boot.matrix),use="na.or.complete")
rownames(TE.boot.vcov) <- NULL
colnames(TE.boot.vcov) <- NULL
TE.output.all <- cbind(X.eval,TE.output,TE.boot.sd,TE.boot.CI[,1],TE.boot.CI[,2])
colnames(TE.output.all) <- c("X","TE","sd","lower CI(95%)","upper CI(95%)")
rownames(TE.output.all) <- NULL
TE.output.all.list[[other.treat.origin[char]]] <- TE.output.all
pred.output.all <- cbind(X.eval,E.pred.output,pred.boot.sd,pred.boot.CI[,1],pred.boot.CI[,2])
colnames(pred.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(pred.output.all) <- NULL
pred.output.all.list[[other.treat.origin[char]]] <- pred.output.all
link.output.all <- cbind(X.eval,link.1.output,link.1.boot.sd,link.1.boot.CI[,1],link.1.boot.CI[,2])
colnames(link.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link.output.all) <- NULL
link.output.all.list[[other.treat.origin[char]]] <- link.output.all
TE.vcov.list[[other.treat.origin[char]]] <- TE.boot.vcov
z.value <- diff.estimate.output/diff.boot.sd
p.value <- 2*pnorm(-abs(z.value))
diff.output.all <- cbind(diff.estimate.output,diff.boot.sd,
z.value,p.value,diff.boot.CI[,1],diff.boot.CI[,2])
colnames(diff.output.all) <- c("diff.estimate","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(diff.output.all) <- difference.name
diff.output.all.list[[other.treat.origin[char]]] <- diff.output.all
ATE.z.value <- ATE.estimate/ATE.boot.sd
ATE.p.value <- 2*pnorm(-abs(ATE.z.value))
ATE.output <- c(ATE.estimate,ATE.boot.sd,ATE.z.value,ATE.p.value,ATE.boot.CI[,1],ATE.boot.CI[,2])
names(ATE.output) <- c("ATE","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
ATE.output.list[[other.treat.origin[char]]] <- ATE.output
}
#base
base.output.all <- cbind(X.eval,E.base.output,base.boot.sd,base.boot.CI[,1],base.boot.CI[,2])
colnames(base.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(base.output.all) <- NULL
pred.output.all.list[[all.treat.origin[base]]] <- base.output.all
link.0.output.all <- cbind(X.eval,link.0.output,link.0.boot.sd,link.0.boot.CI[,1],link.0.boot.CI[,2])
colnames(link.0.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link.0.output.all) <- NULL
link.output.all.list[[all.treat.origin[base]]] <- link.0.output.all
}
if(treat.type=='continuous'){
ME.output.all.list <- list()
pred.output.all.list <- list()
link.output.all.list <- list()
diff.output.all.list <- list()
ME.vcov.list <- list()
AME.output <- list()
k <- 1
for(D.ref in D.sample){
label <- label.name[k]
gen.general.ME.output <- gen.general.TE(model.coef=model.coef,D.ref=D.ref,diff.values=diff.values)
ME.output <- gen.general.ME.output$ME
E.pred.output <- gen.general.ME.output$E.pred
link.output <- gen.general.ME.output$link
diff.estimate.output <- gen.general.ME.output$diff.estimate
ME.boot.matrix <- bootout[rownames(bootout)==paste0("ME.",label),]
pred.boot.matrix <- bootout[rownames(bootout)==paste0("pred.",label),]
link.boot.matrix <- bootout[rownames(bootout)==paste0("link.",label),]
diff.boot.matrix <- bootout[rownames(bootout)==paste0("diff.",label),]
if(length(diff.values)==2){
diff.boot.matrix <- matrix(diff.boot.matrix,nrow=1)
}
if(length(diff.values)==3){
diff.boot.matrix <- as.matrix(diff.boot.matrix)
}
ME.boot.vcov <- cov(t(ME.boot.matrix),use="na.or.complete")
rownames(ME.boot.vcov) <- NULL
colnames(ME.boot.vcov) <- NULL
ME.boot.sd <- apply(ME.boot.matrix, 1, sd,na.rm=TRUE)
pred.boot.sd <- apply(pred.boot.matrix, 1, sd,na.rm=TRUE)
link.boot.sd <- apply(link.boot.matrix, 1, sd,na.rm=TRUE)
diff.boot.sd <- apply(diff.boot.matrix, 1, sd,na.rm=TRUE)
ME.boot.CI <- t(apply(ME.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
pred.boot.CI <- t(apply(pred.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
link.boot.CI <- t(apply(link.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
diff.boot.CI <- t(apply(diff.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
ME.output.all <- cbind(X.eval,ME.output,ME.boot.sd,ME.boot.CI[,1],ME.boot.CI[,2])
colnames(ME.output.all) <- c("X","ME","sd","lower CI(95%)","upper CI(95%)")
rownames(ME.output.all) <- NULL
ME.output.all.list[[label]] <- ME.output.all
pred.output.all <- cbind(X.eval,E.pred.output,pred.boot.sd,pred.boot.CI[,1],pred.boot.CI[,2])
colnames(pred.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(pred.output.all) <- NULL
pred.output.all.list[[label]] <- pred.output.all
link.output.all <- cbind(X.eval,link.output,link.boot.sd,link.boot.CI[,1],link.boot.CI[,2])
colnames(link.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link.output.all) <- NULL
link.output.all.list[[label]] <- link.output.all
ME.vcov.list[[label]] <- ME.boot.vcov
z.value <- diff.estimate.output/diff.boot.sd
p.value <- 2*pnorm(-abs(z.value))
diff.output.all <- cbind(diff.estimate.output,diff.boot.sd,
z.value,p.value,diff.boot.CI[,1],diff.boot.CI[,2])
colnames(diff.output.all) <- c("diff.estimate","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(diff.output.all) <- difference.name
diff.output.all.list[[label]] <- diff.output.all
k <- k + 1
}
AME.estimate <- gen.ATE(data=data,model.coef=model.coef,model.vcov=model.vcov)
AME.boot.matrix <- matrix(bootout[rownames(bootout)=="AME",],nrow=1)
AME.boot.sd <- apply(AME.boot.matrix, 1, sd)
AME.boot.CI <- matrix(t(apply(AME.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE)),nrow=1)
AME.z.value <- AME.estimate/AME.boot.sd
AME.p.value <- 2*pnorm(-abs(AME.z.value))
AME.output <- c(AME.estimate,AME.boot.sd,AME.z.value,AME.p.value,AME.boot.CI[,1],AME.boot.CI[,2])
names(AME.output) <- c("AME","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
}
}
if(TRUE){ # density or histogram
if (treat.type=='discrete'){ ## discrete D
# density
if(is.null(weights)==TRUE){
de <- density(data[,X])
}else {
suppressWarnings(de <- density(data[,X],weights=data[,'WEIGHTS']))
}
treat_den <- list()
for (char in all.treat){
de.name <- paste0("den.",char)
if (is.null(weights)==TRUE){
de.tr <- density(data[data[,D]==char,X])
}
else {
suppressWarnings(de.tr <- density(data[data[,D]==char,X],
weights=data[data[,D]==char,'WEIGHTS']))
}
treat_den[[all.treat.origin[char]]] <- de.tr
}
# histogram
if (is.null(weights)==TRUE) {
hist.out<-hist(data[,X],breaks=80,plot=FALSE)
} else {
suppressWarnings(hist.out<-hist(data[,X],data[,'WEIGHTS'],
breaks=80,plot=FALSE))
}
n.hist<-length(hist.out$mids)
# count the number of treated
treat.hist <- list()
for (char in all.treat) {
count1<-rep(0,n.hist)
treat_index<-which(data[,D]==char)
for (i in 1:n.hist) {
count1[i]<-sum(data[treat_index,X]>=hist.out$breaks[i] &
data[treat_index,X]<hist.out$breaks[(i+1)])
}
count1[n.hist]<-sum(data[treat_index,X]>=hist.out$breaks[n.hist] &
data[treat_index,X]<=hist.out$breaks[n.hist+1])
treat.hist[[all.treat.origin[char]]] <- count1
}
}
if (treat.type=='continuous'){ ## continuous D
if (is.null(weights)==TRUE){
de <- density(data[,X])
} else {
suppressWarnings(de <- density(data[,X],weights=data[,'WEIGHTS']))
}
if (is.null(weights)==TRUE){
hist.out<-hist(data[,X],breaks=80,plot=FALSE)
} else{
suppressWarnings(hist.out<-hist(data[,X],data[,'WEIGHTS'],
breaks=80,plot=FALSE))
}
de.co <- de.tr <- NULL
}
}
## L kurtosis
requireNamespace("Lmoments")
Lkurtosis <- Lmoments(data[,X],returnobject=TRUE)$ratios[4]
tests <- list(
treat.type = treat.type,
X.Lkurtosis = sprintf(Lkurtosis,fmt = '%#.3f')
)
## Output
if(treat.type=='discrete'){
for(char in other.treat.origin){
new.diff.est <- as.data.frame(diff.output.all.list[[char]])
for(i in 1:dim(new.diff.est)[1]){
new.diff.est[i,] <- sprintf(new.diff.est[i,],fmt = '%#.3f')
}
diff.output.all.list[[char]] <- new.diff.est
outss <- data.frame(lapply(ATE.output.list[[char]], function(x) t(data.frame(x))))
colnames(outss) <- c("ATE","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(outss) <- c("Average Treatment Effect")
outss[1,] <- sprintf(outss[1,],fmt = '%#.3f')
ATE.output.list[[char]] <- outss
}
final.output <- list(diff.info=diff.info,
treat.info=treat.info,
est.lin=TE.output.all.list,
pred.lin=pred.output.all.list,
link.lin=link.output.all.list,
diff.estimate=diff.output.all.list,
vcov.matrix=TE.vcov.list,
Avg.estimate=ATE.output.list,
Xlabel = Xlabel,
Dlabel = Dlabel,
Ylabel = Ylabel,
de = de,
de.tr = treat_den, # density
hist.out = hist.out,
count.tr = treat.hist,
tests = tests,
estimator = "linear",
model.linear = model,
use.fe = use_fe
)
}
if(treat.type=='continuous'){
for(label in label.name){
new.diff.est <- as.data.frame(diff.output.all.list[[label]])
for(i in 1:dim(new.diff.est)[1]){
new.diff.est[i,] <- sprintf(new.diff.est[i,],fmt = '%#.3f')
}
diff.output.all.list[[label]] <- new.diff.est
}
outss <- data.frame(lapply(AME.output, function(x) t(data.frame(x))))
colnames(outss) <- c("AME","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(outss) <- c("Average Marginal Effect")
outss[1,] <- sprintf(outss[1,],fmt = '%#.3f')
AME.output <- outss
final.output <- list(diff.info=diff.info,
treat.info=treat.info,
est.lin=ME.output.all.list,
pred.lin=pred.output.all.list,
link.lin=link.output.all.list,
diff.estimate=diff.output.all.list,
vcov.matrix=ME.vcov.list,
Avg.estimate=AME.output,
Xlabel = Xlabel,
Dlabel = Dlabel,
Ylabel = Ylabel,
de = de, # density
de.tr = de.tr,
hist.out = hist.out,
count.tr = NULL,
tests = tests,
estimator = "linear",
model.linear = model,
use.fe = use_fe
)
}
#Plot
if(figure==TRUE){
class(final.output) <- "interflex"
figure.output <- plot.interflex( x=final.output,
order = order,
subtitles = subtitles,
show.subtitles = show.subtitles,
CI = CI,
diff.values = diff.values.plot,
Xdistr = Xdistr,
main = main,
Ylabel = Ylabel,
Dlabel = Dlabel,
Xlabel = Xlabel,
xlab = xlab,
ylab = ylab,
xlim = xlim,
ylim = ylim,
theme.bw = theme.bw,
show.grid = show.grid,
cex.main = cex.main,
cex.sub = cex.sub,
cex.lab = cex.lab,
cex.axis = cex.axis,
#bin.labs = bin.labs, # bin labels
interval = interval, # interval in replicated papers
file = file,
ncols = ncols,
#pool plot
pool = pool,
legend.title = legend.title,
color = color,
show.all = show.all,
scale = scale,
height = height,
width = width
)
final.output <- c(final.output,list(figure=figure.output))
}
class(final.output) <- "interflex"
return(final.output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.