interflex.kernel <- function(data,
Y, # outcome
D, # treatment indicator
X, # moderator
treat.info,
diff.info,
bw = NULL,
kfold = 10,
grid = 30,
metric = "MSE",
Z = NULL, # covariates
FE = NULL, # fixed effects
IV = NULL, # instrumental variables
weights = NULL, # weighting variable
full.moderate = FALSE, # fully moderated model
#Z.X = NULL, # fully moderated terms
neval = 50,
X.eval = NULL,
method = "linear", ## "probit"; "logit"; "poisson"; "nbinom"
CI = TRUE,
nboots = 200,
parallel = TRUE,
cores = 4,
cl = NULL, # variable to be clustered on
#predict = FALSE,
Z.ref = NULL, # same length as Z, set the value of Z when estimating marginal effects/predicted value
figure = TRUE,
order = NULL,
subtitles = NULL,
show.subtitles = NULL,
Xdistr = "histogram", # c("density","histogram","none")
main = NULL,
Ylabel = NULL,
Dlabel = NULL,
Xlabel = NULL,
xlab = NULL,
ylab = NULL,
xlim = NULL,
ylim = NULL,
theme.bw = FALSE,
show.grid = TRUE,
cex.main = NULL,
cex.sub = NULL,
cex.lab = NULL,
cex.axis = NULL,
interval = NULL,
file = NULL,
ncols = NULL,
pool = FALSE,
color = NULL,
legend.title = NULL,
show.all = FALSE,
scale = 1.1,
height = 7,
width = 10
){
WEIGHTS <- NULL
n <- dim(data)[1]
binary <- FALSE
if(length(unique(data[,Y]))==2){
if((0 %in% unique(data[,Y])) & (1 %in% unique(data[,Y]))){
binary <- TRUE
}
}
diff.values.plot <- diff.info[["diff.values.plot"]]
diff.values <- diff.info[["diff.values"]]
difference.name <- diff.info[["difference.name"]]
treat.type <- treat.info[["treat.type"]]
if(treat.type=='discrete'){
other.treat <- treat.info[["other.treat"]]
other.treat.origin <- names(other.treat)
names(other.treat.origin) <- other.treat
all.treat <- treat.info[["all.treat"]]
all.treat.origin <- names(all.treat)
names(all.treat.origin) <- all.treat
base <- treat.info[["base"]]
}
if(treat.type=='continuous'){
D.sample <- treat.info[["D.sample"]]
label.name <- names(D.sample)
#names(label.name) <- D.sample
}
ncols <- treat.info[["ncols"]]
if(is.null(cl)==FALSE){ ## find clusters
clusters<-unique(data[,cl])
id.list<-split(1:n,data[,cl])
}
#X.eval
X.eval0 <- X.eval
X.eval <- seq(min(data[,X]), max(data[,X]), length.out = neval)
X.eval <- sort(c(X.eval,X.eval0))
neval <- length(X.eval)
#X.eval <- seq(min(data[,X]), max(data[,X]), length.out = neval)
if(treat.type=="discrete"){
for(char in other.treat){
data[,paste0("D",".",char)] <- as.numeric(data[,D]==char)
}
}
if(is.null(weights)==TRUE){
w <- rep(1,n)
}
else{
w <- data[,weights]
}
data[,'WEIGHTS'] <- w
#Xdensity
suppressWarnings(Xdensity <- density(data[,X],weights=w))
# fixed effects
if(is.null(FE)==TRUE){
use_fe <- 0
}else{
use_fe <- 1
}
##Function A: weighted glm
#generate estimate of coef at a specific value(x) of the moderator
#input:x, data, bw, weights, Xdensity
#weights: vector
wls.iv.fe <- function(x,data,bw,weights,Xdensity) {
data.touse <- data
xx <- data.touse[,'delta.x'] <- data.touse[,X]-x
use.variable <- c(Y)
n.coef <- 1
# iv_fastplm(Y=outcome,X=endogenous variables,Z=included IV,IV=excluded IV,FE=fixed effects,weight=weight)
Y_matrix <- as.matrix(data.touse[,Y])
# construct endogenous variables
endogenous.var <- c()
if(treat.type=='discrete'){
for(char in other.treat){
data.touse[,paste0("D.delta.x",".",char)] <- data.touse[,paste0("D",".",char)]*data.touse[,'delta.x']
use.variable <- c(use.variable,c(paste0("D",".",char),paste0("D.delta.x",".",char)))
endogenous.var <- c(endogenous.var,c(paste0("D",".",char),paste0("D.delta.x",".",char)))
n.coef <- n.coef+2
}
}
if(treat.type=='continuous'){
data.touse[,'D.delta.x'] <- data.touse[,D]*data.touse[,'delta.x']
use.variable <- c(use.variable,c(D,"D.delta.x"))
endogenous.var <- c(endogenous.var,c(D,"D.delta.x"))
n.coef <- n.coef+2
}
X_matrix <- as.matrix(data.touse[,endogenous.var])
#construct included iv
use.variable <- c(use.variable,'delta.x')
included.iv <- c('delta.x')
if(is.null(Z)==FALSE){
use.variable <- c(use.variable,Z)
included.iv <- c(included.iv,Z)
n.coef <- n.coef+length(Z)
if(full.moderate==TRUE){
for(a in Z){
data.touse[,paste0(a,'.delta.x')] <- data.touse[,a]*data.touse[,'delta.x']
use.variable <- c(use.variable,paste0(a,'.delta.x'))
included.iv <- c(included.iv,paste0(a,'.delta.x'))
n.coef <- n.coef+1
}
}
Z_matrix <- as.matrix(data.touse[,included.iv])
}else{
Z_matrix <- as.matrix(data.touse[,included.iv])
}
#construct excluded iv
excluded.iv <- c()
for(sub.iv in IV){
data.touse[,paste0("delta.x.",sub.iv)] <- data.touse[,sub.iv]*data.touse[,'delta.x']
excluded.iv <- c(excluded.iv,sub.iv,paste0("delta.x.",sub.iv))
}
IV_matrix <- as.matrix(data.touse[,excluded.iv])
#construct FE matrix
FE_matrix <- as.matrix(data.touse[,FE],drop=FALSE)
#construct weight
temp_density <- Xdensity$y[which.min(abs(Xdensity$x-x))]
density.mean <- exp(mean(log(Xdensity$y)))
bw.adapt <- bw*(1+log(max(Xdensity$y)/temp_density))
#bw.adapt <- bw*sqrt(density.mean/temp_density)
w <- dnorm(data.touse[,'delta.x']/bw.adapt)*weights
data.touse[,'WEIGHTS'] <- w
if(max(data.touse[,'WEIGHTS'])==0){
result <- rep(NA,1+n.coef)
return(result)
}
invisible(
capture.output(
fastplm_res <- tryCatch(iv_fastplm(Y = Y_matrix,
X = X_matrix,
Z = Z_matrix,
IV = IV_matrix,
FE = FE_matrix,
weight = w),error = function(e){return("error")}),type='message'
)
)
if(typeof(fastplm_res)!='list'){
result <- rep(NA,1+n.coef)
names(result) <- c("x0","(Intercept)",use.variable[2:length(use.variable)])
return(result)
}
result <- c(x,fastplm_res$mu,c(fastplm_res$coefficients))
result[which(is.nan(result))] <- 0
names(result) <- c("x0","(Intercept)",use.variable[2:length(use.variable)])
return(result)
}
wls.fe <- function(x,data,bw,weights,Xdensity){
data.touse <- data
xx <- data.touse[,'delta.x'] <- data.touse[,X]-x
use.variable <- c(Y,'delta.x')
n.coef <- 1
if(treat.type=='discrete'){
for(char in other.treat){
data.touse[,paste0("D.delta.x",".",char)] <- data.touse[,paste0("D",".",char)]*data.touse[,'delta.x']
use.variable <- c(use.variable,c(paste0("D",".",char),paste0("D.delta.x",".",char)))
n.coef <- n.coef+2
}
}
if(treat.type=='continuous'){
data.touse[,'D.delta.x'] <- data.touse[,D]*data.touse[,'delta.x']
use.variable <- c(use.variable,c(D,"D.delta.x"))
n.coef <- n.coef+2
}
if(is.null(Z)==FALSE){
use.variable <- c(use.variable,Z)
n.coef <- n.coef+length(Z)
if(full.moderate==TRUE){
for(a in Z){
data.touse[,paste0(a,'.delta.x')] <- data.touse[,a]*data.touse[,'delta.x']
use.variable <- c(use.variable,paste0(a,'.delta.x'))
n.coef <- n.coef+1
}
}
}
temp_density <- Xdensity$y[which.min(abs(Xdensity$x-x))]
density.mean <- exp(mean(log(Xdensity$y)))
bw.adapt <- bw*(1+log(max(Xdensity$y)/temp_density))
#bw.adapt <- bw*sqrt(density.mean/temp_density)
w <- dnorm(data.touse[,'delta.x']/bw.adapt)*weights
data.touse[,'WEIGHTS'] <- w
if(max(data.touse[,'WEIGHTS'])==0){
result <- rep(NA,1+n.coef)
return(result)
}
dat1 <- as.matrix(data.touse[,use.variable])
dat.FE <- as.matrix(data.touse[,FE],drop=FALSE)
invisible(
capture.output(
fastplm_res <- tryCatch(fastplm(data = dat1, FE = dat.FE,
weight = w),error = function(e){return("error")}),type='message'
)
)
if(typeof(fastplm_res)!='list'){
result <- rep(NA,1+n.coef)
names(result) <- c("x0","(Intercept)",use.variable[2:length(use.variable)])
return(result)
}
result <- c(x,fastplm_res$mu,c(fastplm_res$coefficients))
result[which(is.nan(result))] <- 0
names(result) <- c("x0","(Intercept)",use.variable[2:length(use.variable)])
return(result)
}
wls.iv <- function(x,data,bw,weights,Xdensity){
data.touse<-data
data.touse[,'delta.x'] <- data.touse[,X]-x
formula <- paste0(Y,"~delta.x")
all.var.name <- c('(Intercept)','delta.x')
n.coef <- 2
if(treat.type=='discrete'){
for(char in other.treat){
data.touse[,paste0("D.delta.x",".",char)] <- data.touse[,paste0("D",".",char)]*data.touse[,'delta.x']
formula <- paste0(formula,"+",paste0("D",".",char),"+",paste0("D.delta.x",".",char))
all.var.name <- c(all.var.name,paste0("D",".",char),paste0("D.delta.x",".",char))
n.coef <- n.coef+2
}
}
if(treat.type=='continuous'){
data.touse[,'D.delta.x'] <- data.touse[,D]*data.touse[,'delta.x']
formula <- paste0(formula,"+",D,"+","D.delta.x")
all.var.name <- c(all.var.name,D,"D.delta.x")
n.coef <- n.coef+2
}
if(is.null(Z)==FALSE){
formula <- paste0(formula,"+",paste0(Z,collapse="+"))
all.var.name <- c(all.var.name,Z)
n.coef <- n.coef+length(Z)
if(full.moderate==TRUE){
for(a in Z){
data.touse[,paste0(a,'.delta.x')] <- data.touse[,a]*data.touse[,'delta.x']
formula <- paste0(formula,"+",paste0(a,'.delta.x'))
all.var.name <- c(all.var.name,paste0(a,'.delta.x'))
n.coef <- n.coef+1
}
}
}
formula.iv <- 'delta.x'
for(sub.iv in IV){
data.touse[,paste0("delta.x.",sub.iv)] <- data.touse[,sub.iv]*data.touse[,'delta.x']
formula.iv <- paste0(formula.iv,"+",sub.iv)
formula.iv <- paste0(formula.iv,"+",paste0("delta.x.",sub.iv))
}
#if(is.null(Z)==FALSE){
# formula.iv <- paste0(formula.iv,"+",paste0(Z,collapse="+"))
# if(full.moderate==TRUE){
# formula.iv <- paste0(formula.iv,"+",paste0(Z.X,collapse="+"))
# }
#}
if(is.null(Z)==FALSE){
formula.iv <- paste0(formula.iv,"+",paste0(Z,collapse="+"))
if(full.moderate==TRUE){
for(a in Z){
formula.iv <- paste0(formula.iv,"+",paste0(a,'.delta.x'))
}
}
}
formula <- paste0(formula,"|",formula.iv)
#print(formula)
formula <- as.formula(formula)
temp_density <- Xdensity$y[which.min(abs(Xdensity$x-x))]
density.mean <- exp(mean(log(Xdensity$y)))
bw.adapt <- bw*(1+log(max(Xdensity$y)/temp_density))
#bw.adapt <- bw*sqrt(density.mean/temp_density)
w <- dnorm(data.touse[,'delta.x']/bw.adapt)*weights
data.touse[,'WEIGHTS'] <- w
if(max(data.touse[,'WEIGHTS'])==0){
result <- rep(NA,1+n.coef)
names(result) <- c("x0",all.var.name)
return(result)
}
suppressWarnings( #correct
iv.reg <- tryCatch(
ivreg(formula,data=data.touse, weights = WEIGHTS),
error = function(e){return("error")}
)
)
if(typeof(iv.reg)!='list'){
result <- rep(NA,1+n.coef)
names(result) <- c("x0",all.var.name)
return(result)
}
result <- c(x, iv.reg$coef)
names(result) <- c("x0",names(iv.reg$coef))
result[which(is.na(result))] <- 0
return(result)
}
wls.nofe<-function(x, data, bw, weights, Xdensity){
data.touse<-data
data.touse[,'delta.x'] <- data.touse[,X]-x
formula <- paste0(Y,"~delta.x")
all.var.name <- c('(Intercept)','delta.x')
n.coef <- 2
if(treat.type=='discrete'){
for(char in other.treat){
data.touse[,paste0("D.delta.x",".",char)] <- data.touse[,paste0("D",".",char)]*data.touse[,'delta.x']
formula <- paste0(formula,"+",paste0("D",".",char),"+",paste0("D.delta.x",".",char))
all.var.name <- c(all.var.name,paste0("D",".",char),paste0("D.delta.x",".",char))
n.coef <- n.coef+2
}
}
if(treat.type=='continuous'){
data.touse[,'D.delta.x'] <- data.touse[,D]*data.touse[,'delta.x']
formula <- paste0(formula,"+",D,"+","D.delta.x")
all.var.name <- c(all.var.name,D,"D.delta.x")
n.coef <- n.coef+2
}
if(is.null(Z)==FALSE){
formula <- paste0(formula,"+",paste0(Z,collapse="+"))
all.var.name <- c(all.var.name,Z)
n.coef <- n.coef+length(Z)
if(full.moderate==TRUE){
for(a in Z){
data.touse[,paste0(a,'.delta.x')] <- data.touse[,a]*data.touse[,'delta.x']
formula <- paste0(formula,"+",paste0(a,'.delta.x'))
all.var.name <- c(all.var.name,paste0(a,'.delta.x'))
n.coef <- n.coef+1
}
}
}
formula <- as.formula(formula)
temp_density <- Xdensity$y[which.min(abs(Xdensity$x-x))]
density.mean <- exp(mean(log(Xdensity$y)))
bw.adapt <- bw*(1+log(max(Xdensity$y)/temp_density))
#bw.adapt <- bw*sqrt(density.mean/temp_density)
w <- dnorm(data.touse[,'delta.x']/bw.adapt)*weights
data.touse[,'WEIGHTS'] <- w
if(max(data.touse[,'WEIGHTS'])==0){
result <- rep(NA,1+n.coef)
names(result) <- c("x0",all.var.name)
return(result)
}
if(method=='linear'){
suppressWarnings( #correct
glm.reg <- tryCatch(
glm(formula,data=data.touse, weights = WEIGHTS),
error = function(e){return("error")}
)
)
}
if(method=='logit'){
suppressWarnings( #correct
glm.reg <- tryCatch(
glm(formula,data=data.touse, weights = WEIGHTS,family = binomial(link = "logit")),
error = function(e){return("error")}
)
)
}
if(method=='probit'){
suppressWarnings( #correct
glm.reg <- tryCatch(
glm(formula,data=data.touse, weights = WEIGHTS,family = binomial(link = "probit")),
error = function(e){return("error")}
)
)
}
if(method=='poisson'){
suppressWarnings( #correct
glm.reg <- tryCatch(
glm(formula,data=data.touse, weights = WEIGHTS,family = poisson),
error = function(e){return("error")}
)
)
}
if(method=='nbinom'){
suppressWarnings( #correct
glm.reg <- tryCatch(
glm.nb(formula,data=data.touse, weights = WEIGHTS,control=glm.control(epsilon = 1e-5, maxit=200)),
error = function(e){return("error")}
)
)
}
if(typeof(glm.reg)!='list'){
result <- rep(NA,1+n.coef)
names(result) <- c("x0",all.var.name)
return(result)
}
if(glm.reg$converged==FALSE){
result <- rep(NA,length(c(x, glm.reg$coef)))
names(result) <- c("x0",names(glm.reg$coef))
return(result)
}
if(glm.reg$converged==TRUE){
result <- c(x, glm.reg$coef)
names(result) <- c("x0",names(glm.reg$coef))
result[which(is.na(result))] <- 0
return(result)
}
}
wls <- function(x, data, bw, weights, Xdensity){
if(use_fe==TRUE & is.null(IV)){
return(wls.fe(x=x,data=data,bw=bw,weights=weights,Xdensity=Xdensity))
}
if(use_fe==FALSE & is.null(IV)){
return(wls.nofe(x=x,data=data,bw=bw,weights=weights,Xdensity=Xdensity))
}
if(use_fe==FALSE & !is.null(IV)){
return(wls.iv(x=x,data=data,bw=bw,weights=weights,Xdensity=Xdensity))
}
if(use_fe==TRUE & !is.null(IV)){
return(wls.iv.fe(x=x,data=data,bw=bw,weights=weights,Xdensity=Xdensity))
}
}
#Function # Cross-Validation
if(is.null(bw)==TRUE){
CV <- TRUE
cat("Cross-validating bandwidth ... \n")
if (length(grid) == 1){
rangeX <- max(data[, X]) - min(data[, X])
bw.grid <- exp(seq(log(rangeX/50), log(rangeX), length.out = grid))
}else{
bw.grid <- grid
}
}
else{
CV <- FALSE
}
if(CV==TRUE){
#weights: name of a variable
getError.CV <- function(train, test, bw, neval,
weights_name, Xdensity){
if(is.null(weights_name)==TRUE){
w.touse.cv <- rep(1,dim(train)[1])
}
else{
w.touse.cv <- train[,weights_name]
}
if(use_fe==TRUE){
train_y <- as.matrix(train[,Y])
train_FE <- as.matrix(train[,FE])
invisible(
capture.output(
fastplm_res <- fastplm(data = train_y,FE = train_FE,weight = w.touse.cv,
FEcoefs = 1L),type='message'
)
)
FEvalues <- fastplm_res$FEvalues
FEnumbers <- dim(fastplm_res$FEvalues)[1]
FE_coef <- matrix(FEvalues[,3],nrow = FEnumbers, ncol = 1)
rowname <- c()
fe_index_name <- c()
for(i in 1:FEnumbers){
rowname <- c(rowname,paste0(FE[FEvalues[i,1]+1],'.',FEvalues[i,2]))
fe_index_name <- c(fe_index_name,FE[FEvalues[i,1]+1])
}
rownames(FE_coef) <- rowname
train[,Y] <- fastplm_res$residuals
}
X.eval.cv <- seq(min(train[,X]), max(train[,X]), length.out = neval)
#demean -> use wls without fixed effects#
if(is.null(IV)){
coef.grid.cv <- t(sapply(X.eval.cv,function(x) wls.nofe(x=x,data = train,bw=bw,weights=w.touse.cv,Xdensity=Xdensity)))
}else{
coef.grid.cv <- t(sapply(X.eval.cv,function(x) wls.iv(x=x,data = train,bw=bw,weights=w.touse.cv,Xdensity=Xdensity)))
}
coef.grid.cv <- na.omit(coef.grid.cv)
eff.eval.point <- dim(coef.grid.cv)[1]
X.eval.cv <- coef.grid.cv[,'x0']
if(dim(coef.grid.cv)[1]<=neval/2){
output <- rep(NA,5)
names(output) <- c("Num.Eff.Points","Cross Entropy","AUC","MSE","MAE")
return(output)
}
esCoef.cv<-function(x){ ##obtain the coefficients for x[i]
Xnew.cv<-abs(X.eval.cv-x)
d1<-min(Xnew.cv)
label1<-which.min(Xnew.cv)
Xnew.cv[label1]<-Inf
d2<-min(Xnew.cv) ## distance between x[i] and the second nearest x in training set
label2<-which.min(Xnew.cv)
if(d1==0){
func <- coef.grid.cv[label1,]
}
else if(d2==0){
func <- coef.grid.cv[label2,]
}
else{ ## weighted average
func1 <- coef.grid.cv[label1,]
func2 <- coef.grid.cv[label2,]
func <- (func1 * d2 + func2 * d1)/(d1 + d2)
}
return(func)
}
# Test
## limit test set in the support of the train dataset
test <- test[which(test[,X]>=min(train[,X]) & test[,X]<=max(train[,X])),]
add_FE <- rep(0,dim(test)[1])
if(use_fe==TRUE){
#addictive FE
add_FE <- matrix(0,nrow = dim(test)[1],ncol = length(FE))
colnames(add_FE) <- FE
for(fe in FE){
add_FE[,fe] <- 0
fe_name <- paste0(fe,".",test[,fe])
find_FE_index <- which(fe_name %in% rownames(FE_coef))
not_find_FE_index <- which(!fe_name %in% rownames(FE_coef))
add_FE[find_FE_index,fe] <- FE_coef[fe_name[find_FE_index],]
add_FE[not_find_FE_index,fe] <- mean(FE_coef[which(fe_index_name==fe),])
}
add_FE <- rowSums(add_FE)
add_FE <- add_FE + fastplm_res$mu #intercept
}
if(dim(test)[1]<3){
output <- rep(NA,5)
names(output) <- c("Num.Eff.Points","Cross Entropy","AUC","MSE","MAE")
return(output)
}
Knn<-t(sapply(test[,X],esCoef.cv))
link <- Knn[,'(Intercept)']
if(treat.type=='discrete'){
for(char in other.treat){
link <- link + Knn[,paste0("D.",char)]*as.numeric(test[,D]==char) + Knn[,paste0("D.delta.x",".",char)]*(test[,X]-Knn[,'x0'])*as.numeric(test[,D]==char)
}
link <- link + Knn[,'delta.x']*(test[,X]-Knn[,'x0'])
}
if(treat.type=='continuous'){
link <- link + Knn[,D]*test[,D] + Knn[,'delta.x']*(test[,X]-Knn[,'x0']) + Knn[,'D.delta.x']*(test[,X]-Knn[,'x0'])*test[,D]
}
if(is.null(Z)==FALSE){
for(a in Z){
link <- link + test[,a]*Knn[,a]
if(full.moderate==TRUE){
for(a in Z){
link <- link + Knn[,paste0(a,'.delta.x')]*(test[,X]-Knn[,'x0'])*test[,a]
}
}
}
}
if(method=='logit'){
E.pred <- exp(link)/(1+exp(link))
}
if(method=='probit'){
E.pred <- pnorm(link,0,1)
}
if(method=='linear'){
E.pred <- link
if(use_fe==TRUE){
E.pred <- E.pred + add_FE
}
if(binary==TRUE){
E.pred[which(E.pred>1)] <- 1
E.pred[which(E.pred<0)] <- 0
}
}
if(method=='poisson'|method=='nbinom'){
E.pred <- exp(link)
}
true <- test[which(is.na(E.pred)==FALSE),Y]
E.pred <- na.omit(E.pred)
if(length(unique(true))<=1 | length(E.pred)<3){ #all 0 or all 1 or few observations(auc)
output <- rep(NA,5)
names(output) <- c("Num.Eff.Points","Cross Entropy","AUC","MSE","MAE")
return(output)
}
# cross entropy
if(binary==TRUE){
cross.entropy <- logLoss(true, E.pred)
suppressMessages(
roc.y <- roc(true, E.pred,levels = c(0,1))
)
# auc
auc <- roc.y$auc
}else{
cross.entropy <- NA
auc <- NA
}
# mse L2
mse <- mean((true-E.pred)^2)
# mse L1
mae <- mean(abs(true-E.pred))
output <- c(cross.entropy,auc,mse,mae)
if(method=='poisson'|method=='nbinom'){
mse.link <- mean((log(true+1)-log(E.pred+1))^2)
mae.link <- mean(abs(log(true+1)-log(E.pred+1)))
output <- c(mse.link,mae.link,mse,mae)
}
output <- c(eff.eval.point,output)
names(output) <- c("Num.Eff.Points","Cross Entropy","AUC","MSE","MAE")
#print(output)
return(output)
}
fold <- createFolds(factor(data[,D]), k = kfold, list = FALSE)
#kfold <- min(n,kfold)
#cat("#folds =",kfold)
#cat("\n")
#fold <- c(0:(n-1))%%kfold + 1
#fold <- sample(fold, n, replace = FALSE)
cv.new<-function(bw,neval){
error<-matrix(NA,kfold,5)
for(j in 1:kfold){ # K-fold CV
testid <- which(fold==j)
train <- data[-testid,]
test <- data[testid,]
error[j,] <- getError.CV(train=train, test=test, bw=bw, neval=neval,weights_name='WEIGHTS',Xdensity=Xdensity)
}
colnames(error) <- c("Num.Eff.Points","cross entropy","auc","L2","L1")
for(pp in colnames(error)){
if(any(is.na(error[,pp]))==TRUE & all(is.na(error[,pp]))==FALSE){
if(pp!='auc'){
error[is.na(error[,pp]),pp] <- max(error[,pp],na.rm=T)
}else{
error[is.na(error[,pp]),pp] <- min(error[,pp],na.rm=T)
}
}
}
output <- c(bw,apply(error,2,mean,na.rm=TRUE))
names(output) <- c("Num.Eff.Points","bw","cross entropy","auc","L2","L1")
return(output)
}
## test
#try <- cv.new(0.02789474,neval=neval)
#return(try)
##
##-----------------------------------------------------------------------------------
if(parallel==TRUE){
requireNamespace("doParallel")
## require(iterators)
maxcores <- detectCores()
cores <- min(maxcores, cores)
pcl<-future::makeClusterPSOCK(cores)
doParallel::registerDoParallel(pcl)
cat("Parallel computing with", cores,"cores...\n")
Error<-suppressWarnings(foreach(bw = bw.grid, .combine = rbind,
.packages = c("ModelMetrics","pROC","MASS","AER"),
.inorder = FALSE) %dopar% {cv.new(bw,neval=neval)})
suppressWarnings(stopCluster(pcl))
#return(Error)
}
else{
Error <- matrix(NA, length(bw.grid), 6)
for (i in 1:length(bw.grid)) {
Error[i, ] <- cv.new(bw=bw.grid[i],neval=neval)
cat(".")
}
}
colnames(Error) <- c("bw","Num.Eff.Points","Cross Entropy","AUC","MSE","MAE")
rownames(Error) <- NULL
if(binary==FALSE){
if(method=='poisson'|method=='nbinom'){
colnames(Error) <- c("bw","Num.Eff.Points","MSE.link","MAE.link","MSE","MAE")
}else{
Error <- Error[,c("bw","Num.Eff.Points","MSE","MAE")]
}
if(metric=='MSE'){
bw <- bw.grid[which.min(Error[,'MSE']/Error[,"Num.Eff.Points"])]
}
if(metric=='MAE'){
bw <- bw.grid[which.min(Error[,'MAE']/Error[,"Num.Eff.Points"])]
}
}
if(binary==TRUE){
if(metric=='MSE'){
bw <- bw.grid[which.min(Error[,'MSE']/Error[,"Num.Eff.Points"])]
}
if(metric=='MAE'){
bw <- bw.grid[which.min(Error[,'MAE']/Error[,"Num.Eff.Points"])]
}
if(metric=='Cross Entropy'){
bw <- bw.grid[which.min(Error[,'Cross Entropy']/Error[,"Num.Eff.Points"])]
}
if(metric=='AUC'){
bw <- bw.grid[which.max(Error[,'AUC']*Error[,"Num.Eff.Points"])]
}
}
cat(paste0("Optimal bw=",round(bw,4),".\n"))
}
else{
Error <- NULL
}
#Core Estimation, gen grid points
coef.grid <- t(sapply(X.eval,function(x) wls(x=x,data = data,bw=bw,weights=w,Xdensity=Xdensity)))
coef.grid <- na.omit(coef.grid)
if(dim(coef.grid)[1]<=neval/2){
warning("Inappropriate bandwidth.\n")
}
if(dim(coef.grid)[1]<=3){
stop("Inappropriate bandwidth.")
}
X.eval <- coef.grid[,'x0']
neval <- length(X.eval)
#print(neval)
cat(paste0("Number of evaluation points:",neval,"\n"))
##Function B: estimate TE/ME; E.predict(E.base);
#1, estimate treatment effects/marginal effects given model.coef;
#2, estimate E.pred given treat/D;
#3, input: coef.grid; char(discrete)/D.ref(continuous);
#4, output: marginal effects/treatment effects/E.pred/E.base
gen.kernel.TE <- function(coef.grid,char=NULL,D.ref=NULL){
if(is.null(char)==TRUE){
treat.type='continuous'
}
if(is.null(D.ref)==TRUE){
treat.type=='discrete'
}
neval <- dim(coef.grid)[1]
gen.TE <- function(coef.grid){
if(treat.type=='discrete'){
link.1 <- coef.grid[,'(Intercept)'] + coef.grid[,paste0('D.',char)]
link.0 <- coef.grid[,'(Intercept)'] + 0
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link.1 <- link.1 + target.Z*coef.grid[,a]
link.0 <- link.0 + target.Z*coef.grid[,a]
#if(full.moderate==TRUE){
# link.1 <- link.1 + target.Z*coef.grid[,paste0(a,".X")]*coef.grid[,'x0']
# link.0 <- link.0 + target.Z*coef.grid[,paste0(a,".X")]*coef.grid[,'x0']
#}
}
}
if(method=='linear'){
TE <- link.1-link.0
E.pred <- link.1
E.base <- link.0
}
if(method=='logit'){
E.pred <- E.prob.1 <- exp(link.1)/(1+exp(link.1))
E.base <- E.prob.0 <- exp(link.0)/(1+exp(link.0))
TE <- E.prob.1-E.prob.0
}
if(method=='probit'){
E.pred <- E.prob.1 <- pnorm(link.1,0,1)
E.base <- E.prob.0 <- pnorm(link.0,0,1)
TE <- E.prob.1-E.prob.0
}
if(method=='poisson' | method=="nbinom"){
E.pred <- E.y.1 <- exp(link.1)
E.base <- E.y.0 <- exp(link.0)
TE <- E.y.1-E.y.0
}
names(TE) <- rep(paste0("TE.",char),neval)
names(E.pred) <- rep(paste0("Predict.",char),neval)
names(E.base) <- rep(paste0("Predict.",base),neval)
gen.TE.output <- list(TE=TE,E.pred=E.pred,E.base=E.base,link.1=link.1,link.0=link.0)
}
if(treat.type=='continuous'){
link <- coef.grid[,"(Intercept)"] + D.ref*coef.grid[,D]
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link <- link + target.Z*coef.grid[,a]
#if(full.moderate==TRUE){
# link <- link + target.Z*coef.grid[,paste0(a,".X")]*coef.grid[,'x0']
#}
}
}
if(method=='logit'){
ME <- exp(link)/(1+exp(link))^2*coef.grid[,D]
E.pred <- exp(link)/(1+exp(link))
}
if(method=='probit'){
ME <- coef.grid[,D]*dnorm(link)
E.pred <- pnorm(link,0,1)
}
if(method=='linear'){
ME <- coef.grid[,D]
E.pred <- link
}
if(method=='poisson'|method=='nbinom'){
ME <- exp(link)*coef.grid[,D]
E.pred <- exp(link)
}
names(ME) <- rep(paste0("ME.",names(D.sample)[D.sample == D.ref]),neval)
names(E.pred) <- rep(paste0("Predict.",names(D.sample)[D.sample == D.ref]),neval)
gen.TE.output <- list(ME=ME,E.pred=E.pred,link=link)
}
return(gen.TE.output)
}
gen.TE.output <- gen.TE(coef.grid=coef.grid)
if(treat.type=='discrete'){
return(list(TE=gen.TE.output$TE,
E.pred=gen.TE.output$E.pred,
E.base=gen.TE.output$E.base,
link.1=gen.TE.output$link.1,
link.0=gen.TE.output$link.0))
}
if(treat.type=='continuous'){
return(list(ME=gen.TE.output$ME,
E.pred=gen.TE.output$E.pred,
link=gen.TE.output$link))
}
}
##Function C: estimate difference of TE/ME at different values of the moderator
#1, input: coef.grid; char/D.ref; diff.values
#2, output: difference of TE/ME at different values of the moderator
gen.kernel.difference <- function(coef.grid,diff.values,char=NULL,D.ref=NULL){
if(is.null(char)==TRUE){
treat.type='continuous'
est.ME <- function(x){
Xnew<-abs(X.eval-x)
d1<-min(Xnew)
label1<-which.min(Xnew)
Xnew[label1]<-Inf
d2<-min(Xnew)
label2<-which.min(Xnew)
if(d1==0){
link <- coef.grid[label1,'(Intercept)'] + coef.grid[label1,D]*D.ref
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link <- link + target.Z*coef.grid[label1,a]
#if(full.moderate==TRUE){
# link <- link + target.Z*coef.grid[label1,paste0(a,".X")]*x
#}
}
}
coef.grid.D <- coef.grid[label1,D]
}
else if(d2==0){
link <- coef.grid[label2,'(Intercept)'] + coef.grid[label2,D]*D.ref
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link <- link + target.Z*coef.grid[label2,a]
#if(full.moderate==TRUE){
# link <- link + target.Z*coef.grid[label2,paste0(a,".X")]*x
#}
}
}
coef.grid.D <- coef.grid[label2,D]
}
else{ ## weighted average
link.1 <- coef.grid[label1,'(Intercept)'] + coef.grid[label1,D]*D.ref +
coef.grid[label1,'D.delta.x']*D.ref*(x-X.eval[label1]) +
coef.grid[label1,"delta.x"]*(x-X.eval[label1])
link.2 <- coef.grid[label2,'(Intercept)'] + coef.grid[label2,D]*D.ref +
coef.grid[label2,'D.delta.x']*D.ref*(x-X.eval[label2]) +
coef.grid[label2,"delta.x"]*(x-X.eval[label2])
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link.1 <- link.1 + target.Z*coef.grid[label1,a]
link.2 <- link.2 + target.Z*coef.grid[label2,a]
if(full.moderate==TRUE){
link.1 <- link.1 + target.Z*coef.grid[label1,paste0(a,'.delta.x')]*(x-X.eval[label1])
link.2 <- link.2 + target.Z*coef.grid[label2,paste0(a,'.delta.x')]*(x-X.eval[label2])
}
}
}
coef.grid.D.1 <- coef.grid[label1,D]+coef.grid[label1,"D.delta.x"]*(x-X.eval[label1])
coef.grid.D.2 <- coef.grid[label2,D]+coef.grid[label2,"D.delta.x"]*(x-X.eval[label2])
coef.grid.D <- (coef.grid.D.1 * d2 + coef.grid.D.2 * d1)/(d1 + d2)
link <- (link.1 * d2 + link.2 * d1)/(d1 + d2)
}
if(method=='logit'){
ME <- exp(link)/(1+exp(link))^2*coef.grid.D
}
if(method=='probit'){
ME <- coef.grid.D*dnorm(link)
}
if(method=='linear'){
ME <- coef.grid.D
}
if(method=='poisson'|method=='nbinom'){
ME <- exp(link)*coef.grid.D
}
return(ME)
}
}
if(is.null(D.ref)==TRUE){
treat.type=='discrete'
est.TE<-function(x){ ##estimate the treatment effects for an observation
Xnew<-abs(X.eval-x)
d1<-min(Xnew)
label1<-which.min(Xnew)
Xnew[label1]<-Inf
d2<-min(Xnew)
label2<-which.min(Xnew)
if(d1==0){
link.1 <- coef.grid[label1,'(Intercept)'] + coef.grid[label1,paste0('D.',char)]
link.0 <- coef.grid[label1,'(Intercept)'] + 0
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link.1 <- link.1 + target.Z*coef.grid[label1,a]
link.0 <- link.0 + target.Z*coef.grid[label1,a]
#if(full.moderate==TRUE){
# link.1 <- link.1 + target.Z*coef.grid[label1,paste0(a,".X")]*x
# link.0 <- link.0 + target.Z*coef.grid[label1,paste0(a,".X")]*x
#}
}
}
}
else if(d2==0){
link.1 <- coef.grid[label2,'(Intercept)'] + coef.grid[label2,paste0('D.',char)]
link.0 <- coef.grid[label2,'(Intercept)'] + 0
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link.1 <- link.1 + target.Z*coef.grid[label2,a]
link.0 <- link.0 + target.Z*coef.grid[label2,a]
#if(full.moderate==TRUE){
# link.1 <- link.1 + target.Z*coef.grid[label2,paste0(a,".X")]*x
# link.0 <- link.0 + target.Z*coef.grid[label2,paste0(a,".X")]*x
#}
}
}
}
else{ ## weighted average
link.1.1 <- coef.grid[label1,'(Intercept)'] + coef.grid[label1,paste0('D.',char)] +
(coef.grid[label1,paste0('D.delta.x.',char)]+coef.grid[label1,"delta.x"])*(x-X.eval[label1])
link.1.2 <- coef.grid[label2,'(Intercept)'] + coef.grid[label2,paste0('D.',char)] +
(coef.grid[label2,paste0('D.delta.x.',char)]+coef.grid[label2,"delta.x"])*(x-X.eval[label2])
link.0.1 <- coef.grid[label1,'(Intercept)'] + coef.grid[label1,"delta.x"]*(x-X.eval[label1])
link.0.2 <- coef.grid[label2,'(Intercept)'] + coef.grid[label2,"delta.x"]*(x-X.eval[label2])
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- Z.ref[a]
link.1.1 <- link.1.1 + target.Z*coef.grid[label1,a]
link.1.2 <- link.1.2 + target.Z*coef.grid[label2,a]
link.0.1 <- link.0.1 + target.Z*coef.grid[label1,a]
link.0.2 <- link.0.2 + target.Z*coef.grid[label2,a]
if(full.moderate==TRUE){
link.1.1 <- link.1.1 + target.Z*coef.grid[label1,paste0(a,'.delta.x')]*(x-X.eval[label1])
link.1.2 <- link.1.2 + target.Z*coef.grid[label2,paste0(a,'.delta.x')]*(x-X.eval[label2])
link.0.1 <- link.0.1 + target.Z*coef.grid[label1,paste0(a,'.delta.x')]*(x-X.eval[label1])
link.0.2 <- link.0.2 + target.Z*coef.grid[label2,paste0(a,'.delta.x')]*(x-X.eval[label2])
}
}
}
link.1 <- c((link.1.1 * d2 + link.1.2 * d1)/(d1 + d2))
link.0 <- c((link.0.1 * d2 + link.0.2 * d1)/(d1 + d2))
}
if(method=='linear'){
TE <- link.1-link.0
}
if(method=='logit'){
E.prob.1 <- exp(link.1)/(1+exp(link.1))
E.prob.0 <- exp(link.0)/(1+exp(link.0))
TE <- E.prob.1-E.prob.0
}
if(method=='probit'){
E.prob.1 <- pnorm(link.1,0,1)
E.prob.0 <- pnorm(link.0,0,1)
TE <- E.prob.1-E.prob.0
}
if(method=='poisson' | method=="nbinom"){
E.y.1 <- exp(link.1)
E.y.0 <- exp(link.0)
TE <- E.y.1-E.y.0
}
return(TE)
}
}
if(length(diff.values)==2){
if(treat.type=='discrete'){
difference <- c(est.TE(diff.values[2])-est.TE(diff.values[1]))
}
if(treat.type=='continuous'){
difference <- c(est.ME(diff.values[2])-est.ME(diff.values[1]))
}
}
if(length(diff.values)==3){
if(treat.type=='discrete'){
difference1 <- est.TE(diff.values[2])-est.TE(diff.values[1])
difference2 <- est.TE(diff.values[3])-est.TE(diff.values[2])
difference3 <- est.TE(diff.values[3])-est.TE(diff.values[1])
difference <- c(difference1,difference2,difference3)
}
if(treat.type=='continuous'){
difference1 <- est.ME(diff.values[2])-est.ME(diff.values[1])
difference2 <- est.ME(diff.values[3])-est.ME(diff.values[2])
difference3 <- est.ME(diff.values[3])-est.ME(diff.values[1])
difference <- c(difference1,difference2,difference3)
}
}
if(treat.type=='discrete'){
names(difference) <- paste0(char,".",difference.name)
}
if(treat.type=='continuous'){
names(difference) <- paste0(names(D.sample)[D.sample == D.ref],".",difference.name)
}
return(difference)
}
##Function D: estimate ATE/AME
gen.ATE <- function(data,coef.grid,char=NULL){
if(is.null(char)==TRUE){
treat.type <- 'continuous'
weights <- data[,'WEIGHTS']
}
else{
treat.type <- 'discrete'
sub.data <- data[data[,D]==char,]
weights <- data[data[,D]==char,'WEIGHTS']
}
gen.ATE.sub <- function(index){
if(treat.type=='discrete'){
x <- sub.data[index,X]
Xnew<-abs(X.eval-x)
d1<-min(Xnew)
label1<-which.min(Xnew)
Xnew[label1]<-Inf
d2<-min(Xnew)
label2<-which.min(Xnew)
if(d1==0){
link.1 <- coef.grid[label1,'(Intercept)'] + coef.grid[label1,paste0('D.',char)]
link.0 <- coef.grid[label1,'(Intercept)'] + 0
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- sub.data[index,a]
link.1 <- link.1 + target.Z*coef.grid[label1,a]
link.0 <- link.0 + target.Z*coef.grid[label1,a]
#if(full.moderate==TRUE){
# link.1 <- link.1 + target.Z*coef.grid[label1,paste0(a,".X")]*x
# link.0 <- link.0 + target.Z*coef.grid[label1,paste0(a,".X")]*x
#}
}
}
}
else if(d2==0){
link.1 <- coef.grid[label2,'(Intercept)'] + coef.grid[label2,paste0('D.',char)]
link.0 <- coef.grid[label2,'(Intercept)'] + 0
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- sub.data[index,a]
link.1 <- link.1 + target.Z*coef.grid[label2,a]
link.0 <- link.0 + target.Z*coef.grid[label2,a]
#if(full.moderate==TRUE){
# link.1 <- link.1 + target.Z*coef.grid[label2,paste0(a,".X")]*x
# link.0 <- link.0 + target.Z*coef.grid[label2,paste0(a,".X")]*x
#}
}
}
}
else{ ## weighted average
link.1.1 <- coef.grid[label1,'(Intercept)'] + coef.grid[label1,paste0('D.',char)] +
(coef.grid[label1,paste0('D.delta.x.',char)]+coef.grid[label1,"delta.x"])*(x-X.eval[label1])
link.1.2 <- coef.grid[label2,'(Intercept)'] + coef.grid[label2,paste0('D.',char)] +
(coef.grid[label2,paste0('D.delta.x.',char)]+coef.grid[label2,"delta.x"])*(x-X.eval[label2])
link.0.1 <- coef.grid[label1,'(Intercept)'] + coef.grid[label1,"delta.x"]*(x-X.eval[label1])
link.0.2 <- coef.grid[label2,'(Intercept)'] + coef.grid[label2,"delta.x"]*(x-X.eval[label2])
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- sub.data[index,a]
link.1.1 <- link.1.1 + target.Z*coef.grid[label1,a]
link.1.2 <- link.1.2 + target.Z*coef.grid[label2,a]
link.0.1 <- link.0.1 + target.Z*coef.grid[label1,a]
link.0.2 <- link.0.2 + target.Z*coef.grid[label2,a]
if(full.moderate==TRUE){
link.1.1 <- link.1.1 + coef.grid[label1,paste0(a,'.delta.x')]*(x-X.eval[label1])
link.1.2 <- link.1.2 + coef.grid[label2,paste0(a,'.delta.x')]*(x-X.eval[label2])
link.0.1 <- link.0.1 + coef.grid[label1,paste0(a,'.delta.x')]*(x-X.eval[label1])
link.0.2 <- link.0.2 + coef.grid[label2,paste0(a,'.delta.x')]*(x-X.eval[label2])
}
}
}
link.1 <- c((link.1.1 * d2 + link.1.2 * d1)/(d1 + d2))
link.0 <- c((link.0.1 * d2 + link.0.2 * d1)/(d1 + d2))
}
if(method=='linear'){
TE <- link.1-link.0
}
if(method=='logit'){
E.prob.1 <- exp(link.1)/(1+exp(link.1))
E.prob.0 <- exp(link.0)/(1+exp(link.0))
TE <- E.prob.1-E.prob.0
}
if(method=='probit'){
E.prob.1 <- pnorm(link.1,0,1)
E.prob.0 <- pnorm(link.0,0,1)
TE <- E.prob.1-E.prob.0
}
if(method=='poisson' | method=="nbinom"){
E.y.1 <- exp(link.1)
E.y.0 <- exp(link.0)
TE <- E.y.1-E.y.0
}
return(TE)
}
if(treat.type=='continuous'){
x <- data[index,X]
Xnew<-abs(X.eval-x)
d1<-min(Xnew)
label1<-which.min(Xnew)
Xnew[label1]<-Inf
d2<-min(Xnew)
label2<-which.min(Xnew)
if(d1==0){
link <- coef.grid[label1,'(Intercept)'] + coef.grid[label1,D]*data[index,D]
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- data[index,a]
link <- link + target.Z*coef.grid[label1,a]
#if(full.moderate==TRUE){
# link <- link + target.Z*coef.grid[label1,paste0(a,".X")]*x
#}
}
}
coef.grid.D <- coef.grid[label1,D]
}
else if(d2==0){
link <- coef.grid[label2,'(Intercept)'] + coef.grid[label2,D]*data[index,D]
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- data[index,a]
link <- link + target.Z*coef.grid[label2,a]
#if(full.moderate==TRUE){
# link <- link + target.Z*coef.grid[label2,paste0(a,".X")]*x
#}
}
}
coef.grid.D <- coef.grid[label2,D]
}
else{ ## weighted average
link.1 <- coef.grid[label1,'(Intercept)'] + coef.grid[label1,D]*data[index,D] +
coef.grid[label1,'D.delta.x']*data[index,D]*(x-X.eval[label1]) +
coef.grid[label1,"delta.x"]*(x-X.eval[label1])
link.2 <- coef.grid[label2,'(Intercept)'] + coef.grid[label2,D]*data[index,D] +
coef.grid[label2,'D.delta.x']*data[index,D]*(x-X.eval[label2]) +
coef.grid[label2,"delta.x"]*(x-X.eval[label2])
if(is.null(Z)==FALSE){
for(a in Z){
target.Z <- data[index,a]
link.1 <- link.1 + target.Z*coef.grid[label1,a]
link.2 <- link.2 + target.Z*coef.grid[label2,a]
if(full.moderate==TRUE){
link.1 <- link.1 + coef.grid[label1,paste0(a,'.delta.x')]*(x-X.eval[label1])
link.2 <- link.2 + coef.grid[label2,paste0(a,'.delta.x')]*(x-X.eval[label2])
}
}
}
coef.grid.D.1 <- coef.grid[label1,D]+coef.grid[label1,"D.delta.x"]*(x-X.eval[label1])
coef.grid.D.2 <- coef.grid[label2,D]+coef.grid[label2,"D.delta.x"]*(x-X.eval[label2])
link <- (link.1*d2+link.2*d1)/(d1+d2)
coef.grid.D <- (coef.grid.D.1*d2+coef.grid.D.2*d1)/(d1+d2)
}
if(method=='logit'){
ME <- exp(link)/(1+exp(link))^2*coef.grid.D
}
if(method=='probit'){
ME <- coef.grid.D*dnorm(link)
}
if(method=='linear'){
ME <- coef.grid.D
}
if(method=='poisson'|method=='nbinom'){
ME <- exp(link)*coef.grid.D
}
names(ME) <- NULL
return(ME)
}
}
if(treat.type=='discrete'){
index.all <- c(1:dim(sub.data)[1])
TE.all.real <- sapply(index.all,function(x) gen.ATE.sub(x))
ATE <- weighted.mean(TE.all.real,weights,na.rm=TRUE)
return(ATE)
}
if(treat.type=='continuous'){
index.all <- c(1:dim(data)[1])
ME.all.real <- sapply(index.all,function(x) gen.ATE.sub(x))
names(ME.all.real) <- NULL
AME <- weighted.mean(ME.all.real,weights,na.rm=TRUE)
return(AME)
}
}
all.output.noCI <- list()
if(treat.type=='discrete'){
for(char in other.treat){
gen.TE.output <- gen.kernel.TE(coef.grid=coef.grid,char=char)
gen.diff.output <- gen.kernel.difference(coef.grid=coef.grid,
diff.values=diff.values,
char=char)
gen.ATE.output <- gen.ATE(coef.grid=coef.grid,data=data,char=char)
all.output.noCI[[char]] <- list(TE=gen.TE.output,diff=gen.diff.output,ATE=gen.ATE.output)
}
}
if(treat.type=='continuous'){
k <- 1
for(D.ref in D.sample){
gen.ME.output <- gen.kernel.TE(coef.grid=coef.grid,D.ref=D.ref)
gen.diff.output <- gen.kernel.difference(coef.grid=coef.grid,
diff.values=diff.values,
D.ref=D.ref)
all.output.noCI[[label.name[k]]] <- list(ME=gen.ME.output,
diff=gen.diff.output)
k <- k + 1
}
AME.estimate <- c(gen.ATE(coef.grid=coef.grid,data=data))
all.output.noCI[['AME']] <- AME.estimate
}
if(CI==TRUE){
#Part1: Bootstrap
if(treat.type=='discrete'){
all.length <- neval*length(other.treat) + #TE
neval*length(all.treat) + #pred
neval*length(all.treat) + #link
length(other.treat)*length(difference.name) + #diff
length(other.treat) #ATE
}
if(treat.type=='continuous'){
all.length <- neval*length(label.name) + #ME
neval*length(label.name) + #pred
neval*length(label.name) + #link
length(label.name)*length(difference.name) + 1 # diff&AME
}
one.boot <- function(){
if (is.null(cl)==TRUE){
smp <- sample(1:n,n,replace=TRUE)
} else{ ## block bootstrap
cluster.boot<-sample(clusters,length(clusters),replace=TRUE)
smp<-unlist(id.list[match(cluster.boot,clusters)])
}
data.boot <- data[smp,]
boot.out <- matrix(NA,nrow=all.length,ncol=0)
if(treat.type=='discrete'){
if(length(unique(data.boot[,D]))!=length(unique(data[,D]))){
return(boot.out)
}
}
if(is.null(weights)==TRUE){
w.touse <- rep(1,dim(data.boot)[1])
}else{
w.touse <- data.boot[,weights]
}
#Xdensity
suppressWarnings(Xdensity.boot <- density(data.boot[,X],weights=w.touse))
coef.grid.boot <- t(sapply(X.eval,function(x) wls(x=x,data = data.boot,bw=bw,weights=w.touse,Xdensity=Xdensity.boot)))
boot.one.round <- c()
if(treat.type=='discrete'){
for(char in other.treat){
gen.TE.output <- gen.kernel.TE(coef.grid=coef.grid.boot,char=char)
gen.diff.output <- gen.kernel.difference(coef.grid=coef.grid.boot,
diff.values=diff.values,
char=char)
gen.ATE.output <- gen.ATE(coef.grid=coef.grid.boot,data=data.boot,char=char)
TE.output <- gen.TE.output$TE
names(TE.output) <- rep(paste0("TE.",char),neval)
E.pred.output <- gen.TE.output$E.pred
names(E.pred.output) <- rep(paste0("pred.",char),neval)
E.base.output <- gen.TE.output$E.base
link.output <- gen.TE.output$link.1
names(link.output) <- rep(paste0("link.",char),neval)
link0.output <- gen.TE.output$link.0
diff.estimate.output <- c(gen.diff.output)
names(diff.estimate.output) <- rep(paste0("diff.",char),length(difference.name))
ATE.estimate <- c(gen.ATE.output)
names(ATE.estimate) <- paste0("ATE.",char)
boot.one.round <- c(boot.one.round,TE.output,E.pred.output,link.output,diff.estimate.output,ATE.estimate)
}
names(E.base.output) <- rep(paste0("pred.",base),neval)
boot.one.round <- c(boot.one.round,E.base.output)
names(link0.output) <- rep(paste0("link.",base),neval)
boot.one.round <- c(boot.one.round,link0.output)
}
if(treat.type=='continuous'){
k <- 1
for(D.ref in D.sample){
gen.kernel.ME.output <- gen.kernel.TE(coef.grid=coef.grid.boot,D.ref=D.ref)
ME.output <- gen.kernel.ME.output$ME
names(ME.output) <- rep(paste0("ME.",label.name[k]),neval)
E.pred.output <- gen.kernel.ME.output$E.pred
names(E.pred.output) <- rep(paste0("pred.",label.name[k]),neval)
link.output <- gen.kernel.ME.output$link
names(link.output) <- rep(paste0("link.",label.name[k]),neval)
gen.diff.output <- gen.kernel.difference(coef.grid=coef.grid.boot,
diff.values=diff.values,
D.ref=D.ref)
diff.estimate.output <- c(gen.diff.output)
names(diff.estimate.output) <- rep(paste0("diff.",label.name[k]),length(difference.name))
boot.one.round <- c(boot.one.round,ME.output,E.pred.output,link.output,diff.estimate.output)
k <- k + 1
}
AME.estimate <- c(gen.ATE(coef.grid=coef.grid.boot,data=data.boot))
names(AME.estimate) <- c("AME")
boot.one.round <- c(boot.one.round,AME.estimate)
}
boot.out <- cbind(boot.out,boot.one.round)
rownames(boot.out) <- names(boot.one.round)
colnames(boot.out) <- NULL
return(boot.out)
}
if(parallel==TRUE){
requireNamespace("doParallel")
## require(iterators)
maxcores <- detectCores()
cores <- min(maxcores, cores)
pcl<-future::makeClusterPSOCK(cores)
doParallel::registerDoParallel(pcl)
cat("Parallel computing with", cores,"cores...\n")
suppressWarnings(
bootout <- foreach (i=1:nboots, .combine=cbind,
.export=c("one.boot"),.packages=c('MASS','AER'),
.inorder=FALSE) %dopar% {one.boot()}
)
suppressWarnings(stopCluster(pcl))
cat("\r")
}
else{
bootout<-matrix(NA,all.length,0)
for(i in 1:nboots){
tempdata <- one.boot()
if(is.null(tempdata)==FALSE){
bootout<- cbind(bootout,tempdata)
}
if (i%%50==0) cat(i) else cat(".")
}
cat("\r")
}
if(treat.type=='discrete'){
TE.output.all.list <- list()
pred.output.all.list <- list()
diff.output.all.list <- list()
TE.vcov.list <- list()
ATE.output.list <- list()
link.output.all.list <- list()
for(char in other.treat){
gen.general.TE.output <- all.output.noCI[[char]]$TE
TE.output <- gen.general.TE.output$TE
E.pred.output <- gen.general.TE.output$E.pred
E.base.output <- gen.general.TE.output$E.base
link.output <- gen.general.TE.output$link.1
link0.output <- gen.general.TE.output$link.0
diff.estimate.output <- all.output.noCI[[char]]$diff
ATE.estimate <- all.output.noCI[[char]]$ATE
TE.boot.matrix <- bootout[rownames(bootout)==paste0("TE.",char),]
pred.boot.matrix <- bootout[rownames(bootout)==paste0("pred.",char),]
base.boot.matrix <- bootout[rownames(bootout)==paste0("pred.",base),]
link.boot.matrix <- bootout[rownames(bootout)==paste0("link.",char),]
link0.boot.matrix <- bootout[rownames(bootout)==paste0("link.",base),]
diff.boot.matrix <- bootout[rownames(bootout)==paste0("diff.",char),]
ATE.boot.matrix <- matrix(bootout[rownames(bootout)==paste0("ATE.",char),],nrow=1)
if(length(diff.values)==2){
diff.boot.matrix <- matrix(diff.boot.matrix,nrow=1)
}
if(length(diff.values)==3){
diff.boot.matrix <- as.matrix(diff.boot.matrix)
}
TE.boot.sd <- apply(TE.boot.matrix, 1, sd, na.rm=TRUE)
pred.boot.sd <- apply(pred.boot.matrix, 1, sd, na.rm=TRUE)
base.boot.sd <- apply(base.boot.matrix, 1, sd, na.rm=TRUE)
link.boot.sd <- apply(link.boot.matrix, 1, sd, na.rm=TRUE)
link0.boot.sd <- apply(link0.boot.matrix, 1, sd, na.rm=TRUE)
diff.boot.sd <- apply(diff.boot.matrix, 1, sd, na.rm=TRUE)
ATE.boot.sd <- apply(ATE.boot.matrix, 1, sd, na.rm=TRUE)
TE.boot.CI <- t(apply(TE.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
pred.boot.CI <- t(apply(pred.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
base.boot.CI <- t(apply(base.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
link.boot.CI <- t(apply(link.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
link0.boot.CI <- t(apply(link0.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
diff.boot.CI <- t(apply(diff.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
ATE.boot.CI <- matrix(t(apply(ATE.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE)),nrow=1)
if(length(diff.values)==2){
diff.boot.CI <- matrix(diff.boot.CI,nrow=1)
}
if(length(diff.values)==3){
diff.boot.CI <- as.matrix(diff.boot.CI)
}
TE.boot.vcov <- cov(t(TE.boot.matrix),use="na.or.complete")
rownames(TE.boot.vcov) <- NULL
colnames(TE.boot.vcov) <- NULL
TE.output.all <- cbind(X.eval,TE.output,TE.boot.sd,TE.boot.CI[,1],TE.boot.CI[,2])
colnames(TE.output.all) <- c("X","TE","sd","lower CI(95%)","upper CI(95%)")
rownames(TE.output.all) <- NULL
TE.output.all.list[[other.treat.origin[char]]] <- TE.output.all
pred.output.all <- cbind(X.eval,E.pred.output,pred.boot.sd,pred.boot.CI[,1],pred.boot.CI[,2])
colnames(pred.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(pred.output.all) <- NULL
pred.output.all.list[[other.treat.origin[char]]] <- pred.output.all
link.output.all <- cbind(X.eval,link.output,link.boot.sd,link.boot.CI[,1],link.boot.CI[,2])
colnames(link.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link.output.all) <- NULL
link.output.all.list[[other.treat.origin[char]]] <- link.output.all
TE.vcov.list[[other.treat.origin[char]]] <- TE.boot.vcov
z.value <- diff.estimate.output/diff.boot.sd
p.value <- 2*pnorm(-abs(z.value))
diff.output.all <- cbind(diff.estimate.output,diff.boot.sd,
z.value,p.value,diff.boot.CI[,1],diff.boot.CI[,2])
colnames(diff.output.all) <- c("diff.estimate","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(diff.output.all) <- difference.name
diff.output.all.list[[other.treat.origin[char]]] <- diff.output.all
ATE.z.value <- ATE.estimate/ATE.boot.sd
ATE.p.value <- 2*pnorm(-abs(ATE.z.value))
ATE.output <- c(ATE.estimate,ATE.boot.sd,ATE.z.value,ATE.p.value,ATE.boot.CI[,1],ATE.boot.CI[,2])
names(ATE.output) <- c("ATE","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
ATE.output.list[[other.treat.origin[char]]] <- ATE.output
}
#base
base.output.all <- cbind(X.eval,E.base.output,base.boot.sd,base.boot.CI[,1],base.boot.CI[,2])
colnames(base.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(base.output.all) <- NULL
pred.output.all.list[[all.treat.origin[base]]] <- base.output.all
link0.output.all <- cbind(X.eval,link0.output,link0.boot.sd,link0.boot.CI[,1],link0.boot.CI[,2])
colnames(link0.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link0.output.all) <- NULL
link.output.all.list[[all.treat.origin[base]]] <- link0.output.all
}
if(treat.type=='continuous'){
ME.output.all.list <- list()
pred.output.all.list <- list()
diff.output.all.list <- list()
ME.vcov.list <- list()
link.output.all.list <- list()
k <- 1
for(D.ref in D.sample){
label <- label.name[k]
gen.general.ME.output <- all.output.noCI[[label]]$ME
ME.output <- gen.general.ME.output$ME
E.pred.output <- gen.general.ME.output$E.pred
link.output <- gen.general.ME.output$link
diff.estimate.output <- all.output.noCI[[label]]$diff
ME.boot.matrix <- bootout[rownames(bootout)==paste0("ME.",label),]
pred.boot.matrix <- bootout[rownames(bootout)==paste0("pred.",label),]
link.boot.matrix <- bootout[rownames(bootout)==paste0("link.",label),]
diff.boot.matrix <- bootout[rownames(bootout)==paste0("diff.",label),]
if(length(diff.values)==2){
diff.boot.matrix <- matrix(diff.boot.matrix,nrow=1)
}
if(length(diff.values)==3){
diff.boot.matrix <- as.matrix(diff.boot.matrix)
}
ME.boot.vcov <- cov(t(ME.boot.matrix),use="na.or.complete")
rownames(ME.boot.vcov) <- NULL
colnames(ME.boot.vcov) <- NULL
ME.boot.sd <- apply(ME.boot.matrix, 1, sd,na.rm=TRUE)
pred.boot.sd <- apply(pred.boot.matrix, 1, sd,na.rm=TRUE)
link.boot.sd <- apply(link.boot.matrix, 1, sd,na.rm=TRUE)
diff.boot.sd <- apply(diff.boot.matrix, 1, sd,na.rm=TRUE)
ME.boot.CI <- t(apply(ME.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
pred.boot.CI <- t(apply(pred.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
link.boot.CI <- t(apply(link.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
diff.boot.CI <- t(apply(diff.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE))
ME.output.all <- cbind(X.eval,ME.output,ME.boot.sd,ME.boot.CI[,1],ME.boot.CI[,2])
colnames(ME.output.all) <- c("X","ME","sd","lower CI(95%)","upper CI(95%)")
rownames(ME.output.all) <- NULL
ME.output.all.list[[label]] <- ME.output.all
pred.output.all <- cbind(X.eval,E.pred.output,pred.boot.sd,pred.boot.CI[,1],pred.boot.CI[,2])
colnames(pred.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(pred.output.all) <- NULL
pred.output.all.list[[label]] <- pred.output.all
link.output.all <- cbind(X.eval,link.output,link.boot.sd,link.boot.CI[,1],link.boot.CI[,2])
colnames(link.output.all) <- c("X","E(Y)","sd","lower CI(95%)","upper CI(95%)")
rownames(link.output.all) <- NULL
link.output.all.list[[label]] <- link.output.all
ME.vcov.list[[label]] <- ME.boot.vcov
z.value <- diff.estimate.output/diff.boot.sd
p.value <- 2*pnorm(-abs(z.value))
diff.output.all <- cbind(diff.estimate.output,diff.boot.sd,
z.value,p.value,diff.boot.CI[,1],diff.boot.CI[,2])
colnames(diff.output.all) <- c("diff.estimate","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(diff.output.all) <- difference.name
diff.output.all.list[[label]] <- diff.output.all
k <- k + 1
}
AME.estimate <- all.output.noCI$AME
AME.boot.matrix <- matrix(bootout[rownames(bootout)=="AME",],nrow=1)
AME.boot.sd <- apply(AME.boot.matrix, 1, sd, na.rm=TRUE)
AME.boot.CI <- matrix(t(apply(AME.boot.matrix, 1, quantile, c(0.025,0.975),na.rm=TRUE)),nrow=1)
AME.z.value <- AME.estimate/AME.boot.sd
AME.p.value <- 2*pnorm(-abs(AME.z.value))
AME.output <- c(AME.estimate,AME.boot.sd,AME.z.value,AME.p.value,AME.boot.CI[,1],AME.boot.CI[,2])
names(AME.output) <- c("AME","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
}
}
if(CI==FALSE){
if(treat.type=='discrete'){
TE.output.all.list <- list()
pred.output.all.list <- list()
diff.output.all.list <- list()
link.output.all.list <- list()
TE.vcov.list <- NULL
ATE.output.list <- list()
for(char in other.treat){
gen.general.TE.output <- all.output.noCI[[char]]$TE
TE.output <- gen.general.TE.output$TE
E.pred.output <- gen.general.TE.output$E.pred
E.base.output <- gen.general.TE.output$E.base
link.output <- gen.general.TE.output$link.1
link0.output <- gen.general.TE.output$link.0
diff.estimate.output <- all.output.noCI[[char]]$diff
ATE.estimate <- all.output.noCI[[char]]$ATE
TE.output.all <- cbind(X.eval,TE.output)
colnames(TE.output.all) <- c("X","TE")
rownames(TE.output.all) <- NULL
TE.output.all.list[[other.treat.origin[char]]] <- TE.output.all
pred.output.all <- cbind(X.eval,E.pred.output)
colnames(pred.output.all) <- c("X","E(Y)")
rownames(pred.output.all) <- NULL
pred.output.all.list[[other.treat.origin[char]]] <- pred.output.all
link.output.all <- cbind(X.eval,link.output)
colnames(link.output.all) <- c("X","E(Y)")
rownames(link.output.all) <- NULL
link.output.all.list[[other.treat.origin[char]]] <- link.output.all
diff.output.all <- cbind(diff.estimate.output)
colnames(diff.output.all) <- c("diff.estimate")
rownames(diff.output.all) <- difference.name
diff.output.all.list[[other.treat.origin[char]]] <- diff.output.all
ATE.output <- c(ATE.estimate)
names(ATE.output) <- c("ATE")
ATE.output.list[[other.treat.origin[char]]] <- ATE.output
}
#base
base.output.all <- cbind(X.eval,E.base.output)
colnames(base.output.all) <- c("X","E(Y)")
rownames(base.output.all) <- NULL
pred.output.all.list[[all.treat.origin[base]]] <- base.output.all
link0.output.all <- cbind(X.eval,link0.output)
colnames(link0.output.all) <- c("X","E(Y)")
rownames(link0.output.all) <- NULL
link.output.all.list[[all.treat.origin[base]]] <- link0.output.all
}
if(treat.type=='continuous'){
ME.output.all.list <- list()
pred.output.all.list <- list()
diff.output.all.list <- list()
ME.vcov.list <- NULL
link.output.all.list <- list()
k <- 1
for(D.ref in D.sample){
label <- label.name[k]
gen.general.ME.output <- all.output.noCI[[label]]$ME
ME.output <- gen.general.ME.output$ME
E.pred.output <- gen.general.ME.output$E.pred
link.output <- gen.general.ME.output$link
diff.estimate.output <- all.output.noCI[[label]]$diff
ME.output.all <- cbind(X.eval,ME.output)
colnames(ME.output.all) <- c("X","ME")
rownames(ME.output.all) <- NULL
ME.output.all.list[[label]] <- ME.output.all
pred.output.all <- cbind(X.eval,E.pred.output)
colnames(pred.output.all) <- c("X","E(Y)")
rownames(pred.output.all) <- NULL
pred.output.all.list[[label]] <- pred.output.all
link.output.all <- cbind(X.eval,link.output)
colnames(link.output.all) <- c("X","E(Y)")
rownames(link.output.all) <- NULL
link.output.all.list[[label]] <- link.output.all
diff.output.all <- cbind(diff.estimate.output)
colnames(diff.output.all) <- c("diff.estimate")
rownames(diff.output.all) <- difference.name
diff.output.all.list[[label]] <- diff.output.all
k <- k + 1
}
AME.estimate <- all.output.noCI$AME
AME.output <- c(AME.estimate)
names(AME.output) <- c("AME")
}
}
# density or histogram
if (treat.type=='discrete'){ ## discrete D
# density
if(is.null(weights)==TRUE){
de <- density(data[,X])
}else {
suppressWarnings(de <- density(data[,X],weights=data[,'WEIGHTS']))
}
treat_den <- list()
for (char in all.treat){
de.name <- paste0("den.",char)
if (is.null(weights)==TRUE){
de.tr <- density(data[data[,D]==char,X])
}
else {
suppressWarnings(de.tr <- density(data[data[,D]==char,X],
weights=data[data[,D]==char,'WEIGHTS']))
}
treat_den[[all.treat.origin[char]]] <- de.tr
}
# histogram
if (is.null(weights)==TRUE){
hist.out<-hist(data[,X],breaks=80,plot=FALSE)
} else {
suppressWarnings(hist.out<-hist(data[,X],data[,'WEIGHTS'],
breaks=80,plot=FALSE))
}
n.hist<-length(hist.out$mids)
# count the number of treated
treat.hist <- list()
for (char in all.treat){
count1<-rep(0,n.hist)
treat_index<-which(data[,D]==char)
for (i in 1:n.hist) {
count1[i]<-sum(data[treat_index,X]>=hist.out$breaks[i] &
data[treat_index,X]<hist.out$breaks[(i+1)])
}
count1[n.hist]<-sum(data[treat_index,X]>=hist.out$breaks[n.hist] &
data[treat_index,X]<=hist.out$breaks[n.hist+1])
treat.hist[[all.treat.origin[char]]] <- count1
}
}
if (treat.type=='continuous'){ ## continuous D
if (is.null(weights)==TRUE){
de <- density(data[,X])
} else {
suppressWarnings(de <- density(data[,X],weights=data[,'WEIGHTS']))
}
if (is.null(weights)==TRUE){
hist.out<-hist(data[,X],breaks=80,plot=FALSE)
} else{
suppressWarnings(hist.out<-hist(data[,X],data[,'WEIGHTS'],
breaks=80,plot=FALSE))
}
de.co <- de.tr <- NULL
}
## Output
if(treat.type=='discrete'){
for(char in other.treat.origin){
if(CI==TRUE){
new.diff.est <- as.data.frame(diff.output.all.list[[char]])
for(i in 1:dim(new.diff.est)[1]){
new.diff.est[i,] <- sprintf(new.diff.est[i,],fmt = '%#.3f')
}
diff.output.all.list[[char]] <- new.diff.est
outss <- data.frame(lapply(ATE.output.list[[char]], function(x) t(data.frame(x))))
colnames(outss) <- c("ATE","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(outss) <- c("Average Treatment Effect")
outss[1,] <- sprintf(outss[1,],fmt = '%#.3f')
ATE.output.list[[char]] <- outss
}
}
final.output <- list(diff.info=diff.info,
treat.info=treat.info,
bw=bw,
CV.output=Error,
CI=CI,
est.kernel=TE.output.all.list,
pred.kernel=pred.output.all.list,
link.kernel=link.output.all.list,
diff.estimate=diff.output.all.list,
vcov.matrix=TE.vcov.list,
Avg.estimate=ATE.output.list,
Xlabel = Xlabel,
Dlabel = Dlabel,
Ylabel = Ylabel,
de = de,
de.tr = treat_den, # density
hist.out = hist.out,
count.tr = treat.hist,
estimator = 'kernel',
use.fe = use_fe
)
}
if(treat.type=='continuous'){
for(label in label.name){
if(CI==TRUE){
new.diff.est <- as.data.frame(diff.output.all.list[[label]])
for(i in 1:dim(new.diff.est)[1]){
new.diff.est[i,] <- sprintf(new.diff.est[i,],fmt = '%#.3f')
}
diff.output.all.list[[label]] <- new.diff.est
}
}
if(CI==TRUE){
outss <- data.frame(lapply(AME.output, function(x) t(data.frame(x))))
colnames(outss) <- c("AME","sd","z-value","p-value","lower CI(95%)","upper CI(95%)")
rownames(outss) <- c("Average Marginal Effect")
outss[1,] <- sprintf(outss[1,],fmt = '%#.3f')
AME.output <- outss
}
final.output <- list(diff.info=diff.info,
treat.info=treat.info,
bw=bw,
CV.output=Error,
CI=CI,
est.kernel=ME.output.all.list,
pred.kernel=pred.output.all.list,
link.kernel=link.output.all.list,
diff.estimate=diff.output.all.list,
vcov.matrix=ME.vcov.list,
Avg.estimate=AME.output,
Xlabel = Xlabel,
Dlabel = Dlabel,
Ylabel = Ylabel,
de = de, # density
de.tr = de.tr,
hist.out = hist.out,
count.tr = NULL,
estimator = 'kernel',
use.fe = use_fe
)
}
#Plot
if(figure==TRUE){
class(final.output) <- "interflex"
figure.output <- plot.interflex( x=final.output,
order = order,
subtitles = subtitles,
show.subtitles = show.subtitles,
CI = CI,
diff.values = diff.values.plot,
Xdistr = Xdistr,
main = main,
Ylabel = Ylabel,
Dlabel = Dlabel,
Xlabel = Xlabel,
xlab = xlab,
ylab = ylab,
xlim = xlim,
ylim = ylim,
theme.bw = theme.bw,
show.grid = show.grid,
cex.main = cex.main,
cex.sub = cex.sub,
cex.lab = cex.lab,
cex.axis = cex.axis,
#bin.labs = bin.labs, # bin labels
interval = interval, # interval in replicated papers
file = file,
ncols = ncols,
#pool plot
pool = pool,
legend.title = legend.title,
color = color,
show.all = show.all,
scale = scale,
height = height,
width = width
)
final.output <- c(final.output,list(figure=figure.output))
}
class(final.output) <- "interflex"
return(final.output)
}
## This is the codes of "createFolds" in the package caret
"createFolds" <-
function(y, k = 10, list = TRUE, returnTrain = FALSE) {
if(class(y)[1] == "Surv") y <- y[,"time"]
if(is.numeric(y)) {
## Group the numeric data based on their magnitudes
## and sample within those groups.
## When the number of samples is low, we may have
## issues further slicing the numeric data into
## groups. The number of groups will depend on the
## ratio of the number of folds to the sample size.
## At most, we will use quantiles. If the sample
## is too small, we just do regular unstratified
## CV
cuts <- floor(length(y)/k)
if(cuts < 2) cuts <- 2
if(cuts > 5) cuts <- 5
breaks <- unique(quantile(y, probs = seq(0, 1, length = cuts)))
y <- cut(y, breaks, include.lowest = TRUE)
}
if(k < length(y)) {
## reset levels so that the possible levels and
## the levels in the vector are the same
y <- factor(as.character(y))
numInClass <- table(y)
foldVector <- vector(mode = "integer", length(y))
## For each class, balance the fold allocation as far
## as possible, then resample the remainder.
## The final assignment of folds is also randomized.
for(i in 1:length(numInClass)) {
## create a vector of integers from 1:k as many times as possible without
## going over the number of samples in the class. Note that if the number
## of samples in a class is less than k, nothing is produced here.
min_reps <- numInClass[i] %/% k
if(min_reps > 0) {
spares <- numInClass[i] %% k
seqVector <- rep(1:k, min_reps)
## add enough random integers to get length(seqVector) == numInClass[i]
if(spares > 0) seqVector <- c(seqVector, sample(1:k, spares))
## shuffle the integers for fold assignment and assign to this classes's data
foldVector[which(y == names(numInClass)[i])] <- sample(seqVector)
} else {
## Here there are less records in the class than unique folds so
## randomly sprinkle them into folds.
foldVector[which(y == names(numInClass)[i])] <- sample(1:k, size = numInClass[i])
}
}
} else foldVector <- seq(along = y)
if(list) {
out <- split(seq(along = y), foldVector)
names(out) <- paste("Fold", gsub(" ", "0", format(seq(along = out))), sep = "")
if(returnTrain) out <- lapply(out, function(data, y) y[-data], y = seq(along = y))
} else out <- foldVector
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.