#' @title SL.stack
#' @description Runs fully nested, cross-validated SuperLearner estimation of the
#' outcome regression and treatment mechanism across V folds. Only supports a
#' binary treatment.
#' @param Y outcome vector
#' @param X data.frame of variables that Y is a function of
#' @param A treatment vector
#' @param W data.frame of variables that A is a function of
#' @param newdata data.frame formed by stacking X as observed, X with A set to 1,
#' and X with A set to 0, in that order
#' @param method the SuperLearner meta-learning method
#' @param SL.library SuperLearner library for the outcome model
#' @param SL.libraryG SuperLearner library for the treatment mechanism
#' @param V the number of folds
#' @param mc.cores number of cores to use for parallel processing of the
#' SuperLearner fits. Note, this parallelizes across the folds, not within
#' SuperLearner.
#' @return A list with 6 elements:
#'
#' initdata: data.frame to pass as the initdata argument when running TMLE with
#' the gentmle function
#'
#' Qcoef: the average SuperLearner coefficient for each model in the outcome
#' regression
#'
#' Gcoef: the average SuperLearner coefficient for each model in the treatment
#' mechanism regression
#'
#' Qrisk: the average SuperLearner risk for each model in the outcome regression
#'
#' Grisk: the average SuperLearner risk for each model in the treatment mechanism
#' regression
#'
#' inds: the validation-set indices, stacked to match the rows of initdata
#'
#' @export
#' @example /inst/examples/exampleATEandBV.R
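#' @examples
#' \dontrun{
#' # Illustrative sketch only (not the package's example file): simulate a binary
#' # treatment and outcome, build newdata by stacking X as observed, X with A = 1,
#' # and X with A = 0, then run SL.stack. The library and method choices below are
#' # arbitrary assumptions for demonstration.
#' library(SuperLearner)
#' set.seed(1)
#' n <- 200
#' W <- data.frame(W1 = rnorm(n), W2 = rbinom(n, 1, 0.5))
#' A <- rbinom(n, 1, plogis(0.4 * W$W1))
#' Y <- rbinom(n, 1, plogis(A + W$W1 - W$W2))
#' X <- data.frame(A = A, W)
#' X1 <- X0 <- X
#' X1$A <- 1
#' X0$A <- 0
#' newdata <- rbind(X, X1, X0)
#' fit <- SL.stack(Y = Y, X = X, A = A, W = W, newdata = newdata,
#'                 method = "method.NNLS",
#'                 SL.library = c("SL.glm", "SL.mean"),
#'                 SL.libraryG = c("SL.glm", "SL.mean"),
#'                 V = 2, mc.cores = 1)
#' head(fit$initdata)
#' }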
SL.stack = function(Y, X, A, W, newdata, method, SL.library,
SL.libraryG, V=10, mc.cores = 1, ...) {
if (all(A==1 | A == 0)) familyG = binomial() else familyG = gaussian()
if (all(Y==1 | Y == 0)) familyQ = binomial() else familyQ = gaussian()
n = length(Y)
folds = origami::make_folds(n, V=V)
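  # For each fold: fit the outcome SuperLearner on the training set and predict on
  # the fold's validation rows of newdata (observed A, A = 1, A = 0), and fit the
  # treatment-mechanism SuperLearner by regressing A on W.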
stack = parallel::mclapply(folds, FUN = function(x) {
tr = x$training_set
val = x$validation_set
nt=length(tr)
nv = length(val)
Y = Y[tr]
X = X[tr,]
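    # newdata stacks X (observed A), X with A = 1, and X with A = 0, each with n
    # rows; select this fold's validation rows from each of the three blocks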
newtr = c(val, (n+val),(2*n+val))
newdata = newdata[newtr,]
if (method == "method.NNloglik") {
control = list(saveFitLibrary = TRUE, trimLogit = .001)
} else {control = list(saveFitLibrary = TRUE)}
Qfit=SuperLearner(Y,X,newX=newdata, family = familyQ,
SL.library=SL.library, method=method,
id = NULL, verbose = FALSE, control = control,
cvControl = list(V=10), obsWeights = NULL)
A = A[tr]
W1 = W[tr,]
newW = W[val,]
if (method == "method.NNloglik") {
control = list(saveFitLibrary = TRUE, trimLogit = .001)
} else {control = list(saveFitLibrary = TRUE)}
gfit = SuperLearner(Y=A,X=W1,newX = newW, family = familyG,
SL.library=SL.libraryG,method = method,
id = NULL, verbose = FALSE, control = control,
cvControl = list(V=10), obsWeights = NULL)
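    # Collapse the library predictions to the ensemble prediction using the
    # SuperLearner weights; the single-learner branch is needed because selecting
    # one column drops the prediction matrix to a vector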
if (length(gfit$coef[gfit$coef!=0])==1){
gk = gfit$library.predict[1:nv,gfit$coef!=0]
} else {
gk = gfit$library.predict[1:nv,gfit$coef!=0] %*% gfit$coef[gfit$coef!=0]
}
if (length(Qfit$coef[Qfit$coef!=0])==1){
Qk = Qfit$library.predict[1:nv,Qfit$coef!=0]
} else {
Qk = Qfit$library.predict[1:nv,Qfit$coef!=0] %*% Qfit$coef[Qfit$coef!=0]
}
if (length(Qfit$coef[Qfit$coef!=0])==1){
Q1k = Qfit$library.predict[nv+1:nv,Qfit$coef!=0]
} else {
Q1k = Qfit$library.predict[nv+1:nv,Qfit$coef!=0] %*% Qfit$coef[Qfit$coef!=0]
}
if (length(Qfit$coef[Qfit$coef!=0])==1){
Q0k = Qfit$library.predict[2*nv+1:nv,Qfit$coef!=0]
} else {
Q0k = Qfit$library.predict[2*nv+1:nv,Qfit$coef!=0] %*% Qfit$coef[Qfit$coef!=0]
}
Qcoef = Qfit$coef
Gcoef = gfit$coef
Qrisk = Qfit$cvRisk
Grisk = gfit$cvRisk
return(list(Qk = Qk, Q0k = Q0k, Q1k = Q1k, gk = gk, Qcoef = Qcoef, Gcoef = Gcoef,
Qrisk = Qrisk, Grisk = Grisk, inds = x$validation_set))
}, mc.cores = mc.cores)
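  # Stack the validation-set predictions across folds and average the SuperLearner
  # coefficients and cross-validated risks over folds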
Qk = unlist(lapply(stack, FUN = function(x) x$Qk))
Q1k = unlist(lapply(stack, FUN = function(x) x$Q1k))
Q0k = unlist(lapply(stack, FUN = function(x) x$Q0k))
gk = unlist(lapply(stack, FUN = function(x) x$gk))
Qcoef_mat = vapply(stack, FUN = function(x) x$Qcoef,
FUN.VALUE = rep(1,length(stack[[1]]$Qcoef)))
Qrisk_mat = vapply(stack, FUN = function(x) x$Qrisk,
FUN.VALUE = rep(1,length(stack[[1]]$Qrisk)))
Gcoef_mat = vapply(stack, FUN = function(x) x$Gcoef,
FUN.VALUE = rep(1,length(stack[[1]]$Gcoef)))
Grisk_mat = vapply(stack, FUN = function(x) x$Grisk,
FUN.VALUE = rep(1,length(stack[[1]]$Gcoef)))
if (length(SL.library)==1) {
Qcoef = mean(Qcoef_mat)
Qrisk = mean(Qrisk_mat)
} else {
Qcoef = rowMeans(Qcoef_mat)
Qrisk = rowMeans(Qrisk_mat)
}
if (length(SL.libraryG)==1) {
Gcoef = mean(Gcoef_mat)
Grisk = mean(Grisk_mat)
} else {
Gcoef = rowMeans(Gcoef_mat)
Grisk = rowMeans(Grisk_mat)
}
inds = unlist(lapply(stack, FUN = function(x) x$inds))
Y = Y[inds]
A = A[inds]
initdata = data.frame(Y=Y,A=A,Qk=Qk,Q1k=Q1k,Q0k=Q0k,gk=gk)
return(list(initdata = initdata, Qcoef = Qcoef, Gcoef = Gcoef, Qrisk = Qrisk,
Grisk = Grisk, inds = inds))
}
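#' @title SL.stack1
#' @description Sequential (non-parallel) variant of \code{SL.stack}. Setting
#' \code{cv = FALSE} fits on the full sample instead of cross-validating across
#' the V outer folds; \code{SL} sets the number of folds used inside each
#' SuperLearner call; and \code{gn} optionally supplies known treatment
#' probabilities, in which case the treatment mechanism is not re-estimated.
#' When \code{method = "method.NNloglik"} fails, the fit falls back to
#' \code{"method.NNLS"}. Other arguments and the returned list match
#' \code{SL.stack}.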
#' @export
SL.stack1 = function(Y, X, A, W, newdata, method, SL.library, SL.libraryG,
cv = TRUE, V=10, SL = 10L, gn = NULL, ...) {
if (all(A==1 | A == 0)) familyG = binomial() else familyG = gaussian()
if (all(Y==1 | Y == 0)) familyQ = binomial() else familyQ = gaussian()
n = length(Y)
if (!cv) V = 1
folds = origami::make_folds(n, V=V)
stack = lapply(folds, FUN = function(x) {
if (!cv) {
tr = val = 1:n
} else {
tr = x$training_set
val = x$validation_set
}
nt=length(tr)
nv = length(val)
Aval = A[val]
Y = Y[tr]
X = X[tr,]
newtr = c(val, (n+val),(2*n+val))
newdata = newdata[newtr,]
if (method == "method.NNloglik") {
control = list(saveFitLibrary = TRUE, trimLogit = .001)
} else {
control = list(saveFitLibrary = TRUE)
}
errorG = errorQ = FALSE
if (method == "method.NNloglik") {
Qfit = try(SuperLearner(Y,X,newX=newdata, family = familyQ,
SL.library=SL.library, method = method,
id = NULL, verbose = FALSE, control = control,
cvControl = list(V=SL), obsWeights = NULL), silent=TRUE)
      if (inherits(Qfit, "try-error")) {
Qfit = SuperLearner(Y,X,newX=newdata, family = familyQ,
SL.library=SL.library, method = "method.NNLS",
id = NULL, verbose = FALSE, control = control,
cvControl = list(V=SL), obsWeights = NULL)
errorQ = TRUE
}
} else {
Qfit = SuperLearner(Y,X,newX=newdata, family = familyQ,
SL.library=SL.library, method = method,
id = NULL, verbose = FALSE, control = control,
cvControl = list(V=SL), obsWeights = NULL)
}
A = A[tr]
W1 = W[tr,]
newW = W[val,]
    if (is.null(gn)) {
      # regress A on W for the treatment mechanism, falling back to method.NNLS
      # if the NNloglik meta-learner fails
      gfit = try(SuperLearner(Y = A, X = W1, newX = newW, family = familyG,
                              SL.library = SL.libraryG, method = "method.NNloglik",
                              id = NULL, verbose = FALSE,
                              control = list(saveFitLibrary = TRUE, trimLogit = .001),
                              cvControl = list(V = SL), obsWeights = NULL), silent = TRUE)
      if (inherits(gfit, "try-error")) {
        gfit = SuperLearner(Y = A, X = W1, newX = newW, family = familyG,
                            SL.library = SL.libraryG, method = "method.NNLS",
                            id = NULL, verbose = FALSE,
                            control = list(saveFitLibrary = TRUE, trimLogit = .001),
                            cvControl = list(V = SL), obsWeights = NULL)
        errorG = TRUE
      }
    }
if (is.null(gn)) {
if (length(gfit$coef[gfit$coef!=0])==1) {
gk = gfit$library.predict[1:nv,gfit$coef!=0]
} else {
gk = gfit$library.predict[1:nv,gfit$coef!=0] %*% gfit$coef[gfit$coef!=0]
}
}
if (length(Qfit$coef[Qfit$coef!=0])==1) {
Qk = Qfit$library.predict[1:nv,Qfit$coef!=0]
} else {
Qk = Qfit$library.predict[1:nv,Qfit$coef!=0] %*% Qfit$coef[Qfit$coef!=0]
}
if (length(Qfit$coef[Qfit$coef!=0])==1) {
Q1k = Qfit$library.predict[nv+1:nv,Qfit$coef!=0]
} else {
Q1k = Qfit$library.predict[nv+1:nv,Qfit$coef!=0] %*% Qfit$coef[Qfit$coef!=0]
}
if (length(Qfit$coef[Qfit$coef!=0])==1) {
Q0k = Qfit$library.predict[2*nv+1:nv,Qfit$coef!=0]
} else {
Q0k = Qfit$library.predict[2*nv+1:nv,Qfit$coef!=0] %*% Qfit$coef[Qfit$coef!=0]
}
Qcoef = Qfit$coef
Qrisk = Qfit$cvRisk
if (is.null(gn)) {
Gcoef = gfit$coef
Grisk = gfit$cvRisk
} else {
gk = gn[val]
Gcoef = 1
Grisk = -mean(Aval*log(gk) + (1 - Aval)*log(1 - gk))
}
return(list(Qk = Qk, Q0k = Q0k, Q1k = Q1k, gk = gk, Qcoef = Qcoef, Gcoef = Gcoef,
Qrisk = Qrisk, Grisk = Grisk, inds = val, errorG = errorG, errorQ = errorQ))
})
Qk = unlist(lapply(stack, FUN = function(x) x$Qk))
Q1k = unlist(lapply(stack, FUN = function(x) x$Q1k))
Q0k = unlist(lapply(stack, FUN = function(x) x$Q0k))
gk = unlist(lapply(stack, FUN = function(x) x$gk))
Qcoef_mat = vapply(stack, FUN = function(x) x$Qcoef,
FUN.VALUE = rep(1,length(stack[[1]]$Qcoef)))
Qrisk_mat = vapply(stack, FUN = function(x) {
if (!x$errorQ) return(x$Qrisk) else return(rep(0,length(stack[[1]]$Qrisk)))
},
FUN.VALUE = rep(1,length(stack[[1]]$Qrisk)))
Gcoef_mat = vapply(stack, FUN = function(x) x$Gcoef,
FUN.VALUE = rep(1,length(stack[[1]]$Gcoef)))
Grisk_mat = vapply(stack, FUN = function(x) {
if (!x$errorG) return(x$Grisk) else return(rep(0,length(stack[[1]]$Grisk)))
},
FUN.VALUE = rep(1,length(stack[[1]]$Gcoef)))
if (is.vector(Qcoef_mat)) {
Qcoef = mean(Qcoef_mat)
if (all(Qrisk_mat == 0)) {
Qrisk = 0
} else {
Qrisk = mean(Qrisk_mat[Qrisk_mat!=0])
}
} else {
Qcoef = rowMeans(Qcoef_mat)
col.check = apply(Qrisk_mat, 2, FUN = function(col) {
any(is.na(col)) | any(col != 0)
})
if (all(!col.check)) Qrisk = rep(0, nrow(Qrisk_mat)) else {
Qrisk = rowMeans(as.data.frame(Qrisk_mat[,col.check]))
}
}
if (is.vector(Gcoef_mat)) {
Gcoef = mean(Gcoef_mat)
if (all(Grisk_mat == 0)) {
Grisk = 0
} else {
Grisk = mean(Grisk_mat[Grisk_mat!=0])
}
} else {
Gcoef = rowMeans(Gcoef_mat)
col.check = apply(Grisk_mat, 2, FUN = function(col) {
any(is.na(col)) | any(col != 0)
})
if (all(!col.check)) Grisk = rep(0, nrow(Grisk_mat)) else {
Grisk = rowMeans(as.data.frame(Grisk_mat[,col.check]))
}
}
inds = unlist(lapply(stack, FUN = function(x) x$inds))
Y = Y[inds]
A = A[inds]
initdata = data.frame(Y=Y,A=A,Qk=Qk,Q1k=Q1k,Q0k=Q0k,gk=gk)
return(list(initdata = initdata, Qcoef = Qcoef, Gcoef = Gcoef, Qrisk = Qrisk,
Grisk = Grisk, inds = inds))
}
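#' @title SL.stacknew
#' @description Same as \code{SL.stack1}, except that \code{initdata} is returned
#' as a list of per-fold data.frames (one per validation fold) rather than a
#' single stacked data.frame, and \code{inds} is the corresponding list of
#' validation-set indices.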
#' @export
SL.stacknew = function(Y, X, A, W, newdata, method, SL.library, SL.libraryG,
cv = TRUE, V=10, SL = 10L, gn = NULL, ...) {
if (all(A==1 | A == 0)) familyG = binomial() else familyG = gaussian()
if (all(Y==1 | Y == 0)) familyQ = binomial() else familyQ = gaussian()
n = length(Y)
if (!cv) V = 1
folds = origami::make_folds(n, V=V)
stack = lapply(folds, FUN = function(x) {
if (!cv) {
tr = val = 1:n
} else {
tr = x$training_set
val = x$validation_set
}
nt=length(tr)
nv = length(val)
Aval = A[val]
Y = Y[tr]
X = X[tr,]
newtr = c(val, (n+val),(2*n+val))
newdata = newdata[newtr,]
if (method == "method.NNloglik") {
control = list(saveFitLibrary = TRUE, trimLogit = .001)
} else {
control = list(saveFitLibrary = TRUE)
}
errorG = errorQ = FALSE
if (method == "method.NNloglik") {
Qfit = try(SuperLearner(Y,X,newX=newdata, family = familyQ,
SL.library=SL.library, method = method,
id = NULL, verbose = FALSE, control = control,
cvControl = list(V=SL), obsWeights = NULL), silent=TRUE)
      if (inherits(Qfit, "try-error")) {
Qfit = SuperLearner(Y,X,newX=newdata, family = familyQ,
SL.library=SL.library, method = "method.NNLS",
id = NULL, verbose = FALSE, control = control,
cvControl = list(V=SL), obsWeights = NULL)
errorQ = TRUE
}
} else {
Qfit = SuperLearner(Y,X,newX=newdata, family = familyQ,
SL.library=SL.library, method = method,
id = NULL, verbose = FALSE, control = control,
cvControl = list(V=SL), obsWeights = NULL)
}
A = A[tr]
W1 = W[tr,]
newW = W[val,]
    if (is.null(gn)) {
      # regress A on W for the treatment mechanism, falling back to method.NNLS
      # if the NNloglik meta-learner fails
      gfit = try(SuperLearner(Y = A, X = W1, newX = newW, family = familyG,
                              SL.library = SL.libraryG, method = "method.NNloglik",
                              id = NULL, verbose = FALSE,
                              control = list(saveFitLibrary = TRUE, trimLogit = .001),
                              cvControl = list(V = SL), obsWeights = NULL), silent = TRUE)
      if (inherits(gfit, "try-error")) {
        gfit = SuperLearner(Y = A, X = W1, newX = newW, family = familyG,
                            SL.library = SL.libraryG, method = "method.NNLS",
                            id = NULL, verbose = FALSE,
                            control = list(saveFitLibrary = TRUE, trimLogit = .001),
                            cvControl = list(V = SL), obsWeights = NULL)
        errorG = TRUE
      }
    }
if (is.null(gn)) {
if (length(gfit$coef[gfit$coef!=0])==1) {
gk = gfit$library.predict[1:nv,gfit$coef!=0]
} else {
gk = gfit$library.predict[1:nv,gfit$coef!=0] %*% gfit$coef[gfit$coef!=0]
}
}
if (length(Qfit$coef[Qfit$coef!=0])==1) {
Qk = Qfit$library.predict[1:nv,Qfit$coef!=0]
} else {
Qk = Qfit$library.predict[1:nv,Qfit$coef!=0] %*% Qfit$coef[Qfit$coef!=0]
}
if (length(Qfit$coef[Qfit$coef!=0])==1) {
Q1k = Qfit$library.predict[nv+1:nv,Qfit$coef!=0]
} else {
Q1k = Qfit$library.predict[nv+1:nv,Qfit$coef!=0] %*% Qfit$coef[Qfit$coef!=0]
}
if (length(Qfit$coef[Qfit$coef!=0])==1) {
Q0k = Qfit$library.predict[2*nv+1:nv,Qfit$coef!=0]
} else {
Q0k = Qfit$library.predict[2*nv+1:nv,Qfit$coef!=0] %*% Qfit$coef[Qfit$coef!=0]
}
Qcoef = Qfit$coef
Qrisk = Qfit$cvRisk
if (is.null(gn)) {
Gcoef = gfit$coef
Grisk = gfit$cvRisk
} else {
gk = gn[val]
Gcoef = 1
Grisk = -mean(Aval*log(gk) + (1 - Aval)*log(1 - gk))
}
return(list(Qk = Qk, Q0k = Q0k, Q1k = Q1k, gk = gk, Qcoef = Qcoef, Gcoef = Gcoef,
Qrisk = Qrisk, Grisk = Grisk, inds = val, errorG = errorG, errorQ = errorQ))
})
Qcoef_mat = vapply(stack, FUN = function(x) x$Qcoef,
FUN.VALUE = rep(1,length(stack[[1]]$Qcoef)))
Qrisk_mat = vapply(stack, FUN = function(x) {
if (!x$errorQ) return(x$Qrisk) else return(rep(0,length(stack[[1]]$Qrisk)))
},
FUN.VALUE = rep(1,length(stack[[1]]$Qrisk)))
Gcoef_mat = vapply(stack, FUN = function(x) x$Gcoef,
FUN.VALUE = rep(1,length(stack[[1]]$Gcoef)))
Grisk_mat = vapply(stack, FUN = function(x) {
if (!x$errorG) return(x$Grisk) else return(rep(0,length(stack[[1]]$Grisk)))
},
FUN.VALUE = rep(1,length(stack[[1]]$Gcoef)))
if (is.vector(Qcoef_mat)) {
Qcoef = mean(Qcoef_mat)
if (all(Qrisk_mat == 0)) {
Qrisk = 0
} else {
Qrisk = mean(Qrisk_mat[Qrisk_mat!=0])
}
} else {
Qcoef = rowMeans(Qcoef_mat)
col.check = apply(Qrisk_mat, 2, FUN = function(col) {
any(is.na(col)) | any(col != 0)
})
if (all(!col.check)) Qrisk = rep(0, nrow(Qrisk_mat)) else {
Qrisk = rowMeans(as.data.frame(Qrisk_mat[,col.check]))
}
}
if (is.vector(Gcoef_mat)) {
Gcoef = mean(Gcoef_mat)
if (all(Grisk_mat == 0)) {
Grisk = 0
} else {
Grisk = mean(Grisk_mat[Grisk_mat!=0])
}
} else {
Gcoef = rowMeans(Gcoef_mat)
col.check = apply(Grisk_mat, 2, FUN = function(col) {
any(is.na(col)) | any(col != 0)
})
if (all(!col.check)) Grisk = rep(0, nrow(Grisk_mat)) else {
Grisk = rowMeans(as.data.frame(Grisk_mat[,col.check]))
}
}
inds = lapply(stack, FUN = function(x) x$inds)
initdata = lapply(stack, FUN = function(x) {
Qk = x$Qk
Q1k = x$Q1k
Q0k = x$Q0k
gk = x$gk
inds = x$inds
Ynew = Y[inds]
Anew = A[inds]
data = data.frame(Y=Ynew,A=Anew,Qk=Qk,Q1k=Q1k,Q0k=Q0k,gk=gk)
return(data)
})
return(list(initdata = initdata, Qcoef = Qcoef, Gcoef = Gcoef, Qrisk = Qrisk,
Grisk = Grisk, inds = inds))
}