#'@title Targeted minimum-loss based estimator for the Interventional Disparity Indirect Effect (IDIE) among the exposed
#'@description The \code{idie_exposed} function is a targeted minimum-loss based
#'estimator (TMLE) of the Interventional Disparity Indirect Effect (IDIE) among the exposed.
#' We consider a structure where the mediator (Z) is an effect of the exposure (A) and a cause of the outcome (Y),
#' A -> Z -> Y. The function estimates the expected change in outcome risk among the exposed (A=1) if,
#'hypothetically, the exposed had the same probability of the mediator (Z)
#'as observed for similar unexposed (A=0) individuals.
#'The expected outcome risk under this hypothetical intervention is compared to the outcome risk
#'among the exposed when the distribution of the mediator is set
#'to the observed level among the exposed. Importantly, for this estimator the
#'exposure, mediator, and outcome must all be binary. The
#'underlying models for the exposure, mediator, and outcome, which are needed
#'to estimate the parameters, can be fitted using Super Learning. Super
#'Learning can be used to produce a weighted combination of candidate algorithms
#'that optimizes the cross-validated loss function, or to select the single best
#'performing algorithm among the candidates, also known as the discrete
#'Super Learner. One must define a library of candidate algorithms to be
#'considered by the Super Learner. If the Super Learner library contains only
#'one algorithm, results are estimated based on this algorithm alone, and
#'thus Super Learning is not used.
#'
#'@name idie_exposed
#'
#'@author Amalie Lykkemark Moller \email{amlm@@sund.ku.dk}, Helene Charlotte Wiese Rytgaard, Thomas Alexander Gerds, and Christian Torp-Pedersen
#'@usage idie_exposed(data, discrete.SL=TRUE, exposure.A=NA, mediator.Z=NA, outcome.Y=NA,
#'cov.A, cov.Z, cov.Y, SL.lib.A=FALSE, SL.lib.Z=FALSE, SL.lib.Y=FALSE, iterations=10)
#'@param data A data frame/data table with a binary exposure, a binary
#' mediator, a binary outcome, and covariates.
#'@param discrete.SL If \code{FALSE}, TMLE will use the weighted combination of
#' the algorithms in the Super Learner library that minimizes the loss
#' function. Defaults to \code{TRUE}, in which case the discrete Super Learner
#' is used, i.e. the single best-performing algorithm.
#'@param exposure.A Name of the binary exposure.
#'@param mediator.Z Name of the binary mediator, which is the
#' target of the hypothetical intervention.
#'@param outcome.Y Name of the binary outcome.
#'@param cov.A A vector containing names of possible confounders which should
#' be included in models of the exposure.
#'@param cov.Z A vector of confounders which should be included in models of
#' the mediator. Do not include the exposure; the function adds it automatically.
#'@param cov.Y A vector of confounders which should be included in models of
#' the outcome. Do not include the exposure or the mediator; the function
#' adds them automatically.
#'@param SL.lib.A A vector of algorithms that should be considered by the super
#' learner when modelling the exposure. All algorithms must be specified as
#' Super Learner objects.
#'@param SL.lib.Z A vector of algorithms for modelling the mediator. All
#' algorithms must be specified as Super Learner objects.
#'@param SL.lib.Y A vector of algorithms for modelling the outcome. All
#' algorithms must be specified as Super Learner objects.
#'@param iterations Number of iterations for the updating step in TMLE.
#' Defaults to 10.
#'
#'@details The structure of the data should be as follows:
#'\itemize{
#'\item For the binary exposure (\code{exposure.A}), 1 = exposed and 0 = unexposed.
#'\item For the binary mediator (\code{mediator.Z}), 1 = treatment and 0 = no treatment.
#'\item For the binary outcome (\code{outcome.Y}), 1 = event and 0 = no event.
#'}
#'
#'@return The function returns:
#'\itemize{
#'\item \code{psi0}: the absolute outcome risk among the exposed had their probability
#' of the mediator been the same as for similar unexposed individuals,
#'\item \code{psi1}: the absolute outcome risk among the exposed under no intervention,
#' where the probability of the mediator is as observed,
#'\item \code{psi}: the absolute risk difference between the two, i.e. the interventional
#' disparity indirect effect among the exposed,
#'\item standard errors for each of the above estimates.
#'}
#'
#' @import data.table
#' @import SuperLearner
#'
#' @examples
#'library(data.table)
#'library(tmleExposed)
#'n=5000
#'set.seed(1)
#'sex <- rbinom(n,1,0.4)
#'age <- rnorm(n,65,sd=5)
#'disease <- rbinom(n,1,0.6)
#'
#'A <- rbinom(n, 1, plogis(-3+0.05*age+1*sex))
#'Z <- rbinom(n, 1, plogis(5-0.08*age+1*sex-1.2*disease-0.8*A+0.01*A*disease))
#'Y <- rbinom(n, 1, plogis(-9+0.09*age+0.5*sex+0.8*disease-1.2*Z+0.7*A))
#'
#'d <- data.table(id=1:n, exposure=as.integer(A), mediator=as.integer(Z),
#' outcome=as.integer(Y), age, sex, disease)
#'
#'##### Define algorithms for the Super Learner library #####
#'lib = c('SL.glm','SL.step.interaction')
#'
#' #intervention: changing probability of the mediator (Z=1) among the exposed (A=1)
#' #to what it would have been had they been unexposed (A=0).
#' #target parameter: the change in outcome among the exposed (A=1) had their chance of
#' #the mediator (Z=1) been as among similar unexposed individuals (A=0).
#'
#'res<-idie_exposed(data=d,
#' exposure.A='exposure',
#' mediator.Z='mediator',
#' outcome.Y='outcome',
#' cov.A=c('sex','age'),
#' cov.Z =c('sex','age','disease'),
#' cov.Y=c('sex','age','disease'),
#' SL.lib.A = lib,
#' SL.lib.Z = lib,
#' SL.lib.Y = lib,
#' discrete.SL = FALSE)
#'
#'summary(res)
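#'
#'# A minimal sketch: a Wald-type 95% confidence interval for the indirect
#'# effect, computed from the 'estimate' and 'se' components returned by
#'# idie_exposed().
#'est <- res$estimate$psi
#'se  <- res$se$se.diff
#'c(lower=est-qnorm(0.975)*se, upper=est+qnorm(0.975)*se)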
#'
#'
#'@export
idie_exposed <- function(data,
                         discrete.SL=TRUE,
                         exposure.A=NA,
                         mediator.Z=NA,
                         outcome.Y=NA,
                         cov.A,
                         cov.Z,
                         cov.Y,
                         SL.lib.A=FALSE,
                         SL.lib.Z=FALSE,
                         SL.lib.Y=FALSE,
                         iterations=10){
requireNamespace('data.table')
requireNamespace('SuperLearner')
requireNamespace('riskRegression')
#variable and input check
if(is.na(exposure.A)|is.na(mediator.Z)|is.na(outcome.Y)) {
stop('Please specify names for the exposure, mediator, and outcome variables.')
}
if(is.data.table(data)){
dt <- data.table::copy(data)
}else{
dt <- data.table::as.data.table(data)
}
if(exists('id',dt)==FALSE){
dt[,id:=1:.N]
}
#convert variable names
if (exposure.A!='A'){
setnames(dt,exposure.A,'A')
}
if (mediator.Z!='Z'){
setnames(dt,mediator.Z,'Z')
}
if (outcome.Y!='Y'){
setnames(dt,outcome.Y,'Y')
}
#check that the exposure, mediator, and outcome each take exactly two values
if (length(unique(dt[,A]))!=2 | length(unique(dt[,Z]))!=2 | length(unique(dt[,Y]))!=2) {
stop('Exposure, mediator, and outcome must be binary.')
}
#
# if (!is.integer(dt[,A])){
# dt[,A:=as.integer(A)]
# warning(paste('The exposure variable has been converted to integer'))
# }
#
# if (!is.integer(dt[,Z])) {
# dt[,Z:=as.integer(Z)]
# warning(paste('The mediator variable has been converted to integer'))
# }
#
# if (!is.integer(dt[,Y])) {
# dt[,Y:=as.integer(Y)]
# warning(paste('The outcome variable has been converted to integer'))
# }
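# Initial nuisance estimates:
# pibar: marginal probability of exposure, P(A=1)
# pifit: Super Learner fit for the exposure given cov.A
# gammafit: Super Learner fit for the mediator given the exposure and cov.Z
# Qfit: Super Learner fit for the outcome given the exposure, mediator, and cov.Y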
pibar <- dt[,mean(A==1)]
pifit <- SuperLearner::SuperLearner(Y=dt[,A], #dt[,as.numeric(A==1)],
X=dt[,.SD,.SDcols=cov.A],
family = binomial(),
SL.library = SL.lib.A)
gammafit <-SuperLearner::SuperLearner(Y=dt[,Z],#dt[,as.numeric(Z==1)],
X=dt[,.SD,.SDcols=c(cov.Z,'A')],
family = binomial(),
SL.library = SL.lib.Z)
Qfit <-SuperLearner::SuperLearner(Y=dt[,Y],#dt[,as.numeric(Y==1)],
X=dt[,.SD,.SDcols=c(cov.Y,'A','Z')],
family = binomial(),
SL.library = SL.lib.Y)
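# Predictions from either the weighted Super Learner combination (discrete.SL=FALSE)
# or the single best-performing algorithm (discrete.SL=TRUE):
# pihat: predicted P(A=1 | cov.A)
# gammahat.a0 / gammahat.a1: predicted P(Z=1 | A=0, cov.Z) and P(Z=1 | A=1, cov.Z)
# Qhat: predicted P(Y=1 | A, Z, cov.Y) with A as observed and Z as in dt.full;
# Qhat.a1: with A set to 1; Qhat.a1.z0 / Qhat.a1.z1: with A=1 and Z fixed at 0 / 1.
# dt.full stacks two copies of the data with Z set to 1 and 0; Z.obs keeps the
# observed mediator value.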
if (discrete.SL==FALSE){
dt[, pihat:=predict(pifit, newdata=dt[,.SD,.SDcols=cov.A], onlySL = T)$pred]
dt[, gammahat.a0:=predict(gammafit, newdata=copy(dt[,.SD,.SDcols=c(cov.Z)])[, A:=0], onlySL = T)$pred]
dt[, gammahat.a1:=predict(gammafit, newdata=copy(dt[,.SD,.SDcols=c(cov.Z)])[,A:=1], onlySL = T)$pred]
dt.full <- data.table(rbind(copy(dt)[, Z:=1],
copy(dt)[, Z:=0]),
Z.obs=c(dt[, Z], dt[, Z]))
dt.full[, Qhat:=predict(Qfit, newdata=dt.full[,.SD,.SDcols=c(cov.Y,'A','Z')], onlySL = T)$pred]
dt.full[, Qhat.a1:=predict(Qfit, newdata=copy(dt.full[,.SD,.SDcols=c(cov.Y,'Z')])[,A:=1], onlySL = T)$pred]
dt.full[, Qhat.a1.z0:=predict(Qfit, newdata=copy(dt.full[,.SD,.SDcols=c(cov.Y)])[, `:=`(A=1, Z=0)], onlySL = T)$pred]
dt.full[, Qhat.a1.z1:=predict(Qfit, newdata=copy(dt.full[,.SD,.SDcols=c(cov.Y)])[, `:=`(A=1, Z=1)], onlySL = T)$pred]
}
if (discrete.SL==TRUE){
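# Discrete Super Learner: for each nuisance model, keep the library fit with the
# largest Super Learner weight and predict from that single algorithm.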
p<-pifit$libraryNames[which.max(pifit$coef)]
pifit.discrete<-pifit$fitLibrary[[p]]
g<-gammafit$libraryNames[which.max(gammafit$coef)]
gammafit.discrete<-gammafit$fitLibrary[[g]]
Q<-Qfit$libraryNames[which.max(Qfit$coef)]
Qfit.discrete<-Qfit$fitLibrary[[Q]]
dt[, pihat:=predict(pifit.discrete, newdata=copy(dt[,.SD,.SDcols=c(cov.A)]), type="response")]
dt[, gammahat.a0:=predict(gammafit.discrete, newdata=copy(dt[,.SD,.SDcols=c(cov.Z)])[, A:=0], type="response")]
dt[, gammahat.a1:=predict(gammafit.discrete, newdata=copy(dt[,.SD,.SDcols=c(cov.Z)])[, A:=1], type="response")]
dt.full <- data.table(rbind(copy(dt)[, Z:=1],
copy(dt)[, Z:=0]),
Z.obs=c(dt[, Z], dt[, Z]))
dt.full[, Qhat:=predict(Qfit.discrete, newdata=dt.full, type="response")]
dt.full[, Qhat.a1:=predict(Qfit.discrete, newdata=copy(dt.full)[, A:=1], type="response")]
dt.full[, Qhat.a1.z0:=predict(Qfit.discrete, newdata=copy(dt.full)[, `:=`(A=1, Z=0)], type="response")]
dt.full[, Qhat.a1.z1:=predict(Qfit.discrete, newdata=copy(dt.full)[, `:=`(A=1, Z=1)], type="response")]
}
# psi.1: expected outcome risk among the exposed with the mediator distribution as observed among the exposed (no intervention)
dt.full[, psi.1:=sum(Qhat.a1*(gammahat.a1*Z+(1-gammahat.a1)*(1-Z))), by="id"]
# psi.0: expected outcome risk among the exposed had the mediator distribution been as observed among similar unexposed individuals
dt.full[, psi.0:=sum(Qhat.a1*(gammahat.a0*Z+(1-gammahat.a0)*(1-Z))), by="id"]
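# Initial (untargeted) plug-in estimates psihat.1 and psihat.0, together with the
# estimated efficient influence curves eic1 and eic0 used for the standard errors.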
psihat.1 <- dt.full[Z==Z.obs, mean((A==1)/pibar*(Y - Qhat.a1) +
(A==1)/pibar*(Qhat.a1 - psi.1) +
(A==1)/pibar*(psi.1))]
dt.full[Z==Z.obs,eic1:=((A==1)/pibar*(Y - Qhat.a1) +
(A==1)/pibar*(Qhat.a1 - psi.1) +
(A==1)/pibar*(psi.1 - psihat.1))]
psihat.0 <-dt.full[Z==Z.obs, mean((A==1)/pibar*((Z*gammahat.a0+(1-Z)*(1-gammahat.a0))/
(Z*gammahat.a1+(1-Z)*(1-gammahat.a1)))*
(Y - Qhat.a1) +
(A==0)/(1-pihat)*(pihat)/pibar*(Qhat.a1 - psi.0) +
(A==1)/pibar*(psi.0))]
dt.full[Z==Z.obs,eic0:=((A==1)/pibar*((Z*gammahat.a0+(1-Z)*(1-gammahat.a0))/
(Z*gammahat.a1+(1-Z)*(1-gammahat.a1)))*
(Y - Qhat.a1) +
(A==0)/(1-pihat)*(pihat)/pibar*(Qhat.a1 - psi.0) +
(A==1)/pibar*(psi.0 - psihat.0))]
se0 <- sqrt(mean(dt.full[Z==Z.obs,eic0]^2)/nrow(dt))
se1 <- sqrt(mean(dt.full[Z==Z.obs,eic1]^2)/nrow(dt))
dt.full[Z==Z.obs,eic:=eic0-eic1]
se.diff <- sqrt(mean((dt.full[Z==Z.obs,eic])^2)/nrow(dt))
dt.full.copy <- copy(dt.full)
#-------- tmle
#-- psi0;
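# Iterative targeting for psi0: in each iteration the outcome predictions (Qhat.a1*),
# the mediator predictions under A=0 (gammahat.a0), and the propensity (pihat) are
# fluctuated on the logit scale along their clever covariates (H.Y, H.Z, H.A).
# Iteration stops once the empirical mean of the efficient influence function is
# sufficiently close to zero.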
for (iter in 1:iterations) { #-- iterative updating;
#-- update Q;
dt.full[, H.Y:=1/pibar*((Z*gammahat.a0+(1-Z)*(1-gammahat.a0))/
(Z*gammahat.a1+(1-Z)*(1-gammahat.a1)))]
eps.Y <- coef(glm(Y ~ offset(qlogis(Qhat.a1))+H.Y-1, data=dt.full[Z==Z.obs & A==1], family=binomial()))
dt.full[, Qhat.a1:=plogis(qlogis(Qhat.a1) + eps.Y/pibar*((Z*gammahat.a0+(1-Z)*(1-gammahat.a0))/
(Z*gammahat.a1+(1-Z)*(1-gammahat.a1))))]
dt.full[, Qhat.a1.z0:=plogis(qlogis(Qhat.a1.z0) + eps.Y/pibar*(((1-gammahat.a0))/
((1-gammahat.a1))))]
dt.full[, Qhat.a1.z1:=plogis(qlogis(Qhat.a1.z1) + eps.Y/pibar*((gammahat.a0)/
(gammahat.a1)))]
#-- update Z;
dt.full[, H.Z:=1/(1-pihat)*(pihat)/pibar*(Qhat.a1.z1 - Qhat.a1.z0)]
eps.Z <- coef(glm(Z ~ offset(qlogis(gammahat.a0))+H.Z-1, data=dt.full[Z==Z.obs & A==0], family=binomial()))
dt.full[, gammahat.a0:=plogis(qlogis(gammahat.a0) + eps.Z*H.Z)]
dt.full[, psi.0:=sum(Qhat.a1*(gammahat.a0*Z+(1-gammahat.a0)*(1-Z))), by="id"]
tmle.est0 <- dt.full[Z==Z.obs, (1/pibar)*mean((pihat)*psi.0)]
#-- update A;
dt.full[, H.A:=(psi.0-tmle.est0)/pibar]
eps.A <- coef(glm(A==1 ~ offset(qlogis(pihat))+H.A-1, data=dt.full[Z==Z.obs], family=binomial()))
dt.full[, pihat:=plogis(qlogis(pihat) + eps.A*H.A)]
#updated estimate
tmle.est0 <- dt.full[Z==Z.obs, (1/pibar)*mean((A)*psi.0)]
solve.eic0 <- abs(dt.full[Z==Z.obs, mean((A==1)/pibar*((Z*gammahat.a0+(1-Z)*(1-gammahat.a0))/
(Z*gammahat.a1+(1-Z)*(1-gammahat.a1)))*
(Y - Qhat.a1) +
(A==0)/(1-pihat)*(pihat)/pibar*(Qhat.a1.z1 - Qhat.a1.z0)*(Z-gammahat.a0) +
(psi.0 - tmle.est0)/pibar*((A==1) - pihat) +
pihat/pibar*(psi.0-tmle.est0))])
if (solve.eic0<=se0/(log(nrow(dt))*sqrt(nrow(dt)))) break
if(iter==iterations) {
warning(paste('Efficient influence function for psi0 was not solved in',iterations,'iterations'))
}
}
#-- psi1;
dt.full <- copy(dt.full.copy)
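# Iterative targeting for psi1 (no intervention on the mediator): only the outcome
# predictions and the propensity are fluctuated, since the observed mediator
# distribution among the exposed (gammahat.a1) is used directly.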
for (iter in 1:iterations) { #-- iterative updating;
#-- update Q;
dt.full[, H.Y:=1/pibar]
eps.Y <- coef(glm(Y ~ offset(qlogis(Qhat.a1))+H.Y-1, data=dt.full[Z==Z.obs & A==1], family=binomial()))
dt.full[, Qhat.a1:=plogis(qlogis(Qhat.a1) + eps.Y/pibar)]
dt.full[, Qhat.a1.z0:=plogis(qlogis(Qhat.a1.z0) + eps.Y/pibar)]
dt.full[, Qhat.a1.z1:=plogis(qlogis(Qhat.a1.z1) + eps.Y/pibar)]
dt.full[, psi.1:=sum(Qhat.a1*(gammahat.a1*Z+(1-gammahat.a1)*(1-Z))), by="id"]
tmle.est1 <- dt.full[Z==Z.obs, (1/pibar)*mean((pihat)*psi.1)]
#-- update A;
dt.full[, H.A:=(psi.1-tmle.est1)/pibar]
eps.A <- coef(glm(A==1 ~ offset(qlogis(pihat))+H.A-1, data=dt.full[Z==Z.obs], family=binomial()))
dt.full[, pihat:=plogis(qlogis(pihat) + eps.A*H.A)]
#-- updated estimate
tmle.est1 <- dt.full[Z==Z.obs, (1/pibar)*mean((A)*psi.1)]
solve.eic1 <- abs(dt.full[Z==Z.obs, mean((A==1)/pibar*(Y - Qhat.a1) +
(psi.1 - tmle.est1)/pibar*((A==1) - (pihat)) +
(pihat)/pibar*(psi.1-tmle.est1))])
if (solve.eic1<=se1/(log(nrow(dt))*sqrt(nrow(dt)))) break
if(iter==iterations) {
warning(paste('Efficient influence function for psi1 was not solved in',iterations,'iterations'))
}
}
# prepare output
out<-list()
psi.diff.tmle<-tmle.est0-tmle.est1
out$estimate=list(psi0=tmle.est0,psi1=tmle.est1,psi=psi.diff.tmle)
out$se=list(se0=se0,se1=se1,se.diff=se.diff)
#CV risk for exposure, mediator, and outcome
out$superlearner.CVrisk$A.exposure<-pifit$cvRisk
out$superlearner.CVrisk$Z.mediator<-gammafit$cvRisk
out$superlearner.CVrisk$Y.outcome<-Qfit$cvRisk
#weights assigned to each algorithm in the super learner library
if (discrete.SL==TRUE){
out$superlearner.discrete$A.exposure<-p
out$superlearner.discrete$Z.mediator<-g
out$superlearner.discrete$Y.outcome<-Q
}
if (discrete.SL==FALSE){
out$superlearner.weight$A.exposure<-pifit$coef
out$superlearner.weight$Z.mediator<-gammafit$coef
out$superlearner.weight$Y.outcome<-Qfit$coef
}
out$distributions=rbind(distribution.A1=dt.full[Z==Z.obs & A==1, summary(pihat)],
distribution.Z.a1=dt.full[Z==Z.obs & A==1, summary(gammahat.a1)],
distribution.Z.a0=dt.full[Z==Z.obs & A==1, summary(gammahat.a0)],
distribution.Y=dt.full[Z==Z.obs & A==1, summary(Qhat)],
distribution.Y.a1=dt.full[Z==Z.obs & A==1, summary(Qhat.a1)])
out$output.dataset<-dt.full
class(out)<-'idie_exposed'
return(invisible(out))
}