R/support_functions_txrwd.R

Defines functions bvt_txinrwd selector_func_txrwd

#' @importFrom SuperLearner SuperLearner
#' @importFrom stats predict
#Function for training initial estimators for the outcome, treatment mechanism, and study selection mechanism regressions for RCT with or without real-world data (when active treatment is available in real-world data)
selector_func_txrwd <- function(train_s, data, Q.SL.library, d.SL.library.RCT, d.SL.library.RWD, g.SL.library, pRCT, family, family_nco, fluctuation = "logistic", NCO=NULL, Delta=NULL, Delta_NCO = NULL, adjustnco, target.gwt, Q.discreteSL, d.discreteSL, g.discreteSL, bounds=NULL, cvControl=list()){

  #Train regressions for different experiments
  if(any(train_s$S==0)){

    Y <- train_s$Y
    if(family=="gaussian" & fluctuation == "logistic"){
      Y <- (Y - min(data$Y, na.rm = TRUE))/(max(data$Y, na.rm = TRUE) - min(data$Y, na.rm = TRUE))
    }

    if(adjustnco == FALSE){
      X <- train_s[,which((colnames(train_s) %in% c("Y", "nco", "NCO_delta", "v", "id"))==FALSE)]
    } else {
      X <- train_s[,which((colnames(train_s) %in% c("Y", "NCO_delta", "v", "id"))==FALSE)]
    }


    # set A=0 in X0 and A=1 in X1
    X0 <- X1 <- X
    X0$A <- 0
    X1$A <- 1

    Ynomiss <- Y[which(X$Delta==1)]
    Xnomiss <- X[which(X$Delta==1),]
    Xnomiss <- Xnomiss[ , -which(colnames(Xnomiss) %in% c("Delta"))]


    # call Super Learner for estimation of QbarAW
    if(fluctuation == "logistic"){
      QbarSL<- suppressWarnings(SuperLearner(Y=Ynomiss, X=Xnomiss, SL.library=Q.SL.library, family="binomial", control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id[which(X$Delta==1)]))
      if(Q.discreteSL==TRUE){
        keepAlg <- which.min(QbarSL$cvRisk)
        QbarSL <- QbarSL$fitLibrary[[keepAlg]]
      }

    } else {
      QbarSL<- suppressWarnings(SuperLearner(Y=Ynomiss, X=Xnomiss, SL.library=Q.SL.library, family=family, cvControl = cvControl, id=train_s$id[which(X$Delta==1)]))
      if(Q.discreteSL==TRUE){
        keepAlg <- which.min(QbarSL$cvRisk)
        QbarSL <- QbarSL$fitLibrary[[keepAlg]]
      }
    }

    # initial estimates of the outcome, given the observed exposure & covariates
    if(Q.discreteSL==TRUE){
      QbarAW <- predict(QbarSL, newdata=X)
      # estimates of the outcome, given A=0 and covariates
      Qbar0W<- predict(QbarSL, newdata=X0)
      Qbar1W<- predict(QbarSL, newdata=X1)
    } else {
      QbarAW <- predict(QbarSL, newdata=X)$pred
      # estimates of the outcome, given A=0 and covariates
      Qbar0W<- predict(QbarSL, newdata=X0)$pred
      Qbar1W<- predict(QbarSL, newdata=X1)$pred
    }

    dHat1W <- list()
    if(is.null(Delta)==FALSE){
      DbarSL<- suppressWarnings(SuperLearner(Y=X$Delta, X=X[ , -which(colnames(X) %in% c("Delta"))], SL.library=d.SL.library.RWD, family="binomial", control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id))
      if(d.discreteSL==TRUE){
        keepAlg <- which.min(DbarSL$cvRisk)
        DbarSL <- DbarSL$fitLibrary[[keepAlg]]
        dHat1W$A1 <- predict(DbarSL, newdata = X1)
        dHat1W$A0 <- predict(DbarSL, newdata = X0)
      } else {
        dHat1W$A1 <- predict(DbarSL, newdata = X1)$pred
        dHat1W$A0 <- predict(DbarSL, newdata = X0)$pred
      }
    } else {
      X$Delta <- dHat1W$A1 <- dHat1W$A0 <- rep(1, nrow(X))
      DbarSL <- NULL
    }

    # Estimate the exposure mechanism g(A|W)
    gHatSL<- suppressWarnings(SuperLearner(Y=X$A, X=X[ , -which(colnames(X) %in% c("A","Delta"))], SL.library=g.SL.library, family="binomial", control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id))
    if(g.discreteSL==TRUE){
      keepAlg <- which.min(gHatSL$cvRisk)
      gHatSL <- gHatSL$fitLibrary[[keepAlg]]
      gHat1W<- predict(gHatSL, newdata = X)
    } else {
      gHat1W<- gHatSL$SL.predict
    }

    # predicted prob of not being exposed, given baseline covariates
    gHat0W<- 1- gHat1W

    #-------------------------------------------------
    # Clever covariate H(A,W) for each subject
    #-------------------------------------------------

    if(target.gwt){
      wt <- (as.numeric(X$A==1 & X$Delta==1))/.bound((gHat1W*dHat1W$A1),nrow(train_s), bounds) + 1*(as.numeric(X$A==0 & X$Delta==1))/.bound((gHat0W*dHat1W$A0),nrow(train_s), bounds)
      H.AW1 <- as.numeric(X$A==1 & X$Delta==1)
      H.AW0 <- -1*(as.numeric(X$A==0 & X$Delta==1))
      H.AW <- (as.numeric(X$A==1 & X$Delta==1))-1*(as.numeric(X$A==0 & X$Delta==1))

      # also want to evaluate the clever covariates at A=0 for all subjects
      H.0W<- rep(-1, nrow(X))
      H.1W<- rep(1, nrow(X))
    } else{
      wt <- rep(1, nrow(X))
      H.AW1 <- (as.numeric(X$A==1 & X$Delta==1))/.bound((gHat1W*dHat1W$A1),nrow(train_s), bounds)
      H.AW0 <- -1*(as.numeric(X$A==0 & X$Delta==1))/.bound((gHat0W*dHat1W$A0),nrow(train_s), bounds)

      H.AW <- (as.numeric(X$A==1 & X$Delta==1))/.bound((gHat1W*dHat1W$A1),nrow(train_s), bounds)-1*(as.numeric(X$A==0 & X$Delta==1))/.bound((gHat0W*dHat1W$A0),nrow(train_s), bounds)

      # also want to evaluate the clever covariates at A=0 for all subjects
      H.0W<- -1/.bound((gHat0W*dHat1W$A0),nrow(train_s), bounds)
      H.1W<- 1/.bound((gHat1W*dHat1W$A1),nrow(train_s), bounds)
    }

    #TMLE for E[E[Y|W,A=0,S=1]]

    # set S=1 and A to 0 or 1
    XS1 <- XS0 <- X
    XS1$S <- XS0$S <- 1
    XS1$A <- 1
    XS0$A <- 0

    # initial estimates of the outcome, given the observed exposure & covariates
    if(Q.discreteSL==TRUE){
      QbarSAW <- predict(QbarSL, newdata=X)
      # estimates of the outcome, given S=1 and A=1 or A=0 and covariates
      QbarS1W<- predict(QbarSL, newdata=XS1)
      QbarS0W<- predict(QbarSL, newdata=XS0)
    } else {
      QbarSAW <- predict(QbarSL, newdata=X)$pred
      # estimates of the outcome, given S=1 and A=1 or A=0 and covariates
      QbarS1W<- predict(QbarSL, newdata=XS1)$pred
      QbarS0W<- predict(QbarSL, newdata=XS0)$pred
    }

    # Estimate the trial participation mechanism g(S|A,W)
    gSHatSL<- suppressWarnings(SuperLearner(Y=X$S, X=X[ , -which(names(X) %in% c("S","A","Delta"))], SL.library=g.SL.library, family="binomial", control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id))
    if(g.discreteSL==TRUE){
      keepAlg <- which.min(gSHatSL$cvRisk)
      gSHatSL <- gSHatSL$fitLibrary[[keepAlg]]
      gSHat1W <- predict(gSHatSL, newdata = X)
    } else {
      # generate predicted prob of being in trial, given baseline covariates
      gSHat1W <- predict(gSHatSL, newdata = X)$pred
    }

    #get missingness mechanism setting S=1: P(Delta=1|A,S=1,W)
    if(is.null(Delta)==FALSE){
      if(d.discreteSL==TRUE){
        dHat1W$S1A1 <- predict(DbarSL, newdata = XS1)
        dHat1W$S1A0 <- predict(DbarSL, newdata = XS0)
      } else {
        dHat1W$S1A1 <- predict(DbarSL, newdata = XS1)$pred
        dHat1W$S1A0 <- predict(DbarSL, newdata = XS0)$pred
      }
    } else {
      dHat1W$S1A1 <- dHat1W$S1A0 <- rep(1, nrow(X))
    }

    #P(A|S=1,W)
    if(g.discreteSL==TRUE){
      gHata1_S1W<- predict(gHatSL, newdata = XS1)
    } else {
      gHata1_S1W<- predict(gHatSL, newdata = XS1)$pred
    }

    gHata0_S1W = 1-gHata1_S1W

    #-------------------------------------------------
    # Clever covariate H(S,A,W) for each subject
    #-------------------------------------------------
    if(target.gwt){
      wt_s <- as.numeric(X$A==1 & X$S==1 & X$Delta == 1)/.bound((gSHat1W*gHata1_S1W*dHat1W$S1A1),nrow(train_s), bounds) + as.numeric(X$A==0 & X$S==1 & X$Delta == 1)/.bound((gSHat1W*gHata0_S1W*dHat1W$S1A0),nrow(train_s), bounds)
      H.SAW1 <- as.numeric(X$A==1 & X$S==1 & X$Delta == 1)
      H.SAW0 <- -1*as.numeric(X$A==0 & X$S==1 & X$Delta == 1)

      # also want to evaluate the clever covariates at S=1 and A=1 or 0 for all subjects
      H.S1W<- rep(1, nrow(X))
      H.S0W<- rep(-1, nrow(X))
    } else{
      wt_s <- rep(1, nrow(X))
      H.SAW1 <- as.numeric(X$A==1 & X$S==1 & X$Delta == 1)/.bound((gSHat1W*gHata1_S1W*dHat1W$S1A1),nrow(train_s), bounds)
      H.SAW0 <- -1*as.numeric(X$A==0 & X$S==1 & X$Delta == 1)/.bound((gSHat1W*gHata0_S1W*dHat1W$S1A0),nrow(train_s), bounds)

      # also want to evaluate the clever covariates at S=1 and A=1 or 0 for all subjects
      H.S1W<- 1/.bound((gSHat1W*gHata1_S1W*dHat1W$S1A1),nrow(train_s), bounds)
      H.S0W<- -1/.bound((gSHat1W*gHata0_S1W*dHat1W$S1A0),nrow(train_s), bounds)
    }

    if(is.null(NCO)==FALSE){
      # call Super Learner for estimation of NCObarAW
      if(family_nco=="gaussian" & fluctuation == "logistic"){
        train_s_nco <- (train_s$nco - min(data$nco, na.rm=TRUE))/(max(data$nco, na.rm=TRUE) - min(data$nco, na.rm=TRUE))
      } else {
        train_s_nco <- train_s$nco
      }

      X <- train_s[,which((colnames(train_s) %in% c("Y", "nco", "Delta", "v", "id"))==FALSE)]
      X0 <- X1 <- X
      X0$A <- 0
      X1$A <- 1


      NCOnomiss <- train_s_nco[which(X$NCO_delta==1)]
      Xnomiss <- X[which(X$NCO_delta==1),]
      Xnomiss <- Xnomiss[ , -which(colnames(Xnomiss) %in% c("NCO_delta"))]


      if(fluctuation == "logistic"){
        NCObarSL<- suppressWarnings(SuperLearner(Y=NCOnomiss, X=Xnomiss, SL.library=Q.SL.library, family="binomial", control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id[which(X$NCO_delta==1)]))
        if(Q.discreteSL==TRUE){
          keepAlg <- which.min(NCObarSL$cvRisk)
          NCObarSL <- NCObarSL$fitLibrary[[keepAlg]]
        }
      } else {
        NCObarSL<- suppressWarnings(SuperLearner(Y=NCOnomiss, X=Xnomiss, SL.library=Q.SL.library, family=family, control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id[which(X$NCO_delta==1)]))
        if(Q.discreteSL==TRUE){
          keepAlg <- which.min(NCObarSL$cvRisk)
          NCObarSL <- NCObarSL$fitLibrary[[keepAlg]]
        }
      }

      if(Q.discreteSL==TRUE){
        NCObarAW <- predict(NCObarSL, X)
        NCObar1W <- predict(NCObarSL, X1)
        NCObar0W <- predict(NCObarSL, X0)
      } else {
        NCObarAW <- predict(NCObarSL, X)$pred
        NCObar1W <- predict(NCObarSL, X1)$pred
        NCObar0W <- predict(NCObarSL, X0)$pred
      }

      # Estimate the exposure mechanism g(A|W)
      gHatSLnco<- suppressWarnings(SuperLearner(Y=X$A, X=X[ , -which(colnames(X) %in% c("A","NCO_delta"))], SL.library=g.SL.library, family="binomial", control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id))
      if(g.discreteSL==TRUE){
        keepAlg <- which.min(gHatSLnco$cvRisk)
        gHatSLnco <- gHatSLnco$fitLibrary[[keepAlg]]
        gHat1W<- predict(gHatSLnco, X)
      } else {
        gHat1W<- gHatSLnco$SL.predict
      }

      # predicted prob of not being exposed, given baseline covariates
      gHat0W<- 1- gHat1W

      dHat1Wnco <- list()
      if(is.null(Delta_NCO)==FALSE){
        DbarSLnco<- suppressWarnings(SuperLearner(Y=X$NCO_delta, X=X[ , -which(colnames(X) %in% c("NCO_delta"))], SL.library=d.SL.library.RWD, family="binomial", control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id))
        if(d.discreteSL==TRUE){
          keepAlg <- which.min(DbarSLnco$cvRisk)
          DbarSLnco <- DbarSLnco$fitLibrary[[keepAlg]]
          dHat1Wnco$A1 <- predict(DbarSLnco, newdata = X1)
          dHat1Wnco$A0 <- predict(DbarSLnco, newdata = X0)
        } else {
          dHat1Wnco$A1 <- predict(DbarSLnco, newdata = X1)$pred
          dHat1Wnco$A0 <- predict(DbarSLnco, newdata = X0)$pred
        }
      } else {
        dHat1Wnco$A1 <- dHat1Wnco$A0 <- rep(1, nrow(X))
        DbarSLnco <- NULL
      }

      #-------------------------------------------------
      # Clever covariate H(A,W) for each subject
      #-------------------------------------------------
      if(target.gwt){
        wt_nco <- (as.numeric(X$A==1 & X$NCO_delta==1))/.bound((gHat1W*dHat1Wnco$A1),nrow(train_s), bounds) + 1*(as.numeric(X$A==0 & X$NCO_delta==1))/.bound((gHat0W*dHat1Wnco$A0),nrow(train_s), bounds)
        H.AW1nco <- as.numeric(X$A==1 & X$NCO_delta==1)
        H.AW0nco <- -1*(as.numeric(X$A==0 & X$NCO_delta==1))
        H.AWnco <- (as.numeric(X$A==1 & X$NCO_delta==1))-1*(as.numeric(X$A==0 & X$NCO_delta==1))

        # also want to evaluate the clever covariates at A=0 for all subjects
        H.0Wnco<- rep(-1, nrow(X))
        H.1Wnco<- rep(1, nrow(X))
      } else{
        wt_nco <- rep(1, nrow(X))
        H.AW1nco <- (as.numeric(X$A==1 & X$NCO_delta==1))/.bound((gHat1W*dHat1Wnco$A1),nrow(train_s), bounds)
        H.AW0nco <- -1*(as.numeric(X$A==0 & X$NCO_delta==1))/.bound((gHat0W*dHat1Wnco$A0),nrow(train_s), bounds)

        H.AWnco <- (as.numeric(X$A==1 & X$NCO_delta==1))/.bound((gHat1W*dHat1Wnco$A1),nrow(train_s), bounds)-1*(as.numeric(X$A==0 & X$NCO_delta==1))/.bound((gHat0W*dHat1Wnco$A0),nrow(train_s), bounds)

        # also want to evaluate the clever covariates at A=0 for all subjects
        H.0Wnco<- -1/.bound((gHat0W*dHat1Wnco$A0),nrow(train_s), bounds)
        H.1Wnco<- 1/.bound((gHat1W*dHat1Wnco$A1),nrow(train_s), bounds)
      }

    } else {
      NCObarAW <- NULL
      NCObar1W <- NULL
      NCObar0W <- NULL
      H.AWnco <- NULL
      H.0Wnco <- NULL
      H.1Wnco <- NULL
      train_s_nco <- NULL
      wt_nco <- NULL
    }

  } else {
    Y <- train_s$Y
    if(family=="gaussian" & fluctuation == "logistic"){
      Y <- (Y - min(data$Y, na.rm = TRUE))/(max(data$Y, na.rm = TRUE) - min(data$Y, na.rm = TRUE))
    }

    #run tmle for E[E[Y|W,A=0]]
    if(adjustnco == FALSE){
      X <- train_s[,which((colnames(train_s) %in% c("S","Y", "nco", "NCO_delta", "v", "id"))==FALSE)]
    } else {
      X <- train_s[,which((colnames(train_s) %in% c("S","Y", "NCO_delta", "v", "id"))==FALSE)]
    }


    # set the A=0 in X0, A=1 in X1
    X0 <- X1 <- X
    X0$A <- 0
    X1$A <- 1

    Ynomiss <- Y[which(X$Delta==1)]
    Xnomiss <- X[which(X$Delta==1),]
    Xnomiss <- Xnomiss[ , -which(colnames(Xnomiss) %in% c("Delta"))]

    # call Super Learner for estimation of QbarAW
    if(fluctuation == "logistic"){
      QbarSL<- suppressWarnings(SuperLearner(Y=Ynomiss, X=Xnomiss, SL.library=Q.SL.library, family="binomial", control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id[which(X$Delta==1)]))
      if(Q.discreteSL==TRUE){
        keepAlg <- which.min(QbarSL$cvRisk)
        QbarSL <- QbarSL$fitLibrary[[keepAlg]]
      }
    } else {
      QbarSL<- suppressWarnings(SuperLearner(Y=Ynomiss, X=Xnomiss, SL.library=Q.SL.library, family=family, control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id[which(X$Delta==1)]))
      if(Q.discreteSL==TRUE){
        keepAlg <- which.min(QbarSL$cvRisk)
        QbarSL <- QbarSL$fitLibrary[[keepAlg]]
      }
    }

    if(Q.discreteSL==TRUE){
      # initial estimates of the outcome, given the observed exposure & covariates
      QbarAW <- predict(QbarSL, newdata=X)
      # estimates of the outcome, given A=0 or A=1 and covariates
      Qbar0W<- predict(QbarSL, newdata=X0)
      Qbar1W<- predict(QbarSL, newdata=X1)
    } else {
      # initial estimates of the outcome, given the observed exposure & covariates
      QbarAW <- predict(QbarSL, newdata=X)$pred
      # estimates of the outcome, given A=0 or A=1 and covariates
      Qbar0W<- predict(QbarSL, newdata=X0)$pred
      Qbar1W<- predict(QbarSL, newdata=X1)$pred
    }

    dHat1W <- list()
    if(is.null(Delta)==FALSE){
      DbarSL<- suppressWarnings(SuperLearner(Y=X$Delta, X=X[ , -which(colnames(X) %in% c("Delta"))], SL.library=d.SL.library.RCT, family="binomial", control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id))
      if(d.discreteSL==TRUE){
        keepAlg <- which.min(DbarSL$cvRisk)
        DbarSL <- DbarSL$fitLibrary[[keepAlg]]
        dHat1W$A1 <- predict(DbarSL, newdata = X1)
        dHat1W$A0 <- predict(DbarSL, newdata = X0)
      } else {
        dHat1W$A1 <- predict(DbarSL, newdata = X1)$pred
        dHat1W$A0 <- predict(DbarSL, newdata = X0)$pred
      }
    } else {
      X$Delta <- dHat1W$A1 <- dHat1W$A0 <- rep(1, nrow(X))
      DbarSL <- NULL
    }

    # Known probability of being exposed for RCT
    gHatSL<- NULL
    gHat1W<- rep(pRCT, nrow(X))
    # predicted prob of not being exposed, given baseline covariates
    gHat0W<- 1- gHat1W

    #-------------------------------------------------
    # Clever covariate H(A,W) for each subject
    #-------------------------------------------------
    if(target.gwt){
      wt <- (as.numeric(X$A==1 & X$Delta==1))/.bound((gHat1W*dHat1W$A1),nrow(train_s), bounds) + 1*(as.numeric(X$A==0 & X$Delta==1))/.bound((gHat0W*dHat1W$A0),nrow(train_s), bounds)
      H.AW1 <- as.numeric(X$A==1 & X$Delta==1)
      H.AW0 <- -1*(as.numeric(X$A==0 & X$Delta==1))
      H.AW <- (as.numeric(X$A==1 & X$Delta==1))-1*(as.numeric(X$A==0 & X$Delta==1))

      # also want to evaluate the clever covariates at A=0 and A=1 for all subjects
      H.0W<- rep(-1, nrow(X))
      H.1W<- rep(1, nrow(X))
    } else{
      wt <- rep(1, nrow(X))
      H.AW1 <- (as.numeric(X$A==1 & X$Delta==1))/.bound((gHat1W*dHat1W$A1),nrow(train_s), bounds)
      H.AW0 <- -1*(as.numeric(X$A==0 & X$Delta==1))/.bound((gHat0W*dHat1W$A0),nrow(train_s), bounds)

      H.AW <- (as.numeric(X$A==1 & X$Delta==1))/.bound((gHat1W*dHat1W$A1),nrow(train_s), bounds)-1*(as.numeric(X$A==0 & X$Delta==1))/.bound((gHat0W*dHat1W$A0),nrow(train_s), bounds)

      # also want to evaluate the clever covariates at A=0 for all subjects
      H.0W<- -1/.bound((gHat0W*dHat1W$A0),nrow(train_s), bounds)
      H.1W<- 1/.bound((gHat1W*dHat1W$A1),nrow(train_s), bounds)
    }

    QbarSAW <- NULL
    QbarS1W <- NULL
    QbarS0W <- NULL
    H.SAW1 <- NULL
    H.SAW0 <- NULL
    H.S1W <- NULL
    H.S0W <- NULL
    wt_s <- rep(1, nrow(X))

    if(is.null(NCO)==FALSE){
      # call Super Learner for estimation of NCObarAW
      if(family_nco=="gaussian" & fluctuation == "logistic"){
        train_s_nco <- (train_s$nco - min(data$nco, na.rm=TRUE))/(max(data$nco, na.rm=TRUE) - min(data$nco, na.rm=TRUE))
      } else {
        train_s_nco <- train_s$nco
      }

      X <- train_s[,which((colnames(train_s) %in% c("S","Y", "nco", "Delta", "v", "id"))==FALSE)]
      X0 <- X1 <- X
      X0$A <- 0
      X1$A <- 1

      NCOnomiss <- train_s_nco[which(X$NCO_delta==1)]
      Xnomiss <- X[which(X$NCO_delta==1),]
      Xnomiss <- Xnomiss[ , -which(colnames(Xnomiss) %in% c("NCO_delta"))]


      if(fluctuation == "logistic"){
        NCObarSL<- suppressWarnings(SuperLearner(Y=NCOnomiss, X=Xnomiss, SL.library=Q.SL.library, family="binomial", control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id[which(X$NCO_delta==1)]))
        if(Q.discreteSL==TRUE){
          keepAlg <- which.min(NCObarSL$cvRisk)
          NCObarSL <- NCObarSL$fitLibrary[[keepAlg]]
        }
      } else {
        NCObarSL<- suppressWarnings(SuperLearner(Y=NCOnomiss, X=Xnomiss, SL.library=Q.SL.library, family=family, control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id[which(X$NCO_delta==1)]))
        if(Q.discreteSL==TRUE){
          keepAlg <- which.min(NCObarSL$cvRisk)
          NCObarSL <- NCObarSL$fitLibrary[[keepAlg]]
        }
      }

      if(Q.discreteSL==TRUE){
        NCObarAW <- predict(NCObarSL, X)
        NCObar1W <- predict(NCObarSL, X1)
        NCObar0W <- predict(NCObarSL, X0)
      } else {
        NCObarAW <- predict(NCObarSL, X)$pred
        NCObar1W <- predict(NCObarSL, X1)$pred
        NCObar0W <- predict(NCObarSL, X0)$pred
      }

      gHat1W<- rep(pRCT, nrow(X))
      # predicted prob of not being exposed, given baseline covariates
      gHat0W<- 1- gHat1W

      dHat1Wnco <- list()
      if(is.null(Delta_NCO)==FALSE){
        DbarSLnco<- suppressWarnings(SuperLearner(Y=X$NCO_delta, X=X[ , -which(colnames(X) %in% c("NCO_delta"))], SL.library=d.SL.library.RCT, family="binomial", control = list(saveFitLibrary=TRUE), cvControl = cvControl, id=train_s$id))
        if(d.discreteSL==TRUE){
          keepAlg <- which.min(DbarSLnco$cvRisk)
          DbarSLnco <- DbarSLnco$fitLibrary[[keepAlg]]
          dHat1Wnco$A1 <- predict(DbarSLnco, newdata = X1)
          dHat1Wnco$A0 <- predict(DbarSLnco, newdata = X0)
        } else {
          dHat1Wnco$A1 <- predict(DbarSLnco, newdata = X1)$pred
          dHat1Wnco$A0 <- predict(DbarSLnco, newdata = X0)$pred
        }
      } else {
        X$NCO_delta <- dHat1Wnco$A1 <- dHat1Wnco$A0 <- rep(1, nrow(X))
        DbarSLnco <- NULL
      }

      #-------------------------------------------------
      # Clever covariate H(A,W) for each subject
      #-------------------------------------------------
      if(target.gwt){
        wt_nco <- (as.numeric(X$A==1 & X$NCO_delta==1))/.bound((gHat1W*dHat1Wnco$A1),nrow(train_s), bounds) + 1*(as.numeric(X$A==0 & X$NCO_delta==1))/.bound((gHat0W*dHat1Wnco$A0),nrow(train_s), bounds)
        H.AW1nco <- as.numeric(X$A==1 & X$NCO_delta==1)
        H.AW0nco <- -1*(as.numeric(X$A==0 & X$NCO_delta==1))
        H.AWnco <- (as.numeric(X$A==1 & X$NCO_delta==1))-1*(as.numeric(X$A==0 & X$NCO_delta==1))

        # also want to evaluate the clever covariates at A=0 and A=1 for all subjects
        H.0Wnco<- rep(-1, nrow(X))
        H.1Wnco<- rep(1, nrow(X))
      } else{
        wt_nco <- rep(1, nrow(X))
        H.AW1nco <- (as.numeric(X$A==1 & X$NCO_delta==1))/.bound((gHat1W*dHat1Wnco$A1),nrow(train_s), bounds)
        H.AW0nco <- -1*(as.numeric(X$A==0 & X$NCO_delta==1))/.bound((gHat0W*dHat1Wnco$A0),nrow(train_s), bounds)

        H.AWnco <- (as.numeric(X$A==1 & X$NCO_delta==1))/.bound((gHat1W*dHat1Wnco$A1),nrow(train_s), bounds)-1*(as.numeric(X$A==0 & X$NCO_delta==1))/.bound((gHat0W*dHat1Wnco$A0),nrow(train_s), bounds)

        # also want to evaluate the clever covariates at A=0 and A=1 for all subjects
        H.0Wnco<- -1/.bound((gHat0W*dHat1Wnco$A0),nrow(train_s), bounds)
        H.1Wnco<- 1/.bound((gHat1W*dHat1Wnco$A1),nrow(train_s), bounds)
      }

    } else {
      NCObarAW <- NULL
      NCObar1W <- NULL
      NCObar0W <- NULL
      H.AWnco <- NULL
      H.0Wnco <- NULL
      H.1Wnco <- NULL
      train_s_nco <- NULL
      wt_nco <- NULL
    }
  }

  out <- list("Y" = Y, "QbarSL" = QbarSL, "QbarAW" = QbarAW, "Qbar1W" = Qbar1W, "Qbar0W" = Qbar0W, "QbarSAW" = QbarSAW, "QbarS1W" = QbarS1W, "QbarS0W" = QbarS0W, "gHatSL" = gHatSL, "train_s_nco" = train_s_nco, "NCObarAW" = NCObarAW, "NCObar1W" = NCObar1W, "NCObar0W" = NCObar0W, "H.AW1" = H.AW1, "H.AW0" = H.AW0,"H.AW" = H.AW, "H.0W" = H.0W, "H.1W" = H.1W, "H.SAW1" = H.SAW1,"H.SAW0" = H.SAW0, "H.S1W" = H.S1W, "H.S0W" = H.S0W, "DbarSL" = DbarSL, "H.AWnco" = H.AWnco, "H.0Wnco" = H.0Wnco, "H.1Wnco"= H.1Wnco, "wt"=wt, "wt_s"=wt_s, "wt_nco"=wt_nco)

  return(out)

}

#' @importFrom stats glm
#' @importFrom stats plogis
#' @importFrom stats qlogis
#Function to estimate the bias-variance tradeoff using the experiment-selection set for each fold when active treatment is available in the real-world data
bvt_txinrwd <- function(v, selector, NCO, comparisons, train, data, fluctuation, family, n.id){
  out <- list()

  out$b2v <- vector()
  out$bias <- vector()
  out$var <- vector()
  out$EIClambdav <- list()

  datid <- data[which(duplicated(data$id)==FALSE),]

  if(is.null(NCO)==FALSE){
    out$addncobias <- vector()
    out$bias_nco <- vector()
  }

  out$EICpsipound <- matrix(0, nrow=n.id, ncol=length(comparisons))
  out$EICnco <- matrix(0, nrow=n.id, ncol=length(comparisons))


  for(s in 1:length(comparisons)){
    train_s <- train[which(train$S %in% comparisons[[s]]),]

    #tmle
    if(fluctuation == "logistic"){
      logitUpdate<- glm(selector[[v]][[s]]$Y[which(train_s$Delta==1)] ~ -1 + offset(qlogis(selector[[v]][[s]]$QbarAW[which(train_s$Delta==1)])) +  selector[[v]][[s]]$H.AW1[which(train_s$Delta==1)] + selector[[v]][[s]]$H.AW0[which(train_s$Delta==1)], family='quasibinomial', weights = selector[[v]][[s]]$wt[which(train_s$Delta==1)])

      epsilon <- logitUpdate$coef

      QbarAW.star<- plogis(qlogis(selector[[v]][[s]]$QbarAW) + epsilon[1]*selector[[v]][[s]]$H.AW1 + epsilon[2]*selector[[v]][[s]]$H.AW0)
      Qbar1W.star<- plogis(qlogis(selector[[v]][[s]]$Qbar1W) + epsilon[1]*selector[[v]][[s]]$H.1W)
      Qbar0W.star<- plogis(qlogis(selector[[v]][[s]]$Qbar0W) + epsilon[2]*selector[[v]][[s]]$H.0W)

    } else {
      logitUpdate<- glm(selector[[v]][[s]]$Y[which(train_s$Delta==1)] ~ -1 + offset(selector[[v]][[s]]$QbarAW[which(train_s$Delta==1)]) +  selector[[v]][[s]]$H.AW1[which(train_s$Delta==1)] + selector[[v]][[s]]$H.AW0[which(train_s$Delta==1)], family='gaussian', weights = selector[[v]][[s]]$wt[which(train_s$Delta==1)])

      epsilon <- logitUpdate$coef

      QbarAW.star<- selector[[v]][[s]]$QbarAW + epsilon[1]*selector[[v]][[s]]$H.AW1 + epsilon[2]*selector[[v]][[s]]$H.AW0
      Qbar1W.star<- selector[[v]][[s]]$Qbar1W + epsilon[1]*selector[[v]][[s]]$H.1W
      Qbar0W.star<- selector[[v]][[s]]$Qbar0W + epsilon[2]*selector[[v]][[s]]$H.0W
    }

    if(family=="gaussian" & fluctuation == "logistic"){
      QbarAW.star <- QbarAW.star*(max(data$Y, na.rm = TRUE) - min(data$Y, na.rm = TRUE)) + min(data$Y, na.rm = TRUE)
      Qbar1W.star <- Qbar1W.star*(max(data$Y, na.rm = TRUE) - min(data$Y, na.rm = TRUE)) + min(data$Y, na.rm = TRUE)
      Qbar0W.star <- Qbar0W.star*(max(data$Y, na.rm = TRUE) - min(data$Y, na.rm = TRUE)) + min(data$Y, na.rm = TRUE)
      trainsY <- selector[[v]][[s]]$Y*(max(data$Y, na.rm = TRUE) - min(data$Y, na.rm = TRUE)) + min(data$Y, na.rm = TRUE)
    } else {
      trainsY <- selector[[v]][[s]]$Y
    }

    #EIC
    EIClambdav_s <- (selector[[v]][[s]]$wt*(selector[[v]][[s]]$H.AW1 + selector[[v]][[s]]$H.AW0)*(trainsY - QbarAW.star) + Qbar1W.star - Qbar0W.star - mean(Qbar1W.star - Qbar0W.star))
    #account for dependent units
    #take mean of EIC observations by trains_s$id (confirmed taking mean correctly)
    out$EIClambdav[[s]] <- as.vector(by(EIClambdav_s, train_s$id, mean))

    if(s==1){
      QbarS1W.star <- Qbar1W.star
      QbarS0W.star <- Qbar0W.star
    } else {
      # Update the initial estimator
      if(fluctuation == "logistic"){
        logitUpdate<- glm(selector[[v]][[s]]$Y[which(train_s$Delta==1)] ~ -1 + offset(qlogis(selector[[v]][[s]]$QbarSAW[which(train_s$Delta==1)])) +  selector[[v]][[s]]$H.SAW1[which(train_s$Delta==1)] + selector[[v]][[s]]$H.SAW0[which(train_s$Delta==1)], family='quasibinomial', weights = selector[[v]][[s]]$wt_s[which(train_s$Delta==1)])

        epsilon <- logitUpdate$coef

        QbarSAW.star<- plogis(qlogis(selector[[v]][[s]]$QbarSAW) + epsilon[1]*selector[[v]][[s]]$H.SAW1 + epsilon[2]*selector[[v]][[s]]$H.SAW0)
        QbarS1W.star<- plogis(qlogis(selector[[v]][[s]]$QbarS1W) + epsilon[1]*selector[[v]][[s]]$H.S1W)
        QbarS0W.star<- plogis(qlogis(selector[[v]][[s]]$QbarS0W) + epsilon[2]*selector[[v]][[s]]$H.S0W)
      } else {
        logitUpdate<- glm(selector[[v]][[s]]$Y[which(train_s$Delta==1)] ~ -1 + offset(selector[[v]][[s]]$QbarSAW[which(train_s$Delta==1)]) +  selector[[v]][[s]]$H.SAW1[which(train_s$Delta==1)] + selector[[v]][[s]]$H.SAW0[which(train_s$Delta==1)], family='gaussian', weights = selector[[v]][[s]]$wt_s[which(train_s$Delta==1)])

        epsilon <- logitUpdate$coef

        QbarSAW.star<- selector[[v]][[s]]$QbarSAW + epsilon[1]*selector[[v]][[s]]$H.SAW1 + epsilon[2]*selector[[v]][[s]]$H.SAW0
        QbarS1W.star<- selector[[v]][[s]]$QbarS1W + epsilon[1]*selector[[v]][[s]]$H.S1W
        QbarS0W.star<- selector[[v]][[s]]$QbarS0W + epsilon[2]*selector[[v]][[s]]$H.S0W
      }

      if(family=="gaussian" & fluctuation == "logistic"){
        QbarSAW.star <- QbarSAW.star*(max(data$Y, na.rm = TRUE) - min(data$Y, na.rm = TRUE)) + min(data$Y, na.rm = TRUE)
        QbarS1W.star <- QbarS1W.star*(max(data$Y, na.rm = TRUE) - min(data$Y, na.rm = TRUE)) + min(data$Y, na.rm = TRUE)
        QbarS0W.star <- QbarS0W.star*(max(data$Y, na.rm = TRUE) - min(data$Y, na.rm = TRUE)) + min(data$Y, na.rm = TRUE)
      }
    }

    if(s>1){
      EICpsipound_s <- (selector[[v]][[s]]$wt*(selector[[v]][[s]]$H.AW1 + selector[[v]][[s]]$H.AW0)*(trainsY - QbarAW.star) + Qbar1W.star - Qbar0W.star - mean(Qbar1W.star - Qbar0W.star) - (selector[[v]][[s]]$wt_s*(selector[[v]][[s]]$H.SAW1 + selector[[v]][[s]]$H.SAW0)*(trainsY - QbarSAW.star) + QbarS1W.star - QbarS0W.star - mean(QbarS1W.star - QbarS0W.star)))
      EICpsipound_s <- as.vector(by(EICpsipound_s, train_s$id, mean))
      out$EICpsipound[which(datid$v!=v & datid$S %in% comparisons[[s]]),s] <- EICpsipound_s/(length(which(train$S %in% comparisons[[s]] & duplicated(train$id)==FALSE))/n.id)
    }

    out$bias[s] <- mean(Qbar1W.star - Qbar0W.star) - mean(QbarS1W.star - QbarS0W.star)
    out$var[s] <- var(out$EIClambdav[[s]])/length(unique(train_s$id))

    #nco
    if(is.null(NCO)==FALSE){

      if(fluctuation == "logistic"){
        logitUpdate<- glm(selector[[v]][[s]]$train_s_nco[which(train_s$NCO_delta==1)] ~ -1 + offset(qlogis(selector[[v]][[s]]$NCObarAW[which(train_s$NCO_delta==1)])) +  selector[[v]][[s]]$H.AWnco[which(train_s$NCO_delta==1)], family='quasibinomial', weights = selector[[v]][[s]]$wt_nco[which(train_s$NCO_delta==1)])

        epsilon <- logitUpdate$coef

        NCObarAW.star<- plogis(qlogis(selector[[v]][[s]]$NCObarAW) + epsilon*selector[[v]][[s]]$H.AWnco)
        NCObar1W.star<- plogis(qlogis(selector[[v]][[s]]$NCObar1W) + epsilon*selector[[v]][[s]]$H.1Wnco)
        NCObar0W.star<- plogis(qlogis(selector[[v]][[s]]$NCObar0W) + epsilon*selector[[v]][[s]]$H.0Wnco)

      } else {
        logitUpdate<- glm(selector[[v]][[s]]$train_s_nco[which(train_s$NCO_delta==1)] ~ -1 + offset(selector[[v]][[s]]$NCObarAW[which(train_s$NCO_delta==1)]) +  selector[[v]][[s]]$H.AWnco[which(train_s$NCO_delta==1)], family='gaussian', weights = selector[[v]][[s]]$wt_nco[which(train_s$NCO_delta==1)])

        epsilon <- logitUpdate$coef

        NCObarAW.star<- selector[[v]][[s]]$NCObarAW + epsilon*selector[[v]][[s]]$H.AWnco
        NCObar1W.star<- selector[[v]][[s]]$NCObar1W + epsilon*selector[[v]][[s]]$H.1Wnco
        NCObar0W.star<- selector[[v]][[s]]$NCObar0W + epsilon*selector[[v]][[s]]$H.0Wnco
      }

      if(family=="gaussian" & fluctuation == "logistic"){
        NCObarAW.star <- NCObarAW.star*(max(data$nco, na.rm = TRUE) - min(data$nco, na.rm = TRUE)) + min(data$nco, na.rm = TRUE)
        NCObar1W.star <- NCObar1W.star*(max(data$nco, na.rm = TRUE) - min(data$nco, na.rm = TRUE)) + min(data$nco, na.rm = TRUE)
        NCObar0W.star <- NCObar0W.star*(max(data$nco, na.rm = TRUE) - min(data$nco, na.rm = TRUE)) + min(data$nco, na.rm = TRUE)
        train_s_nco <- selector[[v]][[s]]$train_s_nco*(max(data$nco, na.rm = TRUE) - min(data$nco, na.rm = TRUE)) + min(data$nco, na.rm = TRUE)
      } else {
        train_s_nco <- selector[[v]][[s]]$train_s_nco
      }

      EICnco_s <- as.vector(selector[[v]][[s]]$wt_nco*selector[[v]][[s]]$H.AWnco*(train_s_nco - NCObarAW.star) + NCObar1W.star - NCObar0W.star - mean(NCObar1W.star - NCObar0W.star))
      EICnco_s <- as.vector(by(EICnco_s, train_s$id, mean))

      out$EICnco[which(datid$v!=v & datid$S %in% comparisons[[s]]),s] <- EICnco_s/(length(which(train$S %in% comparisons[[s]] & duplicated(train$id)==FALSE))/n.id)

      out$bias_nco[s] <- mean(NCObar1W.star - NCObar0W.star)

      out$addncobias[s] <- (out$bias[s] + out$bias_nco[s])^2 + out$var[s]
    }
    out$b2v[s] <- out$bias[s]^2 + out$var[s]
  }
  return(out)
}

Try the EScvtmle package in your browser

Any scripts or data that you put into this service are public.

EScvtmle documentation built on Jan. 6, 2023, 1:21 a.m.