# R/avgDensityValueTMLE.R

library(R6)
library(hal9001)

#' onestep TMLE of average density parameter
#'
#' @docType class
#' @keywords data
#' @return Object of \code{\link{R6Class}} with methods
#' @format \code{\link{R6Class}} object.
#' @examples
#' # avgDensityTMLE$new()
#' @field x random sample from distribution
#' @field p_hat (\code{empiricalDensity}) containing density estimates
#' @field Psi parameter value
#' @field EIC vector of EIC
#' @field epsilon_step step size for one-step targeting
#' @field CI (numeric vector) length 2; lower + upper CI
#' @field longDataOut (\code{longiData}) for transforming x into longitudinal format dataframe
#' @field HAL_tuned (\code{hal9001}) generated by \code{cvDensityHAL} class.
#' @section Methods:
#' \describe{
#'   \item{\code{new(x, epsilon_step = NULL, verbose = NULL)}}{
#'      specify data; define targeting step size
#'   }
#'   \item{\code{fit_density(bin_width = .1, lambda_grid)}}{
#'      use `cvDensityHAL` to fit density. bin_width for pre-binning of
#'      continuous x; lambda_grid for grid search of lambda in HAL
#'   }
#' }
#' @importFrom R6 R6Class
#' @importFrom Matrix colSums
#' @export
avgDensityTMLE <- R6Class("avgDensityTMLE",
  public = list(
    x = NULL, # random sample from the target distribution
    p_hat = NULL, # `empiricalDensity` object holding current density estimates
    Psi = NULL, # current estimate of the average density parameter
    EIC = NULL, # vector of efficient influence curve values at the sample points
    epsilon_step = 1e-3, # step size for one-step targeting updates
    tol = NULL, # convergence tolerance for |Pn(EIC)|; set in initialize()
    CI = NULL, # length-2 numeric vector: lower and upper 95% confidence limits
    verbose = FALSE, # print per-iteration diagnostics when TRUE
    max_iter = 1e2, # cap on the number of targeting iterations
    longDataOut = NULL, # `longiData` object: x transformed to longitudinal format
    hal_best = NULL, # CV-selected density model (wrapper with $predict)
    HAL_tuned = NULL, # underlying `hal9001` fit of the selected model
    p_hat_best = NULL, # best density candidate observed during targeting

    # Store the data and configure the targeting procedure.
    # x: numeric sample. epsilon_step / verbose: optional overrides of the
    # field defaults. max_iter: iteration cap for the targeting loops.
    initialize = function(x, epsilon_step = NULL, verbose = NULL, max_iter = 1e2) {
      self$x <- x
      # tolerance shrinks with sample size: stop once |Pn(EIC)| < 1e-3 / n
      self$tol <- 1e-3 / length(x)
      self$max_iter <- max_iter
      if (!is.null(epsilon_step)) self$epsilon_step <- epsilon_step
      if (!is.null(verbose)) self$verbose <- verbose
    },

    # Fit the initial density estimate with cross-validated HAL.
    # bin_width: width for pre-binning continuous x.
    # lambda_grid: candidate lambdas for the penalized fit (passed through).
    # M: constrained-mode bound (not implemented). n_fold: number of CV folds.
    fit_density = function(bin_width = .1, lambda_grid = NULL, M = NULL, n_fold = 3, ...) {
      # constrained-likelihood mode (non-NULL M) is not supported
      if (!is.null(M)) {
        stop("not implemented")
      }
      hal_out <- self$fit_density_pen_likeli(
        bin_width = bin_width,
        lambda_grid = lambda_grid,
        n_fold = n_fold,
        ...
      )
      # evaluate the fitted density at the observed x, then normalize so the
      # estimate integrates to one
      yhat <- hal_out$predict(new_x = self$longDataOut$x)
      density_initial <- empiricalDensity$new(p_density = yhat, x = self$x)
      self$p_hat <- density_initial$normalize()
      self$hal_best <- hal_out
    },

    # Tune and fit a penalized-likelihood HAL density estimator.
    # Cross-validates over the lambda grid, refits on the full data at the
    # selected lambda, stores the raw hal9001 fit, and returns the refit.
    fit_density_pen_likeli = function(bin_width = .1, lambda_grid = NULL, lambda_min_ratio = NULL, n_fold = 3, ...) {
      self$longDataOut <- longiData$new(x = self$x, bin_width = bin_width)

      # tune HAL for density via V-fold cross-validation over lambda
      cvHAL_fit <- cvDensityHAL$new(x = self$x, longiData = self$longDataOut)
      cvHAL_fit$assign_fold(n_fold = n_fold)
      cvHAL_fit$cv_lambda_grid(
        lambda_grid = lambda_grid, lambda_min_ratio = lambda_min_ratio, ...
      )
      hal_out <- cvHAL_fit$compute_model_full_data(cvHAL_fit$lambda.min)
      self$HAL_tuned <- hal_out$hal_fit
      return(hal_out)
    },

    # Compute Psi = integral of p(x)^2 dx, approximated by a Riemann sum over
    # the sorted support points of p_hat.
    # to_return = TRUE returns the value; otherwise it is stored in self$Psi.
    compute_Psi = function(p_hat, to_return = FALSE) {
      dummy_df <- data.frame(
        id = seq_along(p_hat$x), x = p_hat$x, p_density = p_hat$p_density
      )
      dummy_df <- dummy_df[order(dummy_df$x), ]
      # first spacing is 0: each point is weighted by the gap to its left
      dx <- c(0, diff(dummy_df$x))
      Psi <- sum(dummy_df$p_density^2 * dx)
      if (to_return) return(Psi) else self$Psi <- Psi
    },

    # Efficient influence curve of the average density: D(p)(x) = 2 * (p(x) - Psi).
    # to_return = TRUE returns the vector; otherwise it is stored in self$EIC.
    compute_EIC = function(p_hat, Psi, to_return = FALSE) {
      EIC <- 2 * (p_hat$p_density - Psi)
      if (to_return) return(EIC) else self$EIC <- EIC
    },

    # One exponential-tilt targeting step: move the density along the EIC
    # direction and re-normalize. Flips the step sign so the tilt pushes
    # Pn(EIC) toward zero.
    update_once = function() {
      if (mean(self$EIC) < 0) self$epsilon_step <- -self$epsilon_step
      self$p_hat$p_density <- self$p_hat$p_density * exp(self$epsilon_step * self$EIC)
      self$p_hat$normalize()
    },

    # Iterative TMLE targeting. Each iteration fits the fluctuation parameter
    # epsilon by OLS of the residual indicator on D*(pn) * pn (no intercept),
    # then updates pn <- pn * (1 + epsilon * EIC) and re-normalizes.
    # Stops once |Pn(EIC)| < tol or max_iter is reached; always finishes on
    # the best candidate seen.
    target_iterative = function(verbose = FALSE) {
      # fixed: previously this method silently overrode self$max_iter with a
      # hard-coded 1e3; the constructor-supplied cap is now respected
      n_iter <- 0
      absmeanEIC_prev <- abs(mean(self$EIC))
      absmeanEIC_current <- abs(mean(self$EIC))
      # initialize the running best candidate
      meanEIC_best <- mean(self$EIC)
      self$p_hat_best <- self$p_hat$clone(deep = TRUE)
      while (absmeanEIC_current >= self$tol) {
        if (self$verbose || verbose) {
          df_debug <- data.frame(n_iter, mean(self$EIC), self$Psi)
          colnames(df_debug) <- NULL
          print(df_debug)
        }

        absmeanEIC_prev <- abs(mean(self$EIC))
        self$compute_Psi(self$p_hat, to_return = FALSE)
        self$compute_EIC(self$p_hat, self$Psi, to_return = FALSE)
        # regression view of the targeting step:
        # I(xi in box) - pn(xi) ~ D*(pn)(xi) * pn(xi) * epsilon  (no intercept)
        indicator_y <- self$longDataOut$generate_df()$Y
        pn_x <- self$hal_best$predict(self$longDataOut$grids)
        pn_x <- rep(pn_x, length(self$x))
        D_pn <- 2 * (pn_x - self$Psi)
        reg_var <- D_pn * pn_x
        ols_fit <- lm(indicator_y - pn_x ~ -1 + reg_var)
        epsilon <- ols_fit$coefficients
        self$p_hat$p_density <- self$p_hat$p_density * (1 + epsilon * self$EIC)
        self$p_hat$normalize()
        # NOTE(review): self$EIC is not recomputed after the update above, so
        # this convergence check lags the update by one iteration — confirm
        # this is intended before restructuring
        absmeanEIC_current <- abs(mean(self$EIC))
        n_iter <- n_iter + 1
        if (absmeanEIC_current > absmeanEIC_prev) {
          # the update caused Pn(EIC) to increase, change direction
          # NOTE(review): epsilon_step is unused in this method's update,
          # which relies on the OLS epsilon instead
          self$epsilon_step <- -self$epsilon_step
        }
        if (absmeanEIC_current < abs(meanEIC_best)) {
          # the update beat the current best candidate; keep it
          self$p_hat_best <- self$p_hat$clone(deep = TRUE)
          meanEIC_best <- mean(self$EIC)
        }
        if (n_iter >= self$max_iter) {
          # fixed: the message previously followed break() and was unreachable
          message("max iteration number reached!")
          break
        }
      }
      # always output the best candidate for the final result
      self$p_hat <- self$p_hat_best
      self$compute_Psi(self$p_hat, to_return = FALSE)
      self$compute_EIC(self$p_hat, self$Psi, to_return = FALSE)
      if (self$verbose || verbose) {
        message(paste(
          "Pn(EIC)=",
          formatC(meanEIC_best, format = "e", digits = 2),
          "Psi=",
          formatC(self$Psi, format = "e", digits = 2)
        ))
      }
    },

    # Recursive one-step targeting: repeatedly apply small exponential tilts
    # (via update_once) until |Pn(EIC)| < tol or max_iter is reached.
    # Always finishes on the best candidate seen during the iterations.
    target_onestep = function(verbose = FALSE) {
      n_iter <- 0
      absmeanEIC_prev <- abs(mean(self$EIC))
      absmeanEIC_current <- abs(mean(self$EIC))
      # initialize the running best candidate
      meanEIC_best <- mean(self$EIC)
      self$p_hat_best <- self$p_hat$clone(deep = TRUE)
      while (absmeanEIC_current >= self$tol) {
        if (self$verbose || verbose) {
          df_debug <- data.frame(n_iter, mean(self$EIC), self$Psi)
          colnames(df_debug) <- NULL
          print(df_debug)
        }
        absmeanEIC_prev <- abs(mean(self$EIC))
        self$compute_Psi(self$p_hat, to_return = FALSE)
        self$compute_EIC(self$p_hat, self$Psi, to_return = FALSE)
        self$update_once()
        # NOTE(review): self$EIC is not recomputed after update_once, so this
        # convergence check lags the update by one iteration — confirm intended
        absmeanEIC_current <- abs(mean(self$EIC))
        n_iter <- n_iter + 1
        if (absmeanEIC_current > absmeanEIC_prev) {
          # the update caused Pn(EIC) to increase, change direction
          self$epsilon_step <- -self$epsilon_step
        }
        if (absmeanEIC_current < abs(meanEIC_best)) {
          # the update beat the current best candidate; keep it
          self$p_hat_best <- self$p_hat$clone(deep = TRUE)
          meanEIC_best <- mean(self$EIC)
        }
        if (n_iter >= self$max_iter) {
          # fixed: the message previously followed break() and was unreachable
          message("max iteration number reached!")
          break
        }
      }
      # always output the best candidate for the final result
      self$p_hat <- self$p_hat_best
      self$compute_Psi(self$p_hat, to_return = FALSE)
      self$compute_EIC(self$p_hat, self$Psi, to_return = FALSE)
      if (self$verbose || verbose) {
        message(paste(
          "Pn(EIC)=",
          formatC(meanEIC_best, format = "e", digits = 2),
          "Psi=",
          formatC(self$Psi, format = "e", digits = 2)
        ))
      }
    },

    # Wald-type 95% confidence interval using the EIC:
    # Psi +/- 1.96 * sd(EIC) / sqrt(n). Stored as c(lower, upper) in self$CI.
    inference = function() {
      sd_EIC <- sd(self$EIC)
      half_width <- 1.96 / sqrt(length(self$EIC)) * sd_EIC
      self$CI <- c(self$Psi - half_width, self$Psi + half_width)
    },

    # Return the minimum weighted mean of the HAL basis indicators, taken over
    # the basis columns with non-zero coefficients (intercept excluded).
    # Returns NULL when all betas are zero or the basis list is empty.
    compute_min_phi_ratio = function() {
      Qbasis_list <- self$HAL_tuned$basis_list
      Qcopy_map <- self$HAL_tuned$copy_map
      # recover longitudinal-form data; weight by sample frequency
      df_compressed <- self$longDataOut$generate_df_compress(x = self$x)
      freq_weight <- df_compressed$Freq
      X <- df_compressed[, "box"]

      if (length(Qbasis_list) > 0) {
        x_basis <- hal9001::make_design_matrix(as.matrix(X), Qbasis_list)
        unique_columns <- as.numeric(names(Qcopy_map))
        # design matrix: columns correspond to Q_fit$coefs; no intercept column
        x_basis <- x_basis[, unique_columns]
        # frequency-weighted proportion of ones in each basis column
        phi_ratio <- Matrix::colSums(x_basis * freq_weight) / sum(freq_weight)

        beta_nonIntercept <- self$HAL_tuned$coefs[-1]
        beta_nonzero <- beta_nonIntercept != 0
        nonzeroBeta_phiRatio <- phi_ratio[beta_nonzero]
      } else {
        # no basis functions left after fitting
        nonzeroBeta_phiRatio <- numeric()
      }

      # NULL if all betas are zero OR the basis list has zero length
      if (length(nonzeroBeta_phiRatio) != 0) {
        return(min(nonzeroBeta_phiRatio))
      } else {
        return(NULL)
      }
    }
  )
)
# wilsoncai1992/TMLEbootstrap documentation built on Feb. 27, 2021, 7:57 a.m.