library(R6)
library(hal9001)
#' onestep TMLE of average density parameter
#'
#' @docType class
#' @keywords data
#' @return Object of \code{\link{R6Class}} with methods
#' @format \code{\link{R6Class}} object.
#' @examples
#' # avgDensityTMLE$new()
#' @field x random sample from distribution
#' @field p_hat (\code{empiricalDensity}) containing density estimates
#' @field Psi parameter value
#' @field EIC vector of EIC
#' @field epsilon_step step size for one-step targeting
#' @field CI (numeric vector) length 2; lower + upper CI
#' @field longDataOut (\code{longiData}) for transforming x into longitudinal format dataframe
#' @field HAL_tuned (\code{hal9001}) generated by \code{cvDensityHAL} class.
#' @section Methods:
#' \describe{
#' \item{\code{new(x, epsilon_step = NULL, verbose = NULL)}}{
#' specify data; define targeting step size
#' }
#' \item{\code{fit_density(bin_width = .1, lambda_grid)}}{
#' use `cvDensityHAL` to fit density. bin_width for pre-binning of
#' continuous x; lambda_grid for grid search of lambda in HAL
#' }
#' }
#' @importFrom R6 R6Class
#' @importFrom Matrix colSums
#' @export
avgDensityTMLE <- R6Class("avgDensityTMLE",
  public = list(
    # random sample from the target distribution
    x = NULL,
    # empiricalDensity object holding the current density estimate
    p_hat = NULL,
    # current estimate of the average-density parameter
    Psi = NULL,
    # vector of efficient influence curve (EIC) values
    EIC = NULL,
    # step size for one-step targeting; sign may flip during targeting
    epsilon_step = 1e-3,
    # convergence tolerance on |Pn(EIC)|; set in initialize as 1e-3 / n
    tol = NULL,
    # 95% confidence interval, c(lower, upper); filled by inference()
    CI = NULL,
    # print per-iteration diagnostics when TRUE
    verbose = FALSE,
    # maximum number of targeting iterations
    max_iter = 1e2,
    # longiData object transforming x into longitudinal-format data
    longDataOut = NULL,
    # cvDensityHAL model refit on the full data at the CV-selected lambda
    hal_best = NULL,
    # underlying hal9001 fit extracted from hal_best
    HAL_tuned = NULL,
    # best density candidate (smallest |Pn(EIC)|) seen during targeting
    p_hat_best = NULL,

    # Store the sample and tuning options. The tolerance scales with the
    # sample size as 1e-3 / n.
    #
    # @param x numeric sample from the distribution of interest
    # @param epsilon_step optional targeting step size (default kept if NULL)
    # @param verbose optional logical; enables diagnostics (default kept if NULL)
    # @param max_iter maximum number of targeting iterations
    initialize = function(x, epsilon_step = NULL, verbose = NULL, max_iter = 1e2) {
      self$x <- x
      self$tol <- 1e-3 / length(x)
      self$max_iter <- max_iter
      if (!is.null(epsilon_step)) self$epsilon_step <- epsilon_step
      if (!is.null(verbose)) self$verbose <- verbose
    },

    # Fit the initial density estimate via CV-tuned HAL, then normalize it.
    #
    # @param bin_width width used to pre-bin continuous x
    # @param lambda_grid grid of lambda values for the penalized-likelihood fit
    # @param M constrained-mode bound (not implemented; must stay NULL)
    # @param n_fold number of CV folds
    # @param ... forwarded to fit_density_pen_likeli
    fit_density = function(bin_width = .1, lambda_grid = NULL, M = NULL, n_fold = 3, ...) {
      use_constrained_mode <- !is.null(M)
      if (use_constrained_mode) {
        stop("not implemented")
      }
      hal_out <- self$fit_density_pen_likeli(
        bin_width = bin_width,
        lambda_grid = lambda_grid,
        n_fold = n_fold,
        ...
      )
      yhat <- hal_out$predict(new_x = self$longDataOut$x)
      density_initial <- empiricalDensity$new(p_density = yhat, x = self$x)
      self$p_hat <- density_initial$normalize()
      self$hal_best <- hal_out
    },

    # Tune HAL for density estimation by cross-validated penalized likelihood
    # and refit the winning lambda on the full data.
    #
    # @param bin_width width used to pre-bin continuous x
    # @param lambda_grid grid of lambda values to search
    # @param lambda_min_ratio passed through to the lambda-grid construction
    # @param n_fold number of CV folds
    # @param ... forwarded to cvDensityHAL$cv_lambda_grid
    # @return the refit cvDensityHAL model object
    fit_density_pen_likeli = function(bin_width = .1, lambda_grid = NULL, lambda_min_ratio = NULL, n_fold = 3, ...) {
      self$longDataOut <- longiData$new(x = self$x, bin_width = bin_width)
      # tune HAL for density
      cvHAL_fit <- cvDensityHAL$new(x = self$x, longiData = self$longDataOut)
      cvHAL_fit$assign_fold(n_fold = n_fold)
      cvHAL_fit$cv_lambda_grid(
        lambda_grid = lambda_grid, lambda_min_ratio = lambda_min_ratio, ...
      )
      hal_out <- cvHAL_fit$compute_model_full_data(cvHAL_fit$lambda.min)
      self$HAL_tuned <- hal_out$hal_fit
      return(hal_out)
    },

    # Compute Psi = sum(p^2 dx), a Riemann-sum approximation of the average
    # density, from the x values and density values in p_hat.
    #
    # @param p_hat object with fields x and p_density
    # @param to_return if TRUE return Psi; otherwise store it in self$Psi
    compute_Psi = function(p_hat, to_return = FALSE) {
      dummy_df <- data.frame(
        id = seq_along(p_hat$x), x = p_hat$x, p_density = p_hat$p_density
      )
      # sort by x so that diff(x) yields valid Riemann-sum widths
      dummy_df <- dummy_df[order(dummy_df$x), ]
      dx <- c(0, diff(dummy_df$x))
      Psi <- sum(dummy_df$p_density^2 * dx)
      if (to_return) return(Psi) else self$Psi <- Psi
    },

    # Compute the efficient influence curve EIC = 2 * (p(x) - Psi).
    #
    # @param p_hat object with field p_density evaluated at the sample points
    # @param Psi current parameter value
    # @param to_return if TRUE return the EIC; otherwise store it in self$EIC
    compute_EIC = function(p_hat, Psi, to_return = FALSE) {
      EIC <- 2 * (p_hat$p_density - Psi)
      if (to_return) return(EIC) else self$EIC <- EIC
    },

    # Perform one targeting step: move the density along the EIC direction
    # with step size epsilon_step, then renormalize. The step sign is chosen
    # so the update moves Pn(EIC) toward zero.
    update_once = function() {
      if (mean(self$EIC) < 0) self$epsilon_step <- -self$epsilon_step
      self$p_hat$p_density <- self$p_hat$p_density * exp(self$epsilon_step * self$EIC)
      self$p_hat$normalize()
    },

    # Iteratively target the density using an OLS-estimated epsilon at each
    # step, until |Pn(EIC)| < tol or max_iter is reached. The best candidate
    # (smallest |Pn(EIC)|) encountered is kept as the final estimate.
    #
    # @param verbose print per-iteration diagnostics (also enabled by
    #   self$verbose)
    target_iterative = function(verbose = FALSE) {
      n_iter <- 0
      absmeanEIC_prev <- abs(mean(self$EIC))
      absmeanEIC_current <- abs(mean(self$EIC))
      # initiate the best candidate with the current state
      meanEIC_best <- mean(self$EIC)
      self$p_hat_best <- self$p_hat$clone(deep = TRUE)
      while (absmeanEIC_current >= self$tol) {
        if (self$verbose || verbose) {
          df_debug <- data.frame(n_iter, mean(self$EIC), self$Psi)
          colnames(df_debug) <- NULL
          print(df_debug)
        }
        absmeanEIC_prev <- abs(mean(self$EIC))
        self$compute_Psi(self$p_hat, to_return = FALSE)
        self$compute_EIC(self$p_hat, self$Psi, to_return = FALSE)
        # iterative update: regress the residual on the score direction,
        # I(xi in box) - pn(xi) ~ D*(pn)(xi) * pn(xi) * epsilon, no intercept
        indicator_y <- self$longDataOut$generate_df()$Y
        pn_x <- self$hal_best$predict(self$longDataOut$grids)
        # NOTE(review): assumes generate_df() stacks the full grid once per
        # observation so this rep() ordering lines up -- confirm in longiData
        pn_x <- rep(pn_x, length(self$x))
        D_pn <- 2 * (pn_x - self$Psi)
        reg_var <- D_pn * pn_x
        ols_fit <- lm(indicator_y - pn_x ~ -1 + reg_var)
        epsilon <- ols_fit$coefficients
        self$p_hat$p_density <- self$p_hat$p_density * (1 + epsilon * self$EIC)
        self$p_hat$normalize()
        absmeanEIC_current <- abs(mean(self$EIC))
        n_iter <- n_iter + 1
        if (absmeanEIC_current > absmeanEIC_prev) {
          # the update caused |Pn(EIC)| to increase: change direction
          self$epsilon_step <- -self$epsilon_step
        }
        if (absmeanEIC_current < abs(meanEIC_best)) {
          # the update beat the current best; record this candidate
          self$p_hat_best <- self$p_hat$clone(deep = TRUE)
          meanEIC_best <- mean(self$EIC)
        }
        if (n_iter >= self$max_iter) {
          # message must precede break; after break it is unreachable
          message("max iteration number reached!")
          break
        }
      }
      # always output the best candidate for the final result
      self$p_hat <- self$p_hat_best
      self$compute_Psi(self$p_hat, to_return = FALSE)
      self$compute_EIC(self$p_hat, self$Psi, to_return = FALSE)
      if (self$verbose || verbose) {
        message(paste(
          "Pn(EIC)=",
          formatC(meanEIC_best, format = "e", digits = 2),
          "Psi=",
          formatC(self$Psi, format = "e", digits = 2)
        ))
      }
    },

    # One-step (recursive small-step) targeting: repeatedly apply update_once
    # until |Pn(EIC)| < tol or max_iter is reached. The best candidate
    # (smallest |Pn(EIC)|) encountered is kept as the final estimate.
    #
    # @param verbose print per-iteration diagnostics (also enabled by
    #   self$verbose)
    target_onestep = function(verbose = FALSE) {
      n_iter <- 0
      absmeanEIC_prev <- abs(mean(self$EIC))
      absmeanEIC_current <- abs(mean(self$EIC))
      # initiate the best candidate with the current state
      meanEIC_best <- mean(self$EIC)
      self$p_hat_best <- self$p_hat$clone(deep = TRUE)
      while (absmeanEIC_current >= self$tol) {
        if (self$verbose || verbose) {
          df_debug <- data.frame(n_iter, mean(self$EIC), self$Psi)
          colnames(df_debug) <- NULL
          print(df_debug)
        }
        absmeanEIC_prev <- abs(mean(self$EIC))
        self$compute_Psi(self$p_hat, to_return = FALSE)
        self$compute_EIC(self$p_hat, self$Psi, to_return = FALSE)
        self$update_once()
        absmeanEIC_current <- abs(mean(self$EIC))
        n_iter <- n_iter + 1
        if (absmeanEIC_current > absmeanEIC_prev) {
          # the update caused |Pn(EIC)| to increase: change direction
          self$epsilon_step <- -self$epsilon_step
        }
        if (absmeanEIC_current < abs(meanEIC_best)) {
          # the update beat the current best; record this candidate
          self$p_hat_best <- self$p_hat$clone(deep = TRUE)
          meanEIC_best <- mean(self$EIC)
        }
        if (n_iter >= self$max_iter) {
          # message must precede break; after break it is unreachable
          message("max iteration number reached!")
          break
        }
      }
      # always output the best candidate for the final result
      self$p_hat <- self$p_hat_best
      self$compute_Psi(self$p_hat, to_return = FALSE)
      self$compute_EIC(self$p_hat, self$Psi, to_return = FALSE)
      if (self$verbose || verbose) {
        message(paste(
          "Pn(EIC)=",
          formatC(meanEIC_best, format = "e", digits = 2),
          "Psi=",
          formatC(self$Psi, format = "e", digits = 2)
        ))
      }
    },

    # Wald-type 95% confidence interval: Psi +/- 1.96 * sd(EIC) / sqrt(n).
    # Stores c(lower, upper) in self$CI.
    inference = function() {
      sd_EIC <- sd(self$EIC)
      se <- sd_EIC / sqrt(length(self$EIC))
      upper <- self$Psi + 1.96 * se
      lower <- self$Psi - 1.96 * se
      self$CI <- c(lower, upper)
    },

    # Return the minimum empirical mean of the HAL basis indicators over the
    # basis columns with non-zero coefficients (intercept excluded), or NULL
    # when the basis is empty or all coefficients are zero.
    compute_min_phi_ratio = function() {
      Qbasis_list <- self$HAL_tuned$basis_list
      Qcopy_map <- self$HAL_tuned$copy_map
      # recover longitudinal-form data; weight by sample frequency
      df_compressed <- self$longDataOut$generate_df_compress(x = self$x)
      freq_weight <- df_compressed$Freq
      X <- df_compressed[, "box"]
      if (length(Qbasis_list) > 0) {
        x_basis <- hal9001::make_design_matrix(as.matrix(X), Qbasis_list)
        unique_columns <- as.numeric(names(Qcopy_map))
        # keep only the unique basis columns; these align with Q_fit$coefs
        # (no intercept column in the design matrix)
        x_basis <- x_basis[, unique_columns]
        phi_ratio <- Matrix::colSums(x_basis * freq_weight) / sum(freq_weight)
        beta_nonIntercept <- self$HAL_tuned$coefs[-1]
        beta_nonzero <- beta_nonIntercept != 0
        nonzeroBeta_phiRatio <- phi_ratio[beta_nonzero]
      } else {
        # no basis functions remain
        nonzeroBeta_phiRatio <- numeric()
      }
      # return NULL if all beta are zero OR the basis has zero length
      if (length(nonzeroBeta_phiRatio) != 0) {
        return(min(nonzeroBeta_phiRatio))
      } else {
        return(NULL)
      }
    }
  )
)
# NOTE: removed trailing non-R text ("Add the following code to your
# website...") -- web-scrape residue that was not valid R and broke parsing.