R/Repeated.R
In AIPW: Augmented Inverse Probability Weighting

#' @title Repeated Crossfitting Procedure for AIPW
#'
#' @description An R6Class that allows repeated crossfitting procedure for an [AIPW] object
#'
#' @importFrom stats predict
#' @importFrom utils head
#' @importFrom future.apply future_lapply
#' @importFrom progressr progressor
#'
#' @details
#' See examples for illustration.
#'
#' @section Constructor:
#' \code{Repeated$new(aipw_obj = NULL)}
#'
#' ## Constructor Arguments
#' \tabular{lll}{
#' \strong{Argument}      \tab   \strong{Type}     \tab     \strong{Details} \cr
#' \code{aipw_obj}               \tab   [AIPW] object       \tab    an [AIPW] object \cr
#' }
#'
#'
#' @section Public Methods:
#'  \tabular{lll}{
#'  \strong{Methods}      \tab   \strong{Details}                                      \tab \strong{Link}     \cr
#'  \code{repfit()}          \tab   Fit the data to the [AIPW] object `num_reps` times  \tab   [repfit.Repeated]  \cr
#'  \code{summary_median()}      \tab   Summary (median) of estimates from the `repfit()`    \tab   [summary_median.Repeated]\cr
#'  }
#'
#' @section Public Variables:
#'  \tabular{lll}{
#'  \strong{Variable}     \tab   \strong{Generated by}      \tab     \strong{Return} \cr
#'  \code{repeated_estimates}      \tab   `repfit()`                \tab     A data.frame of estiamtes form `num_reps` cross-fitting \cr
#'  \code{repeated_results}      \tab   `summary_median()`                \tab     A list of sumarised estimates \cr
#'  \code{result}         \tab   `summary_median()`                \tab     A data.frame of sumarised estimates \cr
#'  }
#'
#' ## Public Variable Details
#' \describe{
#'    \item{\code{repeated_estimates}}{Estimates from `num_reps` cross-fitting.}
#'    \item{\code{result}}{Summarised estimates from ``repeated_estimates` using median methods.}
#' }
#'
#' @return \code{AIPW} object
#'
#' @references Zhong Y, Kennedy EH, Bodnar LM, Naimi AI (2021). AIPW: An R Package for Augmented Inverse Probability Weighted Estimation of Average Causal Effects. \emph{American Journal of Epidemiology}.
#' @references Robins JM, Rotnitzky A (1995). Semiparametric efficiency in multivariate regression models with missing data. \emph{Journal of the American Statistical Association}.
#' @references Chernozhukov V, Chetverikov V, Demirer M, et al (2018). Double/debiased machine learning for treatment and structural parameters. \emph{The Econometrics Journal}.
#' @references Kennedy EH, Sjolander A, Small DS (2015). Semiparametric causal inference in matched cohort studies. \emph{Biometrika}.
#'
#'
#' @examples
#' library(SuperLearner)
#' library(ggplot2)
#'
#' #create an object
#' aipw_sl <- AIPW$new(Y=rbinom(100,1,0.5), A=rbinom(100,1,0.5),
#'                     W.Q=rbinom(100,1,0.5), W.g=rbinom(100,1,0.5),
#'                     Q.SL.library="SL.mean",g.SL.library="SL.mean",
#'                     k_split=2,verbose=FALSE)
#'
#' #create a repeated crossfitting object from the previous step
#' repeated_aipw_sl <- Repeated$new(aipw_sl)
#'
#' #fit repetitively (stratified = TRUE will use stratified_fit() method in AIPW class)
#' repeated_aipw_sl$repfit(num_reps = 3, stratified = FALSE)
#'
#' #summarise the results
#' repeated_aipw_sl$summary_median()
#'
#' @export
Repeated <- R6Class("RepeatedFit",
                          inherit = AIPW,
                          public = list(
                            #public variables
                            aipw_obj = NULL,
                            num_reps = NULL,
                            stratified_fitted = NULL,
                            repeated_estimates = NULL,
                            repeated_results = NULL,
                            result = NULL,

                            #constructor
                            initialize = function(aipw_obj) {
                              self$aipw_obj = aipw_obj
                              private$verbose = self$aipw_obj$.__enclos_env__$private$verbose
                              private$Y.type = self$aipw_obj$.__enclos_env__$private$Y.type
                              self$aipw_obj$.__enclos_env__$private$verbose = FALSE

                              #--check if future.apply is loaded otherwise lapply would be used.--#
                              if (any(names(sessionInfo()$otherPkgs) %in% c("future.apply"))){
                                private$use.f_lapply = TRUE
                              } else {
                                private$use.f_lapply = FALSE
                              }
                            },
                            #repfit method
                            repfit = function(num_reps, stratified){
                              self$num_reps = num_reps
                              self$stratified_fitted = stratified

                              iter = 1:self$num_reps
                              self$repeated_estimates =
                                private$.f_lapply(iter,function(i,...){
                                  if (self$stratified_fitted) {
                                    self$aipw_obj$stratified_fit()$summary()
                                  } else {
                                    self$aipw_obj$fit()$summary()
                                  }

                                  estimates = data.frame(self$aipw_obj$result[,1:2])
                                  estimates$Estimand = rownames(estimates)
                                  estimates$Estimand = factor(estimates$Estimand,
                                                               levels = estimates$Estimand)
                                  return(estimates)
                                })
                              names(self$repeated_estimates) = iter
                              self$repeated_estimates = data.frame(do.call(rbind, self$repeated_estimates))
                              colnames(self$repeated_estimates) = c("Estimate","SE","Estimand")

                            },

                            #summary_median method
                            summary_median = function(){
                              self$repeated_results = split(self$repeated_estimates, self$repeated_estimates$Estimand)
                              self$repeated_results = lapply(self$repeated_results,
                                                             function(x) {
                                                               if(unique(x$Estimand) %in% c("RR","OR")){
                                                                 x$Estimate = log(x$Estimate)
                                                                 median_adjusted = private$get_median_variance(x$Estimate,x$SE)
                                                                 median_adjusted[
                                                                   names(median_adjusted) %in%
                                                                     c("Median Estimate","95% LCL Median Estimate", "95% UCL Median Estimate")
                                                                   ] = exp(median_adjusted[
                                                                     names(median_adjusted) %in%
                                                                       c("Median Estimate","95% LCL Median Estimate", "95% UCL Median Estimate")
                                                                   ] )
                                                               } else{
                                                                 median_adjusted = private$get_median_variance(x$Estimate,x$SE)
                                                               }

                                                               return(median_adjusted)
                                                             })
                              self$result = do.call(rbind, self$repeated_results)

                              if (private$verbose){
                                print(self$result,digit=3)
                              }

                            }
                            ),

                       private = list(
                         get_median_variance = function(est, se){
                           est_median = median(est)
                           est_var = se^2
                           se_median = median(se)
                           est_median_var = median( est_var +  (est - est_median)^2)
                           est_median_se = sqrt(est_median_var)
                           ci_median_se = get_ci(est_median,est_median_se)
                           res = c(est_median,se_median,est_median_se,ci_median_se)
                           names(res) = c("Median Estimate", "Median SE", "SE of Median Estimate","95% LCL Median Estimate","95% UCL Median Estimate")

                           return(res)
                         }
                       )
)

#' @name repfit
#' @aliases repfit.Repeated
#' @title Fit the data to the [AIPW] object repeatedly
#'
#' @description
#' Fitting the data into the [AIPW] object with cross-fitting repeatedly to obtain multiple estimates from repetitions to avoid randomness due to splits in cross-fitting
#'
#' @section R6 Usage:
#' \code{$repfit(num_reps = 20, stratified = FALSE)}
#'
#' @param num_reps Integer. Number of repetition of cross-fitting procedures (`fit()` or `stratified_fit()` see blow).
#' @param stratified Boolean. `stratified = TRUE` will use `stratified_fit()` in the [AIPW] object to cross-fitting.
#'
#' @references Chernozhukov V, Chetverikov V, Demirer M, et al (2018). Double/debiased machine learning for treatment and structural parameters. \emph{The Econometrics Journal}.
#' @return A [Repeated] object with `repeated_estimates` (`estimates` from num_reps times repetition)
#'
#' @seealso [Repeated] and [AIPW]
NULL

#' @name summary_median
#' @aliases summary_median.Repeated
#' @title Summary of the `repeated_estimates` from `repfit()` in the [Repeated] object using median methods.
#'
#' @description
#' From `repeated_estimates`, calculate the median estimate (`median(Estimates)`), median SE (`median(SE)`), SE adjusting for variations across `num_reps` times,
#' and 95% CI using SE adjusting for SE adjusted for variability.
#'
#' @section R6 Usage:
#' \code{$summary_median.Repeated()}
#'
#' @references Chernozhukov V, Chetverikov V, Demirer M, et al (2018). Double/debiased machine learning for treatment and structural parameters. \emph{The Econometrics Journal}.
#'
#' @return `repeated_results` and `result` (public variables).
#'
#' @seealso [Repeated] and [AIPW]
NULL