R/summary_functions.R

Defines functions estimate_bias_variance estimate_ci_coverage estimate_truth combine_estimates

Documented in combine_estimates estimate_bias_variance estimate_ci_coverage estimate_truth

#' Combine estimates from different pipelines for comparison
#'
#' @param truth tibble. Return value of `estimate_truth()`
#' @param iptw tibble. Return value of `iptw_pipeline()`
#' @param ice tibble. Return value of `ice_pipeline()`
#' @param or tibble. Return value of `or_pipeline()`
#' @param Tt max periods.
#'
#' @return tibble.
#' @export
#'
#' @examples
combine_estimates <- function(truth, iptw, ice, or, Tt) {

  result = tibble::tibble(t=1:Tt)

  if(!missing(truth)) result = dplyr::left_join(result, dplyr::rename(truth, truth=estimate), by='t')
  if(!missing(iptw)) result = dplyr::left_join(result, dplyr::rename(iptw, iptw=estimate), by='t')
  if(!missing(ice)) result = dplyr::left_join(result, dplyr::rename(ice, ice=estimate), by='t')
  if(!missing(or)) result = dplyr::left_join(result, dplyr::rename(or, or=estimate), by='t')

  return(result)
}


#' Estimate true values of the parameters from the simulated potential outcomes
#'
#' @param df_po data frame. Return value of `generate_data(potential_outcomes=TRUE)`
#' @param Tt max periods.
#'
#' @return tibble.
#' @export
#'
#' @examples
estimate_truth <- function(df_po, Tt, link_fun=NULL, binomial_n=1, type='differences') {

  row_freqs = rep(1, nrow(df_po))*binomial_n #frequecy weights applied to the rows of df_po. Return a column of 1's if not aggregate binomial data
  y_colnames = sapply(0:Tt, function(t) glue('Y{t}'))
  y_means = colSums(df_po[ , y_colnames ]) / sum(row_freqs)

  if(is.null(link_fun)) {
    link_fun = function(x) x
  } else {
    stopifnot(is.function(link_fun))
  }

  if(type=='differences') {
     return(
       tibble::tibble(t=1:Tt, estimate = diff(link_fun(y_means)))
     )
  } else if(type=='levels') {
    return(
      tibble::tibble(t=0:Tt, estimate = y_means)
    )
  } else {
    stop('type must be either "differences" or "levels"')
  }
}


#' Estimate confidence interval coverage for a normality-assuming interval based on the simulation variance.
#'
#' @param results_df Data frame with columns 'rep' and 'estimates' where 'estimates' is a list of tibble output results from either iptw_pipeline(), ice_pipeline(), or or_pipeline().
#' @param estimates_truth Data frame with columns 't' and 'estimate', with the 'true' value of E(Y_t(a)-Y_t-1(a)) for t=1,...,T
#' @param conf_level num. level of confidence, e.g. .95 is a 95% confidence interval
#'
#' @return data frame with coverage estimates
#' @export
#'
#' @examples
estimate_ci_coverage = function(results_df, estimates_truth, conf_level=0.95) {
  z = -qnorm((1 - conf_level)/2)
  results_df %>%
    tidyr::unnest(estimates) %>%
    dplyr::left_join(estimates_truth %>% dplyr::rename(truth=estimate)) %>%
    dplyr::group_by(t) %>%
    dplyr::mutate(lwr_normal = estimate - z*sd(estimate),
           upr_normal = estimate + z*sd(estimate),
           cover = 1*(lwr_normal < truth)*(truth < upr_normal))  %>%
    dplyr::summarise(coverage = mean(cover))
}


#' Estimate the bias and variance of DID g-formula estimators
#'
#' @param results_df Data frame with columns 'rep' and 'estimates' where 'estimates' is a list of tibble output results from either iptw_pipeline(), ice_pipeline(), or or_pipeline().
#' @param estimates_truth Data frame with columns 't' and 'estimate', with the 'true' value of E(Y_t(a)-Y_t-1(a)) for t=1,...,T
#'
#' @return data frame with bias and variance estimates
#' @export
#'
#' @examples
estimate_bias_variance = function(results_df, estimates_truth) {
  results_df %>%
    tidyr::unnest(estimates) %>%
    dplyr::left_join(estimates_truth %>% dplyr::rename(truth = estimate)) %>%
    dplyr::group_by(t) %>%
    dplyr::summarise(bias = mean(estimate) - mean(truth),
              variance = var(estimate))
}
audreyrenson/didgformula documentation built on Oct. 9, 2022, 11:45 a.m.