R/grab-methods.R

Defines functions grab_balance_table grab_balance_table grab_significance grab_significance grab_synthetic_control grab_synthetic_control grab_loss grab_loss grab_predictor_weights grab_predictor_weights grab_outcome grab_outcome grab_predictors grab_predictors grab_unit_weights grab_unit_weights

Documented in grab_balance_table grab_loss grab_outcome grab_predictors grab_predictor_weights grab_significance grab_synthetic_control grab_unit_weights

# All grab_ methods used in the synthetic_control package.


#' grab_unit_weights
#'
#' Extract the unit weights generated by `generate_weights()` from the synth pipeline.
#'
#' @param data nested data of type `tbl_df`
#' @param placebo boolean flag; if TRUE placebo values are returned as well (if
#'   available). Default is FALSE.
#'
#' @return tibble data frame
#' @export
#'
#' @examples
#'
#' \donttest{
#'
#' # Smoking example data
#' data(smoking)
#'
#' smoking_out <-
#' smoking %>%
#'
#' # initial the synthetic control object
#' synthetic_control(outcome = cigsale,
#'                   unit = state,
#'                   time = year,
#'                   i_unit = "California",
#'                   i_time = 1988,
#'                   generate_placebos=TRUE) %>%
#'
#' # Generate the aggregate predictors used to generate the weights
#'   generate_predictor(time_window=1980:1988,
#'                      lnincome = mean(lnincome, na.rm = TRUE),
#'                      retprice = mean(retprice, na.rm = TRUE),
#'                      age15to24 = mean(age15to24, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1984:1988,
#'                      beer = mean(beer, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1975,
#'                      cigsale_1975 = cigsale) %>%
#'
#'   generate_predictor(time_window=1980,
#'                      cigsale_1980 = cigsale) %>%
#'
#'   generate_predictor(time_window=1988,
#'                      cigsale_1988 = cigsale) %>%
#'
#'
#'   # Generate the fitted weights for the synthetic control
#'   generate_weights(optimization_window =1970:1988,
#'                    Margin.ipop=.02,Sigf.ipop=7,Bound.ipop=6)
#'
#' # Grab the unit weights for the treated unit.
#' smoking_out %>% grab_unit_weights()
#'
#' # Grab the unit weights for the placebo units as well.
#' smoking_out %>% grab_unit_weights(placebo=TRUE)
#'
#' }
#'
grab_unit_weights <- function(data,placebo=FALSE){
  UseMethod("grab_unit_weights")
}

#' @export
grab_unit_weights <- function(data,placebo=FALSE){
  # Check if .unit_weights is in data.
  if(!(".unit_weights" %in% colnames(data))){
    stop("`.unit_weights` column is missing. Please run `generate_weights()` to generate this data field.")
  }

  if(placebo){

    data %>%
      dplyr::filter(.type=="controls") %>%
      dplyr::select(.id,.placebo,.unit_weights) %>%
      tidyr::unnest(.unit_weights)

  }else{

    data %>%
      dplyr::filter(.placebo==0) %>%
      dplyr::filter(.type=="controls") %>%
      dplyr::select(.unit_weights) %>%
      tidyr::unnest(.unit_weights)

  }
}



#' grab_predictors
#'
#' Extract the aggregate-level covariates generated by `generate_predictor()` from
#' the synth pipeline.
#'
#' @param data nested data of type `tbl_df`
#' @param type string specifying which version of the data to extract: "treated"
#'   or "control". Default is "treated".
#' @param placebo boolean flag; if TRUE placebo values are returned as well (if
#'   available). Default is FALSE.
#'
#' @return tibble data frame
#' @export
#'
#' @examples
#'
#' \donttest{
#'
#' # Smoking example data
#' data(smoking)
#'
#' smoking_out <-
#' smoking %>%
#'
#' # initial the synthetic control object
#' synthetic_control(outcome = cigsale,
#'                   unit = state,
#'                   time = year,
#'                   i_unit = "California",
#'                   i_time = 1988,
#'                   generate_placebos=FALSE) %>%
#'
#' # Generate the aggregate predictors used to generate the weights
#'   generate_predictor(time_window=1980:1988,
#'                      lnincome = mean(lnincome, na.rm = TRUE),
#'                      retprice = mean(retprice, na.rm = TRUE),
#'                      age15to24 = mean(age15to24, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1984:1988,
#'                      beer = mean(beer, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1975,
#'                      cigsale_1975 = cigsale) %>%
#'
#'   generate_predictor(time_window=1980,
#'                      cigsale_1980 = cigsale) %>%
#'
#'   generate_predictor(time_window=1988,
#'                      cigsale_1988 = cigsale) %>%
#'
#'
#'   # Generate the fitted weights for the synthetic control
#'   generate_weights(optimization_window =1970:1988,
#'                    Margin.ipop=.02,Sigf.ipop=7,Bound.ipop=6) %>%
#'
#'   # Generate the synthetic control
#'   generate_control()
#'
#' # Grab predictors data frame for the treated unit
#' smoking_out %>% grab_predictors()
#'
#' # Grab predictors data frame for control units
#' smoking_out %>% grab_predictors(type="controls")
#'
#' }
#'
grab_predictors <- function(data,type="treated",placebo=FALSE){
  UseMethod("grab_predictors")
}

#' @export
grab_predictors <- function(data,type="treated",placebo=FALSE){

  # Checks
  if(!".predictors" %in% colnames(data)){ stop("Predictors must be generated prior to running using `generate_predictors()`.")}

  if(placebo){

    data %>%
      dplyr::filter(.type==type) %>%
      dplyr::select(.id,.placebo,.predictors) %>%
      tibble::as_tibble() %>%
      tidyr::unnest(cols=c(.predictors))

  }else{

    data %>%
      dplyr::filter(.placebo==0) %>%
      dplyr::filter(.type==type) %>%
      dplyr::select(.predictors) %>%
      tibble::as_tibble() %>%
      tidyr::unnest(cols=c(.predictors))

  }
}


#' grab_outcome
#'
#' Extract a data frame containing the outcome variable from the synth pipline.
#'
#' @param data nested data of type `tbl_df`
#' @param type string specifying which version of the data to extract: "treated"
#'   or "control". Default is "treated".
#' @param placebo boolean flag; if TRUE placebo values are returned as well (if
#'   available). Default is FALSE.
#'
#' @return tibble data frame
#' @export
#'
#' @examples
#'
#' \donttest{
#'
#' # Smoking example data
#' data(smoking)
#'
#' smoking_out <-
#' smoking %>%
#'
#' # initial the synthetic control object
#' synthetic_control(outcome = cigsale,
#'                   unit = state,
#'                   time = year,
#'                   i_unit = "California",
#'                   i_time = 1988,
#'                   generate_placebos=FALSE) %>%
#'
#' # Generate the aggregate predictors used to generate the weights
#'   generate_predictor(time_window=1980:1988,
#'                      lnincome = mean(lnincome, na.rm = TRUE),
#'                      retprice = mean(retprice, na.rm = TRUE),
#'                      age15to24 = mean(age15to24, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1984:1988,
#'                      beer = mean(beer, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1975,
#'                      cigsale_1975 = cigsale) %>%
#'
#'   generate_predictor(time_window=1980,
#'                      cigsale_1980 = cigsale) %>%
#'
#'   generate_predictor(time_window=1988,
#'                      cigsale_1988 = cigsale) %>%
#'
#'
#'   # Generate the fitted weights for the synthetic control
#'   generate_weights(optimization_window =1970:1988,
#'                    Margin.ipop=.02,Sigf.ipop=7,Bound.ipop=6) %>%
#'
#'   # Generate the synthetic control
#'   generate_control()
#'
#' # Grab outcome data frame for the treated unit
#' smoking_out %>% grab_outcome()
#'
#' # Grab outcome data frame for control units
#' smoking_out %>% grab_outcome(type="controls")
#'
#' }
#'
grab_outcome <- function(data,type="treated",placebo=FALSE){
  UseMethod("grab_outcome")
}

#' @export
grab_outcome <- function(data,type="treated",placebo=FALSE){

  if(placebo){

    data %>%
      dplyr::filter(.type==type) %>%
      dplyr::select(.id,.placebo,.outcome) %>%
      tibble::as_tibble() %>%
      tidyr::unnest(cols=c(.outcome))

  }else{

    data %>%
      dplyr::filter(.placebo==0) %>%
      dplyr::filter(.type==type) %>%
      dplyr::select(.outcome) %>%
      tibble::as_tibble() %>%
      tidyr::unnest(cols=c(.outcome))

  }

}


#' grab_predictor_weights
#'
#' Extract the predictor variable weights generated by `generate_weights()` from the
#' synth pipeline.
#'
#' @param data nested data of type `tbl_df`
#' @param placebo boolean flag; if TRUE placebo values are returned as well (if
#'   available). Default is FALSE.
#'
#' @return tibble data frame
#' @export
#'
#' @examples
#' \donttest{
#'
#' # Smoking example data
#' data(smoking)
#'
#' smoking_out <-
#' smoking %>%
#'
#' # initial the synthetic control object
#' synthetic_control(outcome = cigsale,
#'                   unit = state,
#'                   time = year,
#'                   i_unit = "California",
#'                   i_time = 1988,
#'                   generate_placebos=TRUE) %>%
#'
#' # Generate the aggregate predictors used to generate the weights
#'   generate_predictor(time_window=1980:1988,
#'                      lnincome = mean(lnincome, na.rm = TRUE),
#'                      retprice = mean(retprice, na.rm = TRUE),
#'                      age15to24 = mean(age15to24, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1984:1988,
#'                      beer = mean(beer, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1975,
#'                      cigsale_1975 = cigsale) %>%
#'
#'   generate_predictor(time_window=1980,
#'                      cigsale_1980 = cigsale) %>%
#'
#'   generate_predictor(time_window=1988,
#'                      cigsale_1988 = cigsale) %>%
#'
#'
#'   # Generate the fitted weights for the synthetic control
#'   generate_weights(optimization_window =1970:1988,
#'                    Margin.ipop=.02,Sigf.ipop=7,Bound.ipop=6) %>%
#'
#'   # Generate the synthetic control
#'   generate_control()
#'
#' # Grab the predictor weights data frame for the treated unit.
#' smoking_out %>% grab_predictor_weights()
#'
#' # Grab the predictor weights data frame for the placebo units as well.
#' smoking_out %>% grab_predictor_weights(placebo=TRUE)
#'
#' }
#'
grab_predictor_weights <- function(data,placebo=FALSE){
  UseMethod("grab_predictor_weights")
}

#' @export
grab_predictor_weights <- function(data,placebo=FALSE){


  # Check if .predictor_weights is in data.
  if(!(".predictor_weights" %in% colnames(data))){
    stop("`.predictor_weights` column is missing. Please run `generate_weights()` to generate this data field.")
  }

  if(placebo){

    data %>%
      dplyr::filter(.type=="controls") %>%
      dplyr::select(.id,.placebo,.predictor_weights) %>%
      tibble::as_tibble() %>%
      tidyr::unnest(.predictor_weights)

  } else {

    data %>%
      dplyr::filter(.placebo==0) %>%
      dplyr::filter(.type=="controls") %>%
      dplyr::select(.predictor_weights) %>%
      tibble::as_tibble() %>%
      tidyr::unnest(.predictor_weights)

  }

}

#' grab_loss
#'
#' Extract the RMSE loss of the optimized weights from the synth pipeline.
#'
#' @param data nested data of type `tbl_df`
#'
#' @return tibble data frame
#' @export
#'
#' @examples
#'
#' \donttest{
#'
#' # Smoking example data
#' data(smoking)
#'
#' smoking_out <-
#' smoking %>%
#'
#' # initial the synthetic control object
#' synthetic_control(outcome = cigsale,
#'                   unit = state,
#'                   time = year,
#'                   i_unit = "California",
#'                   i_time = 1988,
#'                   generate_placebos=TRUE) %>%
#'
#' # Generate the aggregate predictors used to generate the weights
#'   generate_predictor(time_window=1980:1988,
#'                      lnincome = mean(lnincome, na.rm = TRUE),
#'                      retprice = mean(retprice, na.rm = TRUE),
#'                      age15to24 = mean(age15to24, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1984:1988,
#'                      beer = mean(beer, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1975,
#'                      cigsale_1975 = cigsale) %>%
#'
#'   generate_predictor(time_window=1980,
#'                      cigsale_1980 = cigsale) %>%
#'
#'   generate_predictor(time_window=1988,
#'                      cigsale_1988 = cigsale) %>%
#'
#'
#'   # Generate the fitted weights for the synthetic control
#'   generate_weights(optimization_window =1970:1988,
#'                    Margin.ipop=.02,Sigf.ipop=7,Bound.ipop=6) %>%
#'
#'   # Generate the synthetic control
#'   generate_control()
#'
#' # grab the MSPE loss from the optimization of the weights.
#' smoking_out %>% grab_loss()
#'
#' }
#'
grab_loss <- function(data){
  UseMethod("grab_loss")
}

#' @export
grab_loss <- function(data){
  # Check if .loss is in data.
  if(!(".loss" %in% colnames(data))){
    stop("`.loss` column has been removed. Please run `generate_weights()` to generate this data field.")
  }

  data %>%
    dplyr::select(.id,.placebo,.loss) %>%
    tibble::as_tibble() %>%
    tidyr::unnest(cols = c(.loss)) %>%
    dplyr::distinct()
}


#' grab_synthetic_control
#'
#' Extract the synthetic control as a data frame generated using
#' `generate_control()` from the synth pipeline.
#'
#' @param data nested data of type `tbl_df`
#' @param placebo boolean flag; if TRUE placebo values are returned as well (if
#'   available). Default is FALSE.
#'
#' @return tibble data frame
#' @export
#'
#' @examples
#'
#' \donttest{
#'
#' # Smoking example data
#' data(smoking)
#'
#' smoking_out <-
#' smoking %>%
#'
#' # initial the synthetic control object
#' synthetic_control(outcome = cigsale,
#'                   unit = state,
#'                   time = year,
#'                   i_unit = "California",
#'                   i_time = 1988,
#'                   generate_placebos=TRUE) %>%
#'
#' # Generate the aggregate predictors used to generate the weights
#'   generate_predictor(time_window=1980:1988,
#'                      lnincome = mean(lnincome, na.rm = TRUE),
#'                      retprice = mean(retprice, na.rm = TRUE),
#'                      age15to24 = mean(age15to24, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1984:1988,
#'                      beer = mean(beer, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1975,
#'                      cigsale_1975 = cigsale) %>%
#'
#'   generate_predictor(time_window=1980,
#'                      cigsale_1980 = cigsale) %>%
#'
#'   generate_predictor(time_window=1988,
#'                      cigsale_1988 = cigsale) %>%
#'
#'
#'   # Generate the fitted weights for the synthetic control
#'   generate_weights(optimization_window =1970:1988,
#'                    Margin.ipop=.02,Sigf.ipop=7,Bound.ipop=6) %>%
#'
#'   # Generate the synthetic control
#'   generate_control()
#'
#' # Grab a data frame containing the observed outcome and the synthetic control outcome
#' smoking_out %>% grab_synthetic_control()
#'
#'
#' # Grab the data frame with the placebos.
#' smoking_out %>% grab_synthetic_control(placebo=TRUE)
#'
#' }
#'
grab_synthetic_control <- function(data,placebo=FALSE){
  UseMethod("grab_synthetic_control")
}

#' @export
grab_synthetic_control <- function(data,placebo=FALSE){

  # Check if .synthetic_control is in data.
  if(!(".synthetic_control" %in% colnames(data))){
    stop("`.synthetic_control` column is missing. Please run `generate_control()` to generate this data field.")
  }

  if(placebo){
    data %>%
      dplyr::filter(.type=="treated") %>%
      dplyr::select(.id,.placebo,.synthetic_control) %>%
      tidyr::unnest(.synthetic_control)
  }else{
    data %>%
      dplyr::filter(.placebo==0,.type=="treated") %>%
      dplyr::select(.synthetic_control) %>%
      tidyr::unnest(.synthetic_control)
  }

}



#' grab_significance
#'
#' Generate inferential statistics comparing the rarety of the unit that
#' actually received the intervention to the placebo units in the donor pool.
#'
#' Inferential statitics are generated by comparing the observed difference
#' between the actual treated unit and its synthetic control to each placebo
#' unit and its synthetic control. The rarity of the actual to the placebo is
#' used to infer the likelihood of observing the effect.
#'
#' Inference in this framework leverages the mean squared predictive error
#' (MSPE) of the fit in the pre-period to the fit in the post-period as a ratio.
#'
#' \deqn{\frac{RMSE_{Post}}{RMSE_{Pre}}}
#'
#' The ratio captures the differences between the pre-intervention fit and the
#' post-intervention divergence of the trend (i.e. the causal quantity). A good
#' fit in the pre-period denotes that the observed and synthetic case tracked
#' well together. Divergence in the post-period captures the difference brought
#' about by the intervention in the two trends. Thus, when the ratio is high, we
#' observe more of a difference between the two trends. If, however, the
#' pre-period fit is poor, or there is not substantial divergence in the
#' post-period, then this ratio amount will be smaller.
#'
#' The Fisher's Exact P-Value is generated by ranking the ratios for the treated
#' and placebo units. The P-Value is then calculated by dividing the rank of the
#' case over the total (rank/total). The case with the highest RMSE ratio is
#' rare given the distribution of cases as generated by the placebo. A more
#' detailed outline of inference within the synthetic control framework can be
#' found in Adabie et al. 2010.
#'
#' Note that conventional significance levels are not achievable if there is an
#' insufficient number of control cases. One needs at least 20 control case to
#' use the conventional .05 level. With fewer cases, significance levels need to
#' be adjusted to accommodate the low total rank. This is a bug of rank based
#' significance metrics.
#'
#' In addition to the Fisher's Precise P-Value, a Z-score is also included,
#' which is just the standardized RMSE ratios for all the cases. The Z-Score
#' captures the degree to which a particular case's RMSE ratio deviates from the
#' distribution of the placebo cases.
#'
#'
#' @param data nested data of type `tbl_df`
#' @param time_window time window that the significance values should be
#'   computed.
#'
#' @return tibble data frame containing the following fields:
#'
#'   - `unit_name`: name of the unit
#'
#'   - `type`: treated or donor unit (placebo)
#'
#'   - `pre_mspe`: pre-intervention period means squared predictive error
#'
#'   - `post_mspe`: post-intervention period means squared predictive error
#'
#'   - `mspe_ratio`: post_mspe/pre_mspe; captures the difference in fit in the
#'   pre and post period. A good fit in the pre-period and a poor fit in the
#'   post-period reflects a meaningful effect when comparing the difference
#'   between the observed outcome and the synthetic control.
#'
#'   - `rank`: rank order of the mspe_ratio.
#'
#'   - `fishers_exact_pvalue`: rank/total to generate a p-value. Conventional
#'   levels aren't achievable if there isn't a sufficient number of controls to
#'   generate a large enough ranking. Need at least 20 control units to use the
#'   conventional .05 level.
#'
#'   - `z_score`: (mspe_ratio-mean(mspe_ratio))/sd(mspe_ratio); captures the
#'   degree to which the mspe_ratio of the treated unit deviates from the mean
#'   of the placebo units. Provinding an alternative significance determination.
#'
#' @export
#'
#' @examples
#'
#' \donttest{
#'
#' # Smoking example data
#' data(smoking)
#'
#' smoking_out <-
#' smoking %>%
#'
#' # initial the synthetic control object
#' synthetic_control(outcome = cigsale,
#'                   unit = state,
#'                   time = year,
#'                   i_unit = "California",
#'                   i_time = 1988,
#'                   generate_placebos=FALSE) %>%
#'
#' # Generate the aggregate predictors used to generate the weights
#'   generate_predictor(time_window=1980:1988,
#'                      lnincome = mean(lnincome, na.rm = TRUE),
#'                      retprice = mean(retprice, na.rm = TRUE),
#'                      age15to24 = mean(age15to24, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1984:1988,
#'                      beer = mean(beer, na.rm = TRUE)) %>%
#'
#'   generate_predictor(time_window=1975,
#'                      cigsale_1975 = cigsale) %>%
#'
#'   generate_predictor(time_window=1980,
#'                      cigsale_1980 = cigsale) %>%
#'
#'   generate_predictor(time_window=1988,
#'                      cigsale_1988 = cigsale) %>%
#'
#'
#'   # Generate the fitted weights for the synthetic control
#'   generate_weights(optimization_window =1970:1988,
#'                    Margin.ipop=.02,Sigf.ipop=7,Bound.ipop=6) %>%
#'
#'   # Generate the synthetic control
#'   generate_control()
#'
#' # Plot the observed and synthetic trend
#' smoking_out %>% grab_significance(time_window = 1970:2000)
#'
#' }
#'
grab_significance <- function(data,time_window = NULL){
  UseMethod("grab_significance")
}

#' @export
grab_significance <- function(data,time_window = NULL){

  # Check if .synthetic_control is in data.
  if(!(".synthetic_control" %in% colnames(data))){stop("`.synthetic_control` column is missing. Please run `generate_control()` to generate this data field.")}
  if(!(".meta" %in% colnames(data))){ stop("`.meta` column is missing. `.meta` column needs to be included for `generte_control()` to work.")}


  # Grab meta data
  trt_time <- data$.meta[[1]]$treatment_time
  time_index <- data$.meta[[1]]$time_index

  # If no time window is specified for the table, calculate the entire series
  if(is.null(time_window)){
    time_window <- unique(data$.original_data[[1]][[time_index]])
  }else{
    if(max(time_window) <= trt_time){
      stop("The specified time window must contain post-treatment time units to calculate the post-MSPE. The current window only contains pretreatment periods. Please adjust.")
    }
  }

  # Formulate the output data using the donor and treated synthetic controls
  data %>%
    grab_synthetic_control(placebo = TRUE) %>%
    dplyr::filter(time_unit %in% time_window) %>%
    dplyr::group_by(.id, period = ifelse(time_unit <= trt_time,"pre_mspe","post_mspe"))  %>%
    dplyr::summarize(.placebo = mean(.placebo),
                     mspe = sum((real_y - synth_y)^2)/dplyr::n(),.groups='drop') %>%
    tidyr::pivot_wider(names_from = period,values_from = mspe) %>%
    dplyr::mutate(mspe_ratio = post_mspe/pre_mspe) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(dplyr::desc(mspe_ratio)) %>%
    dplyr::mutate(rank = dplyr::row_number(),
                  fishers_exact_pvalue = rank / max(rank),
                  z_score = (mspe_ratio-mean(mspe_ratio))/stats::sd(mspe_ratio),
                  type = ifelse(.placebo==0,"Treated","Donor")) %>%
    dplyr::select(unit_name=.id,type,pre_mspe,post_mspe,dplyr::everything(),-.placebo)
}



#' grab_balance_table
#'
#' Compare the distributions of the aggregate-level predictors for the observed
#' intervention unit, the synthetic control, and the donor pool average. Table
#' helps user compare the the level of balance produced by the synthetic
#' control.
#'
#' @param data nested data of type `tbl_df`
#'
#' @return tibble data frame containing balance statistics between the
#'   observed/synthetic unit and the donor pool for each variable used to fit
#'   the synthetic control.
#'
#' @export
#'
#' @examples
#'
#' \donttest{
#' data(smoking)
#' smoking_out <-
#' smoking %>%
#' synthetic_control(outcome = cigsale,
#'                   unit = state,
#'                   time = year,
#'                   i_unit = "California",
#'                   i_time = 1988,
#'                   generate_placebos=FALSE) %>%
#'   generate_predictor(time_window=1980:1988,
#'                      lnincome = mean(lnincome, na.rm = TRUE),
#'                      retprice = mean(retprice, na.rm = TRUE),
#'                      age15to24 = mean(age15to24, na.rm = TRUE)) %>%
#'   generate_predictor(time_window=1984:1988,
#'                      beer = mean(beer, na.rm = TRUE)) %>%
#'   generate_predictor(time_window=1975,
#'                      cigsale_1975 = cigsale) %>%
#'   generate_predictor(time_window=1980,
#'                      cigsale_1980 = cigsale) %>%
#'   generate_predictor(time_window=1988,
#'                      cigsale_1988 = cigsale) %>%
#'   generate_weights(optimization_window =1970:1988,
#'                    Margin.ipop=.02,Sigf.ipop=7,Bound.ipop=6) %>%
#'   generate_control()
#'
#' smoking_out %>% grab_balance_table()
#'
#' }
#'
grab_balance_table <- function(data){
  UseMethod("grab_balance_table")
}

#' @export
grab_balance_table <- function(data){

  # Checks
  if(!(".meta" %in% colnames(data))){ stop("`.meta` column is missing. `.meta` column needs to be included for `generte_control()` to work.")}
  if(!(".unit_weights" %in% colnames(data))){ stop("`.unit_weights` column is missing. Run `generate_weights()` prior to running this function.")}
  if(!(".predictor_weights" %in% colnames(data))){ stop("`.predictor_weights` column is missing. Run `generate_weights()` prior to running this function.")}


  # Treated mean values
  treated_values <-
    data %>%
    grab_predictors(type="treated",placebo = FALSE)

  # Synthetic Control Weighted Values
  control_values =
    data %>%
    grab_predictors(type="controls") %>%
    tidyr::gather(unit,value,-variable) %>%
    dplyr::left_join(grab_unit_weights(data),by="unit") %>%
    dplyr::mutate(value_adjusted = value*weight) %>%
    dplyr::group_by(variable) %>%
    dplyr::summarize(synthetic_values = sum(value_adjusted),.groups='drop')
  colnames(control_values)[2] = paste0("synthetic_",colnames(treated_values)[2])

  # Donor mean values
  donor_values <-
    data %>%
    grab_predictors(type="controls") %>%
    tidyr::gather(unit,value,-variable) %>%
    dplyr::group_by(variable) %>%
    dplyr::summarize(donor_sample = mean(value),.groups='drop')

  # Combine as a table
  table_ <-
    dplyr::left_join(treated_values,control_values,by='variable') %>%
    dplyr::left_join(donor_values,by='variable')

  return(table_)
}

Try the tidysynth package in your browser

Any scripts or data that you put into this service are public.

tidysynth documentation built on May 31, 2023, 6:13 p.m.