rollmatch: Rolling Entry Matching

Documented in rollmatch

#' Rolling entry matching
#'
#' \code{rollmatch} is the last of 3 main functions in the rollmatch package
#' \emph{<rollmatch>} implements a comparison group selection
#' methodology for interventions with rolling participant entry over time.
#' A difficulty in evaluating rolling entry interventions is that a suitable
#' "entry" date is not observed for non-participants. This method, called
#' rolling entry matching, assigns potential comparison non-participants
#' multiple counterfactual entry periods which allows for matching of
#' participant and non-participants based on data immediately preceding each
#' participant's specific entry period, rather than using data from a fixed
#' pre-intervention period.
#'
#' Rolling entry matching requires preliminary steps. This package will 
#' assist the user in steps 2 and 3. First, a quasi-panel dataset
#' is constructed containing multiple observations of non-participants (one for
#' each entry period). Participants enter the data once in the baseline period
#' immediately preceding their unique entry into the intervention. Time-varying
#' covariates (e.g., health conditions, spending, utilization) are dynamic for
#' each entry period's non-participant observations. The user of rollmatch
#' is expected to have already created this quasi-panel dataset. Second, the
#' pool of potential comparisons for each participant is restricted to those
#' that have the same "entry period" into the intervention
#' (see function "reduce_data"). Finally, a predicted
#' probability of treatment is obtained for participants and non-participants
#' (e.g. through propensity score matching). The user can use function
#' "score_data" to complete this step, or create use their own propensity score 
#' calculation. 
#' 
#' The final step consists of the matching algorithm. The algorithm selects
#' the best matched comparison(s) for each participant from the pool of
#' non-participants with the same entry period. This is completed via the
#' function "rollmatch". 
#' 
#' @param scored_data Output from scored_data() or the output from
#' reduce_data() with propensity scores labeled "score".
#' @param data Original dataset before reduce_data() was ran.
#' @param treat String for name of treatment variable in data.
#' @param tm String for time period indicator variable name in data.
#' @param entry String for name of time period in which the participant
#' enrolled in the intervention (in the same units as the tm variable).
#' @param id String for individual id variable name in data.
#' @param vars Vector of column names used in the propensity score algorithm.
#' This is used when creating the balance table. 
#' @param lookback The number of time periods to look back before the
#' time period of enrollment (1-...).
#' @param alpha Part of the pre-specified distance within which to allow
#' matching. The caliper width is calculated as the \code{alpha} multiplied by
#' the pooled standard deviation of the propensity scores or the logit of the
#' propensity scores - depending on the value of \code{match_on}.
#' @param standard_deviation String. 'average' for average pooled standard
#' deviation, 'weighted' for weighted pooled standard deviation, and
#' 'None' to not use a standard deviation multiplication. Default is "average".
#' @param num_matches Number of comparison beneficiary matches to attempt
#' to assign to each treatment beneficiary. Default is 3. 
#' @param replacement Assign comparison beneficiaries with replacement (TRUE)
#' or without replacement (FALSE). If \code{replacement} is TRUE, then
#' comparison beneficiaries will be allowed to be used with replacement within
#' a single quarter, but will not be allowed to match to different treatment
#' beneficiaries across multiple quarters. Default is TRUE.
#'
#' @examples
#' data(package="rollmatch", "rem_synthdata_small")
#' reduced_data <- reduce_data(data = rem_synthdata_small, treat = "treat",
#'                             tm = "quarter", entry = "entry_q",
#'                             id = "indiv_id", lookback = 1)
#' fm <- as.formula(treat ~ qtr_pmt + yr_pmt + age)
#' vars <- all.vars(fm)
#' scored_data <- score_data(reduced_data = reduced_data,
#'                           model_type = "logistic", match_on = "logit",
#'                           fm = fm, treat = "treat",
#'                           tm = "quarter", entry = "entry_q", id = "indiv_id")
#' output <- rollmatch(scored_data, data=rem_synthdata_small, treat = "treat",
#'                     tm = "quarter", entry = "entry_q", id = "indiv_id",
#'                     vars = vars, lookback = 1, alpha = .2,
#'                     standard_deviation = "average", num_matches = 3,
#'                     replacement = TRUE)
#' output
#'
#' @return \code{rollmatch} returns an object of class "rollmatch".
#'
#' An object of class "rollmatch" is a list containing the following components:
#' \item{model}{The output of the model used to estimate the distance measure.}

#' \item{scores}{The propensity scores used in the matching algorithm. }
#' \item{data}{The original dataset with all matches added.}
#' \item{summary}{A basic summary table with counts of matched and unmatched
#' data.}
#' \item{ids_not_matched}{A vector of the treatment IDs that were not matched.}
#' \item{total_not_matched}{The number of treatment IDs not matched.}
#' \item{matched_data}{R data.frame of matches with scores, matching information,
#' and the weights of the individuals}
#' 
#' \item{balance}{table showing the full treatment, full control, matched
#' treatment, and matched comparison group means and standard deviations for
#' the variables used in the model.}
#'
#' @importFrom stats ave aggregate binomial glm qlogis reshape time var
#' @import dplyr
#' @import magrittr
#' @export 
#'
rollmatch <- function(scored_data, data, treat, tm, entry, id, vars, lookback,
                      alpha = 0, standard_deviation = "average",
                      num_matches = 3, replacement=TRUE) {

  run_checks_one(scored_data, treat, tm, entry, id)
  run_checks_two(scored_data, alpha, standard_deviation, num_matches,
                 replacement)
  check_lookback(data, lookback = lookback, entry = entry)

  if (is.data.frame(scored_data) == FALSE){
    stop("'data' input parameter must be a data.frame")
  } else if (!("score" %in% colnames(scored_data)) ){
    stop("Column 'score' must be in variable 'scored_data'")
  }

  # Create pool of possible matches
  comparison_pool <- compare_pool(scored_data, treat, tm, entry, id)
  # Trim pool based on specified caliper
  trimmed_pool <- trim_pool(alpha = alpha, comparison_pool = comparison_pool,
                            scored_data = scored_data, treat = treat, tm = tm,
                            standard_deviation = standard_deviation)

  # Using matching algorithm to find top matches
  matches <- create_matches(trimmed_pool, tm, num_matches, replacement)
  # Add additional clmns: total_matches, treatment_weight, control_matches
  matches <- add_matches_columns(matches)
  # Combine datasets and values in preperation for output
  combined_output <- make_output(scored_data = scored_data,
                                 data = data, matches = matches,
                                 treat = treat, tm = tm, entry = entry,
                                 id = id, lookback = lookback)
  # Add balance table to the output
  output <- add_balance_table(scored_data = scored_data, vars = vars, tm = tm,
                              id = id, combined_output = combined_output,
                              treat = treat, matches = matches)
  # Set the class
  class(output) <- "rollmatch"

  return(output)
}