R/predict_iii.R
In i3pack: Incentives for Inter- And Intra-Party Electoral Competition

Documented in predict_iii

#' Predict II Score for a given set of electoral rule configurations
#'
#' @param data A data.frame containing the following variables: `ballot_type` (factor),
#'  `pool_level` (factor), `votes_per_voter` (factor), `M` (numeric), `threshold` (numeric), and `formula` (factor). See [simulate_election()] for more details.
#' @param score Character string indicating type of score to predict; one of `TDE` (default) or `AP`.
#' @param district_level Boolean: Should district level, or country level models be used? If `TRUE` (default), the
#' function uses district level models, which are more accurate for district-level electoral systems. If `FALSE`, the 
#' function uses country-level models, which are more accurate for country-level electoral systems.
#' @param return_avg Boolean: Should the average score across imputed models be returned? The original models
#' were trained on millions of simulated elections, with intermediate values for some parameters interpolated using
#' 5 multiple imputations. If `TRUE` (default), the function returns the average score across all imputations.
#' If `FALSE`, the function returns a list of scores. 
#' @return Predicted TDE or AP score for given electoral system
#' @examples
#' ## Create example data for PR system with closed party lists,
#' ## magnitude 5, and Droop quota
#' new_system <- data.frame(ballot_type = as.factor("closed"),
#'                          pool_level = as.factor("party"),
#'                          votes_per_voter = as.factor("One"),
#'                          M = 5.0,
#'                          threshold = 0.05,
#'                          formula = as.factor("droop"))
#' predict_iii(data = new_system, score = "AP", district_level = FALSE)
#'
#' @export
#' @importFrom gbm predict.gbm
#' @importFrom readr read_rds
#'
predict_iii <- function(data,
                        score = c("TDE","AP"),
                        district_level = TRUE,
                        return_avg = TRUE){
  
  score <- match.arg(score)
  requireNamespace("gbm", quietly = TRUE)
  ## Set url's
  base_url <- "https://github.com/solivella/i3packModels/raw/refs/heads/main/"
  mod_url <- case_when(district_level == TRUE & score == "AP" ~ "ap_dist",
                       district_level == TRUE & score == "TDE" ~ "tde_dist",
                       district_level == FALSE & score == "AP" ~ "ap_ctry",
                       district_level == FALSE & score == "TDE" ~ "tde_ctry")
  full_url <- paste0(base_url, mod_url, ".rds")
  
  ## Dowload model object
  pred_gbm <- readr::read_rds(full_url)
  
  ## Check data object
  if(!all(c("ballot_type",
            "pool_level",
            "votes_per_voter",
            "M",
            "threshold",
            "formula") %in%
          names(data)))
  {
    stop("data does not include all necessary prediction variables (i.e. component rules). See help documentation for more details.")
  }
  implemented_formulas <- c("plurality", "abs. majority",
                            "dhondt", "hare", "droop",
                            "imperiali", "saintelague", "modsaintlague", "hagenbachbischoff",
                            "fortified_pr") 
  if(!all(data$formula %in% implemented_formulas)){
    stop(paste0("unrecognized values in 'formula'; must be one of ", implemented_formulas))
  }
  stopifnot("threshold is not numeric" = is.numeric(c(data$threshold)),
            "magnitude is not numeric" = is.numeric(c(data$M)),
            "unrecognized values in ballot_type; must be one of 'closed', 'flexible', or 'open'." = all(unique(c(data$ballot_type)) %in% c("closed", "flexible", "open")),
            "unrecognized values in pool_level; must be one of 'candidate', 'party', or 'party_list'." = all(unique(c(data$pool_level)) %in% c("candidate", "party", "party_list")),
            "unrecognized values in votes_per_voter; must be one of 'One', 'M', or 'M-1'." = all(unique(c(data$votes_per_voter)) %in% c("One", "M", "M-1")))
  
  ## Replace data names to match trained models
  names(data)[which(names(data)=="votes_per_voter")] <- "new.nvotes"
  
  ## Return predicted data
  ret_val <- lapply(pred_gbm,
                     function(x) {
                       gbm::predict.gbm(x, newdata = data, n.trees = x$n.trees)
                     })
  if(return_avg){
    ret_val <- Reduce("+", ret_val) / length(ret_val)
  } 
  return(ret_val)
}