R/confidence_score_evaluation.R

#' @title
#' Confidence Score Evaluation
#'
#' @description
#' Evaluate the performance of the confidence scores generated by one or more aggregation
#' methods. The evaluation metrics assume probabilistic confidence scores (values between 0 and 1).
#'
#' @param confidence_scores A dataframe in the format output by the `aggreCAT` aggregation methods
#' @param outcomes A dataframe with two columns: `paper_id` (matching the IDs in
#' `confidence_scores`) and `outcome`, containing the known outcomes of the replication studies
#'
#' @importFrom DescTools BrierScore
#' @importFrom MLmetrics Accuracy
#' @importFrom precrec auc
#' @importFrom precrec evalmod
#' @importFrom magrittr %>%
#' 
#' @return
#' A dataframe with four columns: `method` (character; the aggregation method evaluated),
#' `AUC` (Area Under the Curve of the ROC curve; see `?precrec::auc`), `Brier_Score` (see
#' `?DescTools::BrierScore`), and `Classification_Accuracy` (percent correctly classified;
#' see `?MLmetrics::Accuracy`).
#'
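#' @details
#' `Brier_Score` is the mean squared difference between each probabilistic confidence score
#' and the binary outcome (lower is better). `Classification_Accuracy` classifies a paper as
#' replicating when its confidence score exceeds 0.5 and reports the proportion of papers
#' classified correctly.
#'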
#' @examples
#' \donttest{
#' confidence_score_evaluation(data_confidence_scores,
#'                             data_outcomes)
#' }
#'
#' @export

confidence_score_evaluation <- function(confidence_scores,
                                        outcomes){

  ## Combine inputs

  cs_df <- confidence_scores %>%
    dplyr::left_join(outcomes,
                     by = "paper_id")

  ## Add a binary prediction at a 0.5 threshold so classification accuracy can be calculated

  cs_df <- cs_df %>%
    dplyr::mutate(cl = as.numeric(cs > 0.5))

  ## Prediction evaluation: papers without a known outcome are dropped

  eval_df <- cs_df %>%
    dplyr::filter(!is.na(outcome)) %>%
    dplyr::group_by(method) %>%
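    # precrec::auc() returns one row per curve type (ROC first, then PRC);
    # the [1, 4] index below selects the ROC AUC from the `aucs` column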
    dplyr::summarise(AUC = precrec::auc(precrec::evalmod(scores = cs,
                                                         labels = outcome))[1, 4],
                     Brier_Score = DescTools::BrierScore(x = outcome,
                                                         pred = cs),
                     Classification_Accuracy = MLmetrics::Accuracy(y_pred = cl,
                                                                   y_true = outcome))

  return(eval_df)

}
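
A minimal usage sketch with hypothetical toy data. The method labels and score values below are invented for illustration; the column names `method`, `paper_id`, and `cs` follow what the function body above expects, and real inputs would come from the `aggreCAT` aggregation methods.

library(dplyr)  # provides `%>%` and tibble()

# Hypothetical confidence scores for two toy aggregation methods
toy_scores <- tibble(
  method   = rep(c("ArMean", "Median"), each = 4),
  paper_id = rep(paste0("study_", 1:4), times = 2),
  cs       = c(0.80, 0.30, 0.60, 0.20, 0.70, 0.40, 0.90, 0.10)
)

# Known replication outcomes (binary: 1 = replicated, 0 = not replicated)
toy_outcomes <- tibble(
  paper_id = paste0("study_", 1:4),
  outcome  = c(1, 0, 1, 0)
)

confidence_score_evaluation(toy_scores, toy_outcomes)

Note that each method needs papers from both outcome classes; otherwise `precrec::evalmod()` cannot fit an ROC curve.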
