knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
In this vignette we show how we extracted incremental validity figures in Cohen's d that are reported in Table 2. We use the results from the 1000 resamples in the training set to compare the overall performance of a two competing models. The comparisons are of two types: - for the models with the same predictor set but using different algorithms (elastic net vs. random forest) - for models using elastic net between models with an incremetally larger set of predictors. (These results are presented in Table 2.)
We use McFadden's pseudo R-squared as a measure of overall performance but transform these values to d before comparing models.
library(recidivismsl) library(dplyr)
We will use this function to compare an incremental model to a simpler model. It will:
- extract the columns in the values
element of model_perfs_training_set1000
that refer to the McFadden pseudo R2 values for the incremental (incr_mod
) and the simpler models (simpler_mod
). (It will do this via using column names that are specified later.)
- transform these R2 values to d. (See ?calc_d_from_R2()
)
- calculate the median, percetiles 2.5 and 97.5, and the proportion of cases when the difference is 0 or negative (interpreted as a one-sided p_value)
R2_difference_in_d <- function(incr_mod, simpler_mod, resamples = model_perfs_training_set1000) { data_frame(R2_b = resamples$values[[incr_mod]], R2_w = resamples$values[[simpler_mod]]) %>% mutate(incr_d = calc_d_from_R2(R2_b), simpler_d = calc_d_from_R2(R2_w), diff_d = incr_d - simpler_d) %>% summarise(median_diff_d = median(diff_d), LL = quantile(diff_d, 0.025), UL = quantile(diff_d, 0.975), p_one_sided = mean(diff_d <= 0)) }
Create a data frame with pairs of models to compare: corresponding elastic net (glmnet) and random forest (rf) models.
compared_models <- model_grid %>% filter(analysis == "Main analyses" & model_type != "Logistic regression") %>% select(model_name) %>% purrr::as_vector() model_pairs_algo <- data_frame(glmnet_mod = stringr::str_subset(compared_models, "glmnet"), rf_mod = stringr::str_subset(compared_models, "rf")) %>% mutate(glmnet_mod = paste0(glmnet_mod, "~McF_R2"), rf_mod = paste0(rf_mod, "~McF_R2"))
Make the comparisons.
purrr::map2_df(.x = model_pairs_algo$glmnet_mod, .y = model_pairs_algo$rf_mod, .f = ~R2_difference_in_d(.x, .y)) %>% # Bind in columns with compared models bind_cols(model_pairs_algo, .) %>% # Clean up model names mutate_at(c("glmnet_mod", "rf_mod"), ~stringr::str_remove(., "~McF_R2"))
Create a data frame with pairs of models to compare: models with same outcome and incremental predictor sets.
incremental_mod <- c("gen_stat_glmnet", "gen_bgnn_glmnet", "gen_allp_glmnet", "vio_stat_glmnet", "vio_bgnn_glmnet", "vio_allp_glmnet") baseline_mod <- c("gen_rita_glmnet", "gen_stat_glmnet", "gen_bgnn_glmnet", "vio_rita_glmnet", "vio_stat_glmnet", "vio_bgnn_glmnet")
Make the comparisons.
model_pairs_preds <- data_frame(incr = incremental_mod, base = baseline_mod) %>% mutate(incr = paste0(incr, "~McF_R2"), base = paste0(base, "~McF_R2")) purrr::map2_df(.x = model_pairs_preds$incr, .y = model_pairs_preds$base, .f = ~R2_difference_in_d(.x, .y)) %>% bind_cols(model_pairs_preds, .) %>% mutate_at(c("incr", "base"), ~stringr::str_remove(., "~McF_R2"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.