#' Proportional odds Testing
#'
#' Tests for proportional odds assumption in data. You can use as many predictors
#' as you want, but you cannot use random effects (only fixed effects allowed)
#' for the time being. A message will be displayed whether if the proportional odds
#' assumption is likely met or not. A dataframe with estimates and confidence intervals
#' will be returned if you want to plot th rsults to double check.
#'
#' @param ord_data Data frame with your data
#' @param model_formula The model formula to provide to a glm call
#' @param clm_obj Optional CLM or CLMM object to compare estimates to
#' @param .link Link function, defaults to logit for proportional odds model,
#' message is sent if this differs from clm_obj's link function
#' @param control Control to use for mixed models, defaults to
#'
#' @return A data frame with estimates and confidence intervals for each scale response
#' @export
#'
test_propodds <- function(ord_data, model_formula, clm_obj = NULL, .link = "logit", control = list()) {
if (!is.null(clm_obj)) {
if (!class(clm_obj) %in% c('clmm','clm'))
stop("clm_obj must be of class clm or clmm")
if (clm_obj$link != .link)
message(glue::glue("CLM link is {clm_obj$link} but .link is {.link}"))
}
requireNamespace("ordinal", quietly = TRUE)
model_formula <- as.character(model_formula)
# Check if the model is mixed (glmer) or not (glm) to use appropriate functions
is_mixed <- grepl("\\|", model_formula[[3L]])
if (is_mixed)
requireNamespace("lme4", quietly = TRUE)
mdl_fx <- ifelse(is_mixed, lme4::glmer, stats::glm)
coef_fx <- ifelse(is_mixed, lme4::fixef, stats::coef)
response_var <- model_formula[[2L]]
responses <- as.numeric(levels(factor(ord_data[[response_var]])))
results <-
purrr::map_df(
# Ensures the signs of the estimates are equivalent
rev(responses[-1L]),
function(j) .run_single_glm(ord_data,
mdl_fx,
coef_fx,
model_formula,
response_var,
j,
.link,
control
)
)
# Remove the random effects and intercepts terms from the checks
term_names <- unique(results[['term']])
term_names <- stringr::str_sort(term_names[!grepl("(Intercept)|\\.sig", term_names)])
results <- dplyr::filter(results, term %in% term_names)
# If a CLM object is passed, include the confidence regions from it
if (!is.null(clm_obj)){
propodds_checks <- .add_ordinal_result(clm_obj, results)
} else {
propodds_checks <-
results |>
dplyr::group_by(term) |>
dplyr::mutate(check = ci_low < mean(estimate) && ci_high > mean(estimate))
}
n_fail <- sum(propodds_checks[['check']], na.rm = TRUE)
# Print message to console
issue_string <- glue::glue("Issues with {n_fail}/{nrow(propodds_checks)}")
if (n_fail / nrow(propodds_checks) < .1)
message(crayon::green(glue::glue("Proportional Odds assumption likely holds. {issue_string}.")))
else
message(crayon::red(glue::glue("Proportional Odds assumption might not hold. {issue_string}.")))
propodds_checks
}
#' Merge CLMM object
#'
#' Adds the confidence intervals for the CLM estimated effects to compare the
#' individual binary regressions against. Note that interactions must be
#' specified in the same order in both model equations. Will check if the
#' confidence intervals from the binary regressions include the confidence
#' region from the CLM/CLMM estimates.
#'
#' @param clm_obj CLM or CLMM object from the ordinal package
#' @param results Results from .run_single_glm
#'
#' @return Extended dataframe
.add_ordinal_result <- function(clm_obj, results) {
estimates <-
broom::tidy(clm_obj)[,c('term','estimate')] |>
`colnames<-`(c('term','clm_estimate'))
confidence_intervals <-
stats::confint(clm_obj, level = .99) |>
tibble::as_tibble(rownames = "term") |>
`colnames<-`(c('term',"clm_low","clm_high"))
results |>
dplyr::left_join(merge(estimates, confidence_intervals)) |>
dplyr::group_by(term) |>
dplyr::mutate(check = ifelse(ci_low > clm_high | ci_high < clm_low, 1, 0))
}
#' Run binary regression
#'
#' Split data at cutpoint j and run a binary regression with the given
#' link function.
#'
#' @param ord_data Ordinal dataframe
#' @param mdl_fx Model function to use, either glm or glmer
#' @param coef_fx Coefficient function to use, either fixef or coef
#' @param model_formula Model formula to evaluate
#' @param response_var LHS of the formula
#' @param j Cutpoint value
#' @param .link Link function, default is logit (ie logistic regression)
#' @param control Control to use for mixed models, will default to bobyqa if not specified
#'
#' @return Dataframe with binary regression results
.run_single_glm <- function(ord_data, mdl_fx, coef_fx, model_formula, response_var, j, .link = "logit", control=list()) {
# Recode values below threshold to 1, above threshold to 0, update model formula
new_data <- dplyr::mutate(ord_data, j_response = as.numeric(!!sym(response_var) >= j))
model_formula[[2L]] <- "j_response"
# Concatenate formula to use for this run
glm_formula <- stats::formula(paste(model_formula[[2L]],"~",model_formula[[3L]]))
# If this is a mixed model and control is not specified, use bobyqa
if (identical(control, list()) & identical(mdl_fx, lme4::glmer))
control <- lme4::glmerControl("bobyqa")
mdl <- mdl_fx(glm_formula, family = binomial(.link), data = new_data, control = control)
# Check for successful convergence with code 0, method is differs for glm & glmer
conv_code <- 0
if (identical(mdl_fx, stats::glm))
conv_code <- ifelse(mdl$converged, 0, 1)
else if (!identical(mdl@optinfo$conv$lme4, list()))
conv_code <- ifelse(is.null(mdl@optinfo$conv$lme4$code), 1, mdl@optinfo$conv$lme4$code)
# Get 99% wald confidence intervals and return results
ci <- stats::confint(mdl, method = "Wald",level = .99)
#TODO: fix the na handling here, as random effects don't work right
ci <- ci[!grepl("\\.sig", rownames(ci)),] # remove any random effects
tibble::tibble(!!response_var := j,
term = names(coef_fx(mdl)),
estimate = coef_fx(mdl),
ci_low = ci[,1L],
ci_high = ci[,2L],
converged = conv_code)
}
#' Plot proportional odds output
#'
#' A very simple helper to plot the estimates from the proportional odds tester.
#'
#' @param propodds_results Output of test_propodds
#'
#' @export
plot_propodds <- function(propodds_results) {
resp_var <- colnames(propodds_results)[1L]
clm_region <- NULL
if ('clm_high' %in% colnames(propodds_results))
clm_region <-
ggplot2::geom_ribbon(aes(x = !!sym(resp_var),
ymax = clm_high,
ymin = clm_low),
fill = "#6edbbe",
alpha=.4,
inherit.aes = FALSE)
requireNamespace("ggplot2", quietly = TRUE)
scale_range <- range(propodds_results[[resp_var]])
scale_values <- seq(from=scale_range[1], to = scale_range[2])
threshold_labels <- paste0(scale_values-1, "|", scale_values)
propodds_results |>
dplyr::filter(term != '(Intercept)') |>
dplyr::group_by(term) |>
dplyr::mutate(check = ifelse(check == 1, "red","black"),
converged = factor(converged),
threshold = factor(paste0(!!sym(resp_var)-1,"|", !!sym(resp_var)))) |>
ggplot2::ggplot(aes(x = !!sym(resp_var), y = estimate, ymax = ci_high, ymin = ci_low, color = I(check), shape = converged)) +
clm_region +
ggplot2::scale_x_continuous(breaks = scale_values, labels = threshold_labels) +
ggplot2::geom_point() +
ggplot2::geom_errorbar() +
# ggplot2::geom_hline(aes(yintercept = mn), linetype = "dashed") +
ggplot2::facet_wrap(~term) +
ggplot2::theme_minimal() +
ggplot2::xlab("Threshold")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.