Nothing
#' Waterfall plot
#'
#' @description
#' Generic function primarily used with wrappers targeting
#' types of values changed between two models.
#'
#'
#' @param xpdb_s <`xpose_set`> object
#' @param ... See <[`two_set_dots`]>
#' @param .inorder See <[`two_set_dots`]>
#' @param type See Details.
#' @param .cols <`tidyselect`> data columns to plot.
#' @param max_nind If less than 1, the percentile of absolute
#' change values above which to plot.
#' If above 1, the absolute number of subjects is included. To show all,
#' use an extreme positive number like 9999.
#' @param scale_diff <`logical`> Scale change to the standard deviation
#' of the model 1 column values. Respects faceting.
#' @param show_n <`logical`> For faceting variables, show N per facet.
#' *Not implemented*
#' @param title Plot title
#' @param subtitle Plot subtitle
#' @param caption Plot caption
#' @param tag Plot tag
#' @param plot_name Metadata name of plot
#' @param opt User-specified data options. Only some of these
#' will be used.
#' @param facets <`character`> Faceting variables
#' @param facet_scales <`character`> Forwarded to `facet_*(scales = facet_scales)`
#' @param .problem The problem to be used, by default returns the last one.
#' @param .subprob The subproblem to be used, by default returns the last one.
#' @param .method The estimation method to be used, by default returns the last one.
#' @param quiet Silence extra debugging output
#'
#' @details
#' For type-based customization of plots:
#' \itemize{
#' \item `b` bar plot (from `geom_bar`)
#' \item `h` hline at 0 (from `geom_hline`)
#' \item `t` text of change value (from `geom_text`)
#' }
#'
#' @returns The desired plot
#'
#' @export
#'
xset_waterfall <- function(
xpdb_s,
...,
.inorder=FALSE,
type = "bh",
.cols = NULL,
max_nind = 0.7,
scale_diff = TRUE,
show_n = TRUE,
title = NULL,
subtitle = NULL,
caption = NULL,
tag = NULL,
plot_name = 'waterfall',
opt,
facets = NULL,
facet_scales = "free_x",
.problem,
.subprob,
.method,
quiet
) {
# process dots
two_set_dots(xpdb_s, ..., .inorder=.inorder)
# Now have mod1 and mod2
# Default to the settings for mod1
if (missing(.problem)) .problem <- xpose::default_plot_problem(mod1$xpdb)
if (missing(.subprob))
.subprob <- xpose::last_file_subprob(mod1$xpdb, ext = "ext", .problem = .problem)
if (missing(.method))
.method <- xpose::last_file_method(mod1$xpdb, ext = "ext", .problem = .problem,
.subprob = .subprob)
if (missing(quiet)) quiet <- mod1$xpdb$options$quiet
if (rlang::quo_is_null(rlang::enquo(.cols))) rlang::abort("Must select at least one column.")
# Get combined xpdb
xpdb_f <- franken_xpdb(
mod1$xpdb,
mod2$xpdb,
.cols = {{.cols}},
problem = .problem,
prop_transforms = function(xpdb_f, xpdb_list, problem) {
# Combine dir
franken_prop(
xpdb_f = xpdb_f,
xpdb_list = xpdb_list,
prop = "dir")
}
)
# Selected column
sel_cols <- dplyr::select(
xpose::get_data(xpdb_f, .problem = .problem, quiet = TRUE),
{{.cols}}
) %>% names() %>% unique()
# Data options
combined_columns <- purrr::map(sel_cols, ~paste0(.x, "_", 1:2)) %>%
purrr::list_c()
# Facet processing
# Check if user tries to pass symbol to facets instead of character
if ((rlang::quo_is_symbol(rlang::enquo(facets)) &&
!exists(deparse(substitute(facets)), envir = parent.frame())) ||
(!is.character(facets) &&
!is.null(facets))) {
cli::cli_abort("Facets should be a simple character vector.")
}
post_processing_cov <- function(x) x
if (!is.null(facets) && facets %in% xp_var(xpdb_f, .problem = .problem, type = "catcov")$col) {
# TODO: use apply_lul_wide here for facets, but will have to be sure the new name is captured
# facet_labeller_fn <- apply_lul_wide(xpdb_f, .problem = .problem, show_n = show_n)
}
# post-processing
id_var <- xp_var(xpdb_f, .problem = .problem, type = "id", silent = TRUE)$col
nind_filter <- function(df) {
out <- df %>%
dplyr::group_modify(~{
if (nrow(.x)<=1) return(.x)
if (max_nind<1) .x <- .x %>%
dplyr::filter(abs(value)>quantile(abs(value), max_nind))
if (max_nind>=1) .x <- .x %>%
dplyr::filter(dplyr::row_number() <= min(floor(max_nind),dplyr::n()))
.x
})
if (nrow(out)==0) rlang::abort("Current `max_nind` filters out all data. Consider a different value")
out
}
labeller_fn <- apply_labels_units(xpdb_f, .problem = .problem)
post_processing <- function(df) {
# Given tidy processing, need to summarize from
# variable: col1_1, col1_2, col2_1, col2_2,...
# value: .....
# to
# variable: col1, col2, ...
# value: diffs
# Valid regex to get all col_1s without accidentally grabbing another column with _1
col1_regex <- paste(sel_cols,collapse="|") %>%
paste0("^(",.,")_1$")
if (any(is.na(df$value))) {
cli::cli_warn(
"NA values in columns {.bold {unique(df$variable[is.na(df$value)])}} will be removed."
)
}
df %>%
dplyr::filter(!.data[[id_var]] %in% .data[[id_var]][is.na(value)]) %>%
dplyr::mutate(
new_variable = stringr::str_extract(variable, "(^.*)_\\d+$", group=1)
) %>%
dplyr::group_by(new_variable, across(dplyr::any_of(facets))) %>%
dplyr::mutate(
# scale values (relative to model 1) or keep as is
value_weight = sd(value[grepl(col1_regex, variable)]),
value = ifelse(
scale_diff==TRUE & value_weight>0, # TODO: document that this silent check occurs
(value - mean(value[grepl(col1_regex, variable)]))/value_weight,
value
)
) %>%
dplyr::ungroup() %>%
# Calculate difference
dplyr::group_by(new_variable, across(dplyr::any_of(id_var))) %>%
dplyr::mutate(
variable = new_variable,
value = diff(value)
) %>%
dplyr::distinct(variable, .keep_all=TRUE) %>%
dplyr::ungroup() %>%
# Filter number
dplyr::arrange(-abs(value)) %>%
dplyr::group_by(variable, across(dplyr::any_of(facets))) %>%
nind_filter() %>%
# Ensure waterfall sort
dplyr::arrange(-value) %>%
dplyr::ungroup() %>%
# Apply labels and units
labeller_fn() %>%
# Make sure ID stays in order
dplyr::mutate(
id_label=paste(.data[[id_var]]),
id_order=forcats::as_factor(dplyr::row_number()) %>%
forcats::fct_inorder()
)
}
def_opt <- xpose::data_opt(.problem = .problem, .subprob = .subprob, .method = .method,
filter = xpose::only_distinct(xpdb_f, .problem, facets, quiet),
tidy = TRUE, value_col = combined_columns,
post_processing = post_processing)
if (!missing(opt)) {
if ("post_processing" %in% names(opt) && is.function(opt$post_processing)) {
def_opt$post_processing <- function(df) {
df %>%
post_processing() %>%
{opt$post_processing(.)}
}
}
}
if (missing(opt)) opt <- def_opt
# Only some data options should be used
data <- xpose::fetch_data(xpdb_f, quiet = quiet, .problem = def_opt$problem, .subprob = def_opt$subprob,
.method = def_opt$method, .source = opt$source, simtab = opt$simtab,
filter = def_opt$filter, tidy = def_opt$tidy, index_col = NULL,
value_col = def_opt$value_col, post_processing = def_opt$post_processing)
if (!is.null(data) && all(data$value==0))
cli::cli_warn(paste("All individual value differences are 0.",
"This is unlikely to be an informative plot,",
"since these are probably the same model."))
if (is.null(data) || nrow(data) == 0) {
rlang::abort('No data available for plotting. Please check the variable mapping and filering options.')
}
# Check type
allow_types <- c("b", "t","h")
xpose::check_plot_type(type, allowed = allow_types)
check_type <- purrr::map(allow_types, ~stringr::str_detect(type, stringr::fixed(.x, ignore_case = TRUE))) %>%
setNames(allow_types)
# Start plot# Assign gg_theme
gg_theme <- xpdb_f$gg_theme
if (is.function(gg_theme)) {
gg_theme <- do.call(gg_theme, args = list())
}
# Create ggplot base
xp <- ggplot2::ggplot(data = data,
aes(x = id_order,
y = value)
) + gg_theme +
ggplot2::scale_x_discrete(
labels = data$id_label,
breaks = data$id_order
)
# Add hline
if (check_type$h) {
xp <- xp + xpose::xp_geoms(mapping = NULL,
xp_theme = xpdb_f$xp_theme,
name = "hline",
ggfun = "geom_hline"
)
}
# Add bars
if (check_type$b) {
xp <- xp + xpose::xp_geoms(mapping = NULL,
xp_theme = xpdb_f$xp_theme,
name = "ggabarDiag",
ggfun = "geom_bar",
ggabarDiag_stat = "identity"
)
}
# Add text
if (check_type$t) {
xp <- xp + xpose::xp_geoms(mapping = aes(
text_y = value,
text_label = formatC(
value,
format = "fg",
digits = reportable_digits(xpdb_f)
),
text_hjust = ifelse(value>0, 1.1, 0)
),
xp_theme = xpdb_f$xp_theme,
name = "text",
ggfun = "geom_text",
text_color = colorspace::darken(xpdb_f$xp_theme$ggabarDiag_fill, 0.68),
text_angle = 90,
text_alpha = 1
)
}
# Define panels
if (!is.null(facets) || length(sel_cols)>1) {
facet_arg <- "variable"
ncol_arg <- 1
nrow_arg <- NULL
scales_arg <- facet_scales
if (!is.null(facets)) {
# facet_arg <- paste(facets,collapse="+") %>%
# paste0("variable~",.) %>%
# stats::as.formula()
facet_arg <- c("variable",facets)
ncol_arg <- NULL
nrow_arg <- length(sel_cols)
}
xp <- xp + xpose::xpose_panels(xp_theme = xpdb_f$xp_theme,
extra_args = list(
facets = facet_arg,
ncol=ncol_arg,
nrow=nrow_arg,
scales=scales_arg
))
}
# Add labels
xp <- xp + ggplot2::labs(
title = title,
subtitle = subtitle,
caption = caption,
y = ifelse(scale_diff, "Scaled change", "Change"),
x = id_var
)
if (utils::packageVersion('ggplot2') >= '3.0.0') {
xp <- xp + ggplot2::labs(tag = tag)
}
# Add metadata to plots
xp$xpose <- list(fun = plot_name,
summary = xpdb_f$summary,
problem = attr(data, 'problem'),
subprob = attr(data, 'subprob'),
method = attr(data, 'method'),
quiet = quiet,
xp_theme = xpdb_f$xp_theme[stringr::str_c(c('title', 'subtitle',
'caption', 'tag'), '_suffix')])
# Output the plot
xpose::as.xpose.plot(xp)
}
# Helper function for the specific type plots
waterfall_helper <- function(
type,
label,
envir = parent.frame()
) {
mod1 <- envir$mod1
mod2 <- envir$mod2
.problem <- envir$.problem
mid_sentence_label <- label
start_sentence_label <- stringr::str_to_title(label)
if (label!=tolower(label)) {
start_sentence_label <- label
}
rlang::try_fetch({
m1col <- xp_var(mod1$xpdb, .problem = .problem, type = type)$col
m2col <- xp_var(mod2$xpdb, .problem = .problem, type = type)$col
},
error = function(s) {
rlang::abort(paste0("No ", mid_sentence_label, "s appear to be available for at least one model.",
"Make sure to `set_var_type(", type, " = *)` if they are available in the data."),
parent = s)
})
if (!dplyr::setequal(m1col,m2col)) {
a_notin_b <- setdiff(m1col,m2col)
b_notin_a <- setdiff(m2col,m1col)
best_msg <- dplyr::case_when(
length(a_notin_b)>0 && length(b_notin_a)>0 ~ paste("{.strong {cli::col_cyan(a_notin_b)}} {ifelse(length(a_notin_b)==1,'is','are')} missing in model 2",
"and {.strong {cli::col_cyan(b_notin_a)}} {ifelse(length(b_notin_a)==1,'is','are')} missing in model 1"),
length(a_notin_b)>0 ~ "{.strong {cli::col_cyan(a_notin_b)}} {ifelse(length(a_notin_b)==1,'is','are')} missing in model 2",
length(b_notin_a)>0 ~ "{.strong {cli::col_cyan(b_notin_a)}} {ifelse(length(b_notin_a)==1,'is','are')} missing in model 1"
)
cli::cli_abort(paste(start_sentence_label,"list is not identical for the two models.", best_msg))
}
assign("m1col",m1col,envir = envir)
assign("m2col",m1col,envir = envir)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.