Nothing
#' @title Moderation Effect Plot
#'
#' @description Plot the moderation effect in a regression model
#'
#' @details This function generate a basic `ggplot2` graph
#' typically found in psychology manuscripts. It tries to
#' check whether one or more variables are standardized, and
#' report this in the plot if required.
#'
#' This function only has features for typical plots of moderation effects.
#' It is not intended to be a flexible tool for a fine control on the plots.
#'
#' @return
#' A `ggplot2` graph. Plotted if not assigned to a name. It can
#' be further modified like a usual `ggplot2` graph.
#'
#' @param output The output
#' of [stats::lm()], [std_selected()], or
#' [std_selected_boot()].
#'
#' @param x The name of the focal variable (x-axis) in `output``. It
#' can be the name of the variable, with or without quotes.
#' Currently only numeric variables are supported.
#' @param w The name of the moderator in `output`. It
#' can be the name of the variable, with or without quotes.
#' @param x_label The label for the X-axis. Default is the value of `x`.
#' @param w_label The label for the legend for the lines.
#' Default is the value of`w`.
#' @param y_label The label for the Y-axis. Default is the
#' name of the response variable in the model.
#' @param title The title of the graph. If not supplied, it will be
#' generated from the variable
#' names or labels (in `x_label`, `y_label`,
#' and `w_label`). If `""`, no title will be printed.
#' This can be used when the plot is for manuscript
#' submission and figures are required to have no
#' titles.
#' @param digits Number of decimal places to print. Default is 3.
#' @param x_from_mean_in_sd How many SD from mean is used to define
#' "low" and
#' "high" for the focal variable.
#' Default is 1.
#' @param w_from_mean_in_sd How many SD from mean is used to define
#' "low" and
#' "high" for the moderator. Default is 1.
#' Ignored if `w` is categorical.
#' @param w_method How to define "high" and "low" for the moderator levels.
#' Default is in terms of the
#' standard deviation of the moderator, `"sd"`.
#' If equal to
#' `"percentile"`, then the percentiles of the moderator in
#' the
#' dataset are used.
#' Ignored if `w` is categorical.
#' @param w_percentiles If `w_method` is `"percentile"`, then this
#' argument
#' specifies the two percentiles to be used,
#' divided by 100.
#' It must be a
#' vector of two numbers. The default is
#' `c(.16, .84)`,
#' the 16th and 84th percentiles,
#' which corresponds approximately
#' to one SD below and above mean for a
#' normal distribution, respectively.
#' Ignored if `w` is categorical.
#' @param x_method How to define "high" and "low" for the focal
#' variable levels.
#' Default is in terms of the
#' standard deviation of the focal variable, `"sd"`.
#' If equal to
#' `"percentile"`, then the percentiles of the
#' focal variable in the
#' dataset is used.
#' @param x_percentiles If `x_method` is `"percentile"`, then this
#' argument
#' specifies the two percentiles to be used,
#' divided by 100.
#' It must be a
#' vector of two numbers. The default is
#' `c(.16, .84)`,
#' the 16th and 84th percentiles,
#' which corresponds approximately
#' to one SD below and above mean for a
#' normal distribution, respectively.
#' @param w_sd_to_percentiles If `w_method` is `"percentile"` and
#' this argument is
#' set to a number, this number will be
#' used
#' to determine the percentiles to be used.
#' The
#' lower percentile is the percentile in a
#' normal
#' distribution
#' that is `w_sd_to_percentiles` SD below
#' the mean.
#' The upper percentile is the percentile in
#' a normal
#' distribution that is `w_sd_to_percentiles`
#' SD
#' above the mean. Therefore, if
#' `w_sd_to_percentiles` is set to 1, then the
#' lower
#' and upper percentiles are 16th and 84th,
#' respectively. Default is `NA`.
#' @param x_sd_to_percentiles If `x_method` is `"percentile"` and this
#' argument is
#' set to a number, this number will be used
#' to determine the percentiles to be used. The
#' lower percentile is the percentile in a
#' normal
#' distribution
#' that is `x_sd_to_percentiles` SD below the mean.
#' The upper percentile is the percentile in a normal
#' distribution that is `x_sd_to_percentiles` SD
#' above the mean. Therefore, if
#' `x_sd_to_percentiles` is set to 1, then the lower
#' and upper percentiles are 16th and 84th,
#' respectively. Default is `NA`.
#'
#' @param w_values The values of `w` to
#' be used. Default is `NULL`. If a
#' numeric vector is supplied, these
#' values will be used to compute the
#' conditional effects. Other arguments
#' on generating levels are ignored.
#' Note that, if `w` has been standardized
#' or centered, these values are for
#' the standardized or centered `w`.
#' The values will always be sorted.
#' This argument is ignored if `w` is
#' categorical.
#'
#' @param note_standardized If `TRUE`, will check whether a variable has SD
#' nearly equal to one. If yes, will report this in the
#' plot.
#' Default is `TRUE`.
#' @param no_title If `TRUE`, title will be suppressed. Default is `FALSE`.
#' @param line_width The width of the lines as used in
#' [ggplot2::geom_segment()].
#' Default is 1.
#' @param point_size The size of the points as used in [ggplot2::geom_point()].
#' Default is 5.
#' @param graph_type If `"default"`, the typical line-graph with equal end-points
#' will be plotted. If `"tubmle"`, then the tumble graph
#' proposed by Bodner (2016) will be plotted. Default is
#' `"default"`.
#'
#' @author Shu Fai Cheung <https://orcid.org/0000-0002-9871-9448>
#'
#' @references
#'
#' Bodner, T. E. (2016). Tumble graphs: Avoiding misleading end point
#' extrapolation when graphing interactions from a moderated multiple
#' regression analysis. *Journal of Educational and Behavioral
#' Statistics, 41*(6), 593-604. \doi{10.3102/1076998616657080}
#'
#' @examples
#'
#' # Do a moderated regression by lm
#' lm_out <- lm(sleep_duration ~ age + gender + emotional_stability*conscientiousness, sleep_emo_con)
#' plotmod(lm_out,
#' x = emotional_stability,
#' w = conscientiousness,
#' x_label = "Emotional Stability",
#' w_label = "Conscientiousness",
#' y_label = "Sleep Duration")
#'
#' # Standardize all variables except for categorical variables
#' # Alternative: use to_standardize as a shortcut
#' # lm_std <- std_selected(lm_out,
#' # to_standardize = ~ .)
#' lm_std <- std_selected(lm_out,
#' to_scale = ~ .,
#' to_center = ~ .)
#' plotmod(lm_std,
#' x = emotional_stability,
#' w = conscientiousness,
#' x_label = "Emotional Stability",
#' w_label = "Conscientiousness",
#' y_label = "Sleep Duration")
#'
#' # Tumble Graph
#' plotmod(lm_std,
#' x = emotional_stability,
#' w = conscientiousness,
#' x_label = "Emotional Stability",
#' w_label = "Conscientiousness",
#' y_label = "Sleep Duration",
#' graph_type = "tumble")
#'
#' @importFrom rlang .data
#' @export
plotmod <- function(output, x, w,
x_label,
w_label,
y_label,
title,
digits = 3,
x_from_mean_in_sd = 1,
w_from_mean_in_sd = 1,
w_method = c("sd", "percentile"),
w_percentiles = c(.16, .84),
x_method = c("sd", "percentile"),
x_percentiles = c(.16, .84),
w_sd_to_percentiles = NA,
x_sd_to_percentiles= NA,
w_values = NULL,
note_standardized = TRUE,
no_title = FALSE,
line_width = 1,
point_size = 5,
graph_type = c("default", "tumble")
) {
w_method <- match.arg(w_method)
x_method <- match.arg(x_method)
graph_type <- match.arg(graph_type)
x0 <- deparse(substitute(x))
if (inherits(tryCatch(x00 <- as.character(x), error = function(e) e),
"simpleError")) {
x <- x0
} else {
x <- x00
}
w0 <- deparse(substitute(w))
if (inherits(tryCatch(w00 <- as.character(w), error = function(e) e),
"simpleError")) {
w <- w0
} else {
w <- w00
}
y <- colnames(stats::model.frame(output))[
attr(stats::terms(output), "response")
]
mf0 <- stats::model.frame(output)
x_numeric <- is.numeric(mf0[, x])
w_numeric <- is.numeric(mf0[, w])
if (!x_numeric) {
stop("x variable must be a numeric variable.")
}
if (w_numeric) {
if (is.numeric(w_values)) {
w_levels <- sort(w_values, decreasing = TRUE)
w_levels_labels <- as.character(w_levels)
w_method <- "user"
} else {
w_levels <- gen_levels(mf0[, w],
method = w_method,
from_mean_in_sd = w_from_mean_in_sd,
levels = c(-1, 1),
sd_levels = c(-1, 1),
sd_to_percentiles = w_sd_to_percentiles,
percentiles = w_percentiles)
tmp <- length(w_levels)
if (tmp == 2) {
w_levels_labels <- c("Low", "High")
}
if (tmp == 3) {
w_levels_labels <- c("Low", "Medium", "High")
}
if (tmp > 3) {
if (w_method == "percentile") {
w_levels_labels <- paste0(formatC(w_percentiles * 100,
digits = 0,
format = "f"),
"%")
} else {
w_levels_labels <- formatC(w_levels,
digits = 2,
format = "f")
}
}
}
} else {
w_lo <- NA
w_hi <- NA
w_levels <- levels(as.factor(mf0[, w]))
}
if (graph_type == "default") {
x_levels <- gen_levels(mf0[, x],
method = x_method,
from_mean_in_sd = x_from_mean_in_sd,
levels = c(-1, 1),
sd_levels = c(-1, 1),
sd_to_percentiles = x_sd_to_percentiles,
percentiles = x_percentiles)
}
if (graph_type == "tumble") {
x_subsets <- lapply(w_levels, x_for_w,
mf = mf0, x = x, w = w, w_numeric = w_numeric)
x_levels_list <- lapply(x_subsets,
gen_levels,
method = x_method,
from_mean_in_sd = x_from_mean_in_sd,
levels = c(-1, 1),
sd_levels = c(-1, 1),
sd_to_percentiles = x_sd_to_percentiles,
percentiles = x_percentiles)
}
if (w_numeric) {
if (graph_type == "default") {
mf2 <- plot_df_meansd_w_numeric(output = output,
x = x,
w = w,
x_levels,
w_levels,
x_levels_labels = c("Low", "High"),
w_levels_labels = w_levels_labels,
other_numeric_on = "mean",
other_categorical_on = "reference")
}
if (graph_type == "tumble") {
mf_list <- mapply(plot_df_meansd_w_numeric,
x_levels = x_levels_list,
w_levels = w_levels,
w_levels_labels = w_levels_labels,
MoreArgs = list(output = output,
x = x,
w = w,
x_levels_labels = c("Low", "High"),
other_numeric_on = "mean",
other_categorical_on = "reference"),
SIMPLIFY = FALSE)
mf2 <- do.call(rbind, mf_list)
}
} else {
if (graph_type == "default") {
mf2 <- plot_df_meansd_w_categorical(output = output,
x = x,
w = w,
x_levels,
w_levels,
x_levels_labels = c("Low", "High"),
other_numeric_on = "mean",
other_categorical_on = "reference")
}
if (graph_type == "tumble") {
mf_list <- mapply(plot_df_meansd_w_categorical,
x_levels = x_levels_list,
w_levels = w_levels,
MoreArgs = list(output = output,
x = x,
w = w,
x_levels_labels = c("Low", "High"),
other_numeric_on = "mean",
other_categorical_on = "reference"),
SIMPLIFY = FALSE)
mf2 <- do.call(rbind, mf_list)
}
}
mf2$predicted <- stats::predict(output, mf2)
if (missing(x_label)) x_label <- x
if (missing(w_label)) w_label <- w
if (missing(y_label)) y_label <- y
x_standardized <- is_standardized(mf0[, x])
w_standardized <- is_standardized(mf0[, w])
y_standardized <- is_standardized(mf0[, y])
if (note_standardized) {
if (any(c(x_standardized,
w_standardized,
y_standardized))) {
tmp <- ifelse(c(x_standardized,
w_standardized,
y_standardized),
c(x_label,
w_label,
y_label),
c(NA, NA, NA))
cap_std <-
paste0(stats::na.omit(tmp), collapse = ", ")
cap_std <- paste0(cap_std, " standardized")
} else {
cap_std <- NULL
}
}
if (missing(title)) {
title <- "Moderation Effect"
}
if (utils::packageVersion("ggplot2") >= "3.0.0") {
ggtmp1 <- "predicted"
ggtmp2 <- "w_level"
p <- ggplot2::ggplot() +
ggplot2::geom_point(ggplot2::aes(x = .data[[x]],
y = .data[[ggtmp1]],
color = .data[[ggtmp2]]),
data = mf2,
size = point_size)
} else {
p <- ggplot2::ggplot() +
ggplot2::geom_point(ggplot2::aes_string(x = x,
y = "predicted",
colour = "w_level"),
data = mf2,
size = point_size)
}
if (utils::packageVersion("ggplot2") >= "3.4.0") {
p <- p +
ggplot2::geom_segment(ggplot2::aes(
x = mf2[mf2$x_level == "Low", x],
xend = mf2[mf2$x_level == "High", x],
y = mf2[mf2$x_level == "Low", "predicted"],
yend = mf2[mf2$x_level == "High", "predicted"],
colour = mf2[mf2$x_level == "Low", "w_level"]
), linewidth = line_width)
} else {
p <- p +
ggplot2::geom_segment(ggplot2::aes(
x = mf2[mf2$x_level == "Low", x],
xend = mf2[mf2$x_level == "High", x],
y = mf2[mf2$x_level == "Low", "predicted"],
yend = mf2[mf2$x_level == "High", "predicted"],
colour = mf2[mf2$x_level == "Low", "w_level"]
), size = line_width)
}
b_all <- find_bs(mf = mf2, x = x, w = w, w_levels = w_levels)
b_format <- paste0("%.", digits, "f")
if (w_numeric) {
tmp <- paste0(w_label, " ", w_levels_labels, ": ",
x_label, " effect = ",
sprintf(b_format, b_all))
subtxt <- paste(tmp, collapse = "\n")
} else {
subtxt <- paste0(w_levels,
": ",
x_label,
" effect = ",
sprintf(b_format, b_all),
collapse = "\n")
}
if (w_numeric) {
if (w_method == "percentile") {
tmp <- paste0(w_levels_labels, ": ",
round(w_percentiles * 100, 0),
"th percentile")
cap_txt <- paste0(w_label, " levels: ",
paste0(tmp, collapse = "; "))
}
if (w_method == "sd") {
cap_txt <- paste0(w_label, " levels: ",
"Low: ", w_from_mean_in_sd,
"SD below mean; Hi: ",
w_from_mean_in_sd, " SD above mean")
}
if (w_method == "user") {
cap_txt <- NULL
}
} else {
cap_txt <- NULL
}
if (note_standardized & !is.null(cap_std)) {
if (!is.null(cap_txt)) {
cap_txt <- paste0(cap_txt, "\n", cap_std)
} else {
cap_txt <- cap_std
}
}
out <- p +
ggplot2::labs(title = title,
subtitle = subtxt,
caption = cap_txt) +
ggplot2::theme(legend.position = "top",
plot.caption = ggplot2::element_text(hjust = .5,
size = 9),
plot.title = ggplot2::element_text(hjust = .5),
plot.subtitle = ggplot2::element_text(hjust = .5,
size = 9)) +
ggplot2::xlab(x_label) +
ggplot2::ylab(y_label) +
ggplot2::guides(colour = ggplot2::guide_legend(title = w_label))
if (no_title) {
out <- out + ggplot2::labs(title = NULL)
}
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.