#' Plotting Partial Dependence Functions
#'
#' Plots partial dependence functions (i.e., marginal effects) using
#' \code{\link[ggplot2]{ggplot2}} graphics.
#'
#' @param object An object that inherits from the \code{"partial"} class.
#'
#' @param center Logical indicating whether or not to produce centered ICE
#' curves (c-ICE curves). Only useful when \code{object} represents a set of ICE
#' curves; see \code{\link[pdp]{partial}} for details. Default is \code{FALSE}.
#'
#' @param plot.pdp Logical indicating whether or not to plot the partial
#' dependence function on top of the ICE curves. Default is \code{TRUE}.
#'
#' @param pdp.color Character string specifying the color to use for the partial
#' dependence function when \code{plot.pdp = TRUE}. Default is \code{"red"}.
#'
#' @param pdp.size Positive number specifying the line width to use for the
#' partial dependence function when \code{plot.pdp = TRUE}. Default is \code{1}.
#'
#' @param pdp.linetype Positive number specifying the line type to use for the
#' partial dependence function when \code{plot.pdp = TRUE}. Default is \code{1}.
#'
#' @param rug Logical indicating whether or not to include rug marks on the
#' predictor axes. Default is \code{FALSE}.
#'
#' @param smooth Logical indicating whether or not to overlay a LOESS smooth.
#' Default is \code{FALSE}.
#'
#' @param smooth.method Character string specifying the smoothing method
#' (function) to use (e.g., \code{"auto"}, \code{"lm"}, \code{"glm"},
#' \code{"gam"}, \code{"loess"}, or \code{"rlm"}). Default is \code{"auto"}.
#' See \code{\link[ggplot2]{geom_smooth}} for details.
#'
#' @param smooth.formula Formula to use in smoothing function (e.g.,
#' \code{y ~ x}, \code{y ~ poly(x, 2)}, or \code{y ~ log(x)}).
#'
#' @param smooth.span Controls the amount of smoothing for the default loess
#' smoother. Smaller numbers produce wigglier lines, larger numbers produce
#' smoother lines. Default is \code{0.75}.
#'
#' @param smooth.method.args List containing additional arguments to be passed
#' on to the modeling function defined by \code{smooth.method}.
#'
#' @param contour Logical indicating whether or not to add contour lines to the
#' level plot.
#'
#' @param contour.color Character string specifying the color to use for the
#' contour lines when \code{contour = TRUE}. Default is \code{"white"}.
#'
#' @param train Data frame containing the original training data. Only required
#' if \code{rug = TRUE} or \code{chull = TRUE}.
#'
#' @param xlab Character string specifying the text for the x-axis label.
#'
#' @param ylab Character string specifying the text for the y-axis label.
#'
#' @param main Character string specifying the text for the main title of the
#' plot.
#'
#' @param legend.title Character string specifying the text for the legend title.
#' Default is \code{"yhat"}.
#'
#' @param ... Additional (optional) arguments to be passed onto
#' \code{\link[ggplot2]{geom_line}}, \code{\link[ggplot2]{geom_point}}, or
#' \code{\link[ggplot2]{scale_fill_viridis_c}}.
#'
#' @return A \code{"ggplot"} object.
#'
#' @importFrom ggplot2 aes autoplot facet_wrap geom_contour geom_line
#'
#' @importFrom ggplot2 geom_point geom_rug geom_smooth geom_tile ggplot ggtitle
#'
#' @importFrom ggplot2 scale_fill_viridis_c stat_summary
#'
#' @importFrom ggplot2 theme_bw xlab ylab
#'
#' @importFrom rlang .data
#'
#' @rdname autoplot.partial
#'
#' @export
#'
#' @examples
#' \dontrun{
#' #
#' # Regression example (requires randomForest package to run)
#' #
#'
#' # Load required packages
#' library(ggplot2) # for autoplot() generic
#' library(gridExtra) # for `grid.arrange()`
#' library(magrittr) # for forward pipe operator `%>%`
#' library(randomForest)
#'
#' # Fit a random forest to the Boston housing data
#' data (boston) # load the boston housing data
#' set.seed(101) # for reproducibility
#' boston.rf <- randomForest(cmedv ~ ., data = boston)
#'
#' # Partial dependence of cmedv on lstat
#' boston.rf %>%
#' partial(pred.var = "lstat") %>%
#' autoplot(rug = TRUE, train = boston) + theme_bw()
#'
#' # Partial dependence of cmedv on lstat and rm
#' boston.rf %>%
#' partial(pred.var = c("lstat", "rm"), chull = TRUE, progress = TRUE) %>%
#' autoplot(contour = TRUE, legend.title = "cmedv",
#' option = "B", direction = -1) + theme_bw()
#'
#' # ICE curves and c-ICE curves
#' age.ice <- partial(boston.rf, pred.var = "lstat", ice = TRUE)
#' grid.arrange(
#' autoplot(age.ice, alpha = 0.1), # ICE curves
#' autoplot(age.ice, center = TRUE, alpha = 0.1), # c-ICE curves
#' ncol = 2
#' )
#' }
autoplot.partial <- function(object, center = FALSE, plot.pdp = TRUE,
pdp.color = "red", pdp.size = 1, pdp.linetype = 1,
rug = FALSE, smooth = FALSE,
smooth.method = "auto", smooth.formula = y ~ x,
smooth.span = 0.75, smooth.method.args = list(),
contour = FALSE, contour.color = "white",
train = NULL, xlab = NULL, ylab = NULL,
main = NULL, legend.title = "yhat", ...) {
# Determine if object contains an ID column (i.e., multiple curves)
multi <- "yhat.id" %in% names(object)
# Determine number of variables to plot
nx <- if (multi) {
ncol(object) - 2 # don't count yhat or yhat.id
} else {
ncol(object) - 1 # don't count yhat
}
# Generate plot
if (multi) { # user-supplied prediction function
# Call workhorse function
ggplot_ice_curves( # ICE curves from user-specified prediction function
object = object, center = center, plot.pdp = plot.pdp,
pdp.color = pdp.color, pdp.size = pdp.size, pdp.linetype = 1, rug = rug,
train = train, xlab = xlab, ylab = ylab, main = main, ...
)
} else if (nx == 1L) { # one predictor
# Call workhorse function
ggplot_one_predictor_pdp( # single predictor PDP
object = object, rug = rug, smooth = smooth,
smooth.method = smooth.method, smooth.formula = smooth.formula,
smooth.span = smooth.span, smooth.method.args = smooth.method.args,
train = train, xlab = xlab, ylab = ylab, main = main, ...
)
} else if (nx == 2L) { # two predictors
# Call workhorse function
ggplot_two_predictor_pdp( # two predictor PDP
object = object, rug = rug, smooth = smooth,
smooth.method = smooth.method, smooth.formula = smooth.formula,
smooth.span = smooth.span, smooth.method.args = smooth.method.args,
contour = contour, contour.color = contour.color,
train = train, xlab = xlab, ylab = ylab, main = main,
legend.title = legend.title, ...
)
} else { # more than two predictors
# Throw error
stop("`autoplot()` does not currently support PDPs with more than two ",
"predictors.")
}
}
#' @rdname autoplot.partial
#'
#' @export
autoplot.ice <- function(
object, center = FALSE, plot.pdp = TRUE, pdp.color = "red", pdp.size = 1,
pdp.linetype = 1, rug = FALSE, train = NULL, xlab = NULL, ylab = NULL,
main = NULL, ...
) {
# Call workhorse function
ggplot_ice_curves( # ICE curves
object = object, center = center, plot.pdp = plot.pdp,
pdp.color = pdp.color, pdp.size = pdp.size, pdp.linetype = pdp.linetype,
rug = rug, train = train, xlab = xlab, ylab = ylab, main = main, ...
)
}
#' @rdname autoplot.partial
#'
#' @export
autoplot.cice <- function(
object, plot.pdp = TRUE, pdp.color = "red", pdp.size = 1, pdp.linetype = 1,
rug = FALSE, train = NULL, xlab = NULL, ylab = NULL, main = NULL, ...
) {
# Call workhorse function
ggplot_ice_curves( # c-ICE curves
object = object, center = FALSE, plot.pdp = plot.pdp, pdp.color = pdp.color,
pdp.size = pdp.size, pdp.linetype = pdp.linetype, rug = rug, train = train,
xlab = xlab, ylab = ylab, main = main, ...
)
}
#' @keywords internal
ggplot_ice_curves <- function(
object, center, plot.pdp, pdp.color, pdp.size, pdp.linetype, rug, train,
xlab, ylab, main, ...
) {
# Should the curves be centered to start at yhat = 0?
if (center) {
object <- center_ice_curves(object)
}
# Use the first column to determine which type of plot to construct
if (is.factor(object[[1L]])) {
# Draw scatterplots
p <- ggplot(object,
aes(.data[[names(object)[1L]]], .data[["yhat"]], group = 1)) +
geom_line(aes(group = .data[["yhat.id"]]), ...) +
geom_point(aes(group = .data[["yhat.id"]]))
# Should the PDP be displayed too?
if (plot.pdp) {
p <- p + stat_summary(
fun = mean, geom = "line", col = pdp.color, size = pdp.size,
linetype = pdp.linetype
)
}
} else {
# Draw lineplots
p <- ggplot(object, aes(.data[[names(object)[1L]]], .data[["yhat"]])) +
geom_line(aes(group = .data[["yhat.id"]]), ...)
# Should the PDP be displayed too?
if (plot.pdp) {
p <- p + stat_summary(
fun = mean, geom = "line", col = pdp.color, size = pdp.size,
linetype = pdp.linetype
)
}
# Add rug plot to x-axis
if (isTRUE(rug)) {
if (is.null(train)) {
stop("The training data must be supplied for rug display.")
} else {
x.name <- which(colnames(train) == names(object)[[1L]])
x.rug <- data.frame(as.numeric(
stats::quantile(train[, x.name, drop = TRUE], probs = 0:10/10,
na.rm = TRUE)))
p <- p + geom_rug(data = x.rug, aes(.data[[names(x.rug)[1L]]]),
sides = "b", inherit.aes = FALSE)
}
}
}
# Add axis labels and title
if (is.null(xlab)) {
p <- p + xlab(names(object)[1L])
} else {
p <- p + xlab(xlab)
}
if (is.null(ylab)) {
p <- p + ylab("yhat")
} else {
p <- p + ylab(ylab)
}
if (!is.null(main)) {
p <- p + ggtitle(main)
}
# Return "ggplot" object
p
}
#' @keywords internal
ggplot_one_predictor_pdp <- function(
object, rug, smooth, smooth.method, smooth.formula, smooth.span,
smooth.method.args, train, xlab, ylab, main, ...
) {
# Use the first column to determine which type of plot to construct
if (is.factor(object[[1L]])) {
# Draw a scatterplot
p <- ggplot(object, aes(.data[[names(object)[1L]]], .data[["yhat"]])) +
geom_point(...)
} else {
# Draw a lineplot
p <- ggplot(object, aes(.data[[names(object)[1L]]], .data[["yhat"]])) +
geom_line(...)
# Add rug plot to x-axis
if (isTRUE(rug)) {
if (is.null(train)) {
stop("The training data must be supplied for rug display.")
} else {
x.name <- which(colnames(train) == names(object)[[1L]])
x.rug <- data.frame(as.numeric(
stats::quantile(train[, x.name, drop = TRUE], probs = 0:10/10,
na.rm = TRUE)))
p <- p + geom_rug(data = x.rug, aes(.data[[names(x.rug)[1L]]]),
sides = "b", inherit.aes = FALSE)
}
}
# Add smoother
if (smooth) {
p <- p + geom_smooth(
method = smooth.method, formula = smooth.formula, span = smooth.span,
se = FALSE, method.args = smooth.method.args
)
}
}
# Add axis labels and title
if (is.null(xlab)) {
p <- p + xlab(names(object)[1L])
} else {
p <- p + xlab(xlab)
}
if (is.null(ylab)) {
p <- p + ylab("yhat")
} else {
p <- p + ylab(ylab)
}
if (!is.null(main)) {
p <- p + ggtitle(main)
}
# Return "ggplot" object
p
}
#' @keywords internal
ggplot_two_predictor_pdp <- function(
object, rug, smooth, smooth.method, smooth.formula, smooth.span,
smooth.method.args, contour, contour.color, train, xlab, ylab, main,
legend.title, ...
) {
# Use the first two columns to determine which type of plot to construct
if (is.factor(object[[1L]]) && is.factor(object[[2L]])) {
# Draw a faceted scatterplot
p <- ggplot(object, aes(.data[[names(object)[1L]]], .data[["yhat"]])) +
geom_point(...) +
facet_wrap(~ .data[[names(object)[2L]]])
} else if (is.factor(object[[1L]]) && !is.factor(object[[2L]])) {
# Draw a faceted lineplot
p <- ggplot(object, aes(.data[[names(object)[2L]]], .data[["yhat"]])) +
geom_line(...) +
facet_wrap(~ .data[[names(object)[1L]]])
# Add rug plot to the x-axis
if (isTRUE(rug)) {
if (is.null(train)) {
stop("The training data must be supplied for rug display.")
} else {
x.name <- which(colnames(train) == names(object)[[2L]])
x.rug <- data.frame(as.numeric(
stats::quantile(train[, x.name, drop = TRUE], probs = 0:10/10,
na.rm = TRUE)))
p <- p + geom_rug(data = x.rug, aes(.data[[names(x.rug)[1L]]]),
sides = "b", inherit.aes = FALSE)
}
}
# Add smoother
if (smooth) {
p <- p + geom_smooth(
method = smooth.method, formula = smooth.formula, span = smooth.span,
se = FALSE, method.args = smooth.method.args
)
}
} else if (!is.factor(object[[1L]]) && is.factor(object[[2L]])) {
# Draw a faceted lineplot
p <- ggplot(object, aes(.data[[names(object)[1L]]], .data[["yhat"]])) +
geom_line(...) +
facet_wrap(~ .data[[names(object)[2L]]])
# Add rug plot to x-axis
if (isTRUE(rug)) {
if (is.null(train)) {
stop("The training data must be supplied for rug display.")
} else {
x.name <- which(colnames(train) == names(object)[[1L]])
x.rug <- data.frame(as.numeric(
stats::quantile(train[, x.name, drop = TRUE], probs = 0:10/10,
na.rm = TRUE)))
p <- p + geom_rug(data = x.rug, aes(.data[[names(x.rug)[1L]]]),
sides = "b", inherit.aes = FALSE)
}
}
# Add smoother
if (smooth) {
p <- p + geom_smooth(
method = smooth.method, formula = smooth.formula, span = smooth.span,
se = FALSE, method.args = smooth.method.args
)
}
} else {
# Draw a false color level plot
p <- ggplot(object,
aes(.data[[names(object)[1L]]], .data[[names(object)[2L]]],
z = .data[["yhat"]], fill = .data[["yhat"]])
) + geom_tile()
# Add contour lines
if (contour) {
p <- p + geom_contour(color = contour.color)
}
# Add legend title and theme
p <- p + scale_fill_viridis_c(name = legend.title, ...)
}
# Add axis labels and title
if (is.null(xlab)) {
p <- if (is.factor(object[[1L]]) && !is.factor(object[[2L]])) {
p + xlab(names(object)[2L])
} else {
p + xlab(names(object)[1L])
}
} else {
p <- p + xlab(xlab)
}
if (is.null(ylab)) {
p <- if (!is.factor(object[[1L]]) && !is.factor(object[[2L]])) {
p + ylab(names(object)[2L])
} else {
p + ylab("yhat")
}
} else {
p <- p + ylab(ylab)
}
if (!is.null(main)) {
p <- p + ggtitle(main)
}
# Return "ggplot" object
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.