#' Beautiful boxplots with ggplot2.
#'
#' Create boxplots with \link{ggplot2}.
#' @param y a numeric variable for which the boxplot is to be constructed.
#' @param \dots further arguments passed to \code{\link[ggplot2]{geom_boxplot}}.
#' @export
bboxplot <- function(y, ...) UseMethod("bboxplot")
#' @param g a grouping variable, usually a factor, for constructing parallel
#' boxplots.
#' @param data a dataframe containing the variables.
#'
#' @return A boxplot of the variable \code{y} or the boxplot of the \code{y}
#' against \code{g}.
#'
#' @examples
#' data(centro_2015)
#' bboxplot("valor", data = centro_2015)
#' bboxplot("valor", "padrao", centro_2015)
#' @rdname bboxplot
#' @export
bboxplot.default <- function(y, g, data, ...) {
df <- as.data.frame(data)
y <- as.name(y)
x <- 1
if (missing(g)) g <- rlang::enquo(x) else g <- as.name(g)
df <- tibble::rowid_to_column(df)
is_outlier <- function(x, na.rm = TRUE) {
if (isTRUE(na.rm))
x <- na.omit(x)
low <- stats::quantile(x, 0.25) - 1.5 * stats::IQR(x)
up <- stats::quantile(x, 0.75) + 1.5 * stats::IQR(x)
return(x < low | x > up)
}
df <- df |>
dplyr::select(!!y, !!g, rowid) |>
stats::na.omit() |>
dplyr::group_by(!!g) |>
dplyr::mutate(outlier = ifelse(is_outlier(!!y), rowid, as.numeric(NA)))
p <- ggplot(df, aes(x = {{g}}, y = {{y}})) +
geom_boxplot(aes(fill = {{g}}), outlier.colour = "red", ...) +
geom_text(aes(label = .data$outlier), na.rm = TRUE, hjust = -0.3) +
theme(axis.title.x = element_blank(), legend.position = "none")
p
}
#' @param formula A model formula of the form ~ y to produce a boxplot for
#' the variable y, or of the form y ~ g to produce parallel boxplots for y
#' within levels of the grouping variable(s) g, etc., usually factors.
#' @examples
#' bboxplot(valor~padrao, centro_2015)
#' bboxplot(~valor, centro_2015)
#' data(CampoGrande)
#' bboxplot(PU ~ PADRAO, CampoGrande) # something is wrong!
#' @rdname bboxplot
#' @export
#'
bboxplot.formula <- function(formula, data, ...) {
df <- as.data.frame(data)
response <- attr(stats::terms.formula(formula, data = df),
"response")
var <- attr(stats::terms.formula(formula, data = df),
"term.labels")
if (response == 0) {
p <- bboxplot.default(y = var, data = df)
} else {
response <- colnames(df)[response]
p <- bboxplot.default(y = response,
g = var,
data = df
)
}
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.