# Nothing
#' Box plot using ggplot
#'
#' This function provides a simple interface to create a \code{\link{ggplot}}
#' box plot, organising different box plots by levels of a factor if desired,
#' and showing row numbers of outliers.
#'
#' This function is based on JasonAizkalns' answer to a question on Stack
#' Overflow (see
#' \url{https://stackoverflow.com/questions/33524669/labeling-outliers-of-boxplots-in-r}).
#'
#' @param dat Either a vector of values (to display in the box plot) or a
#' dataframe containing variables to display in the box plot. A vector must
#' be numeric and contain at least five elements.
#' @param y If \code{dat} is a dataframe, this is the name of the variable to
#' make the box plot of. If more than one name is passed, a warning is issued
#' and only the last one is used.
#' @param x If \code{dat} is a dataframe, this is the name of the variable
#' (normally a factor) to place on the X axis. Separate box plots will be
#' generated for each level of this variable. If this variable is not a
#' factor, it is converted to one. If more than one name is passed, a warning
#' is issued and only the last one is used.
#' @param labelOutliers Whether or not to label outliers. Labels are the
#' positions (row numbers) of the outlying values in \code{dat} as it was
#' passed to this function.
#' @param outlierColor If labeling outliers, this is the color to use.
#' @param theme The theme to use for the box plot.
#' @param \dots Any additional arguments will be passed to
#' \code{\link{geom_boxplot}}.
#' @return A \code{\link{ggplot}} plot is returned.
#' @author Jason Aizkalns; implemented in this package (and tweaked a bit) by
#' Gjalt-Jorn Peters.
#'
#' Maintainer: Gjalt-Jorn Peters <gjalt-jorn@@userfriendlyscience.com>
#' @seealso \code{\link{geom_boxplot}}
#' @keywords hplot
#' @examples
#'
#' ### A box plot for miles per gallon in the mtcars dataset:
#' ggBoxplot(mtcars$mpg)
#'
#' ### And separate for each level of 'cyl' (number of cylinders):
#' ggBoxplot(mtcars, y = 'mpg', x = 'cyl')
#'
#' @export ggBoxplot
ggBoxplot <- function(dat, y = NULL, x = NULL,
                      labelOutliers = TRUE,
                      outlierColor = 'red',
                      theme = ggplot2::theme_bw(), ...) {

  ### Draws one box for column 'yVar' of 'plotDat' and, if requested, labels
  ### the rows that have a non-missing value in the 'outlier' column. Shared
  ### by the vector case and the dataframe-without-'x' case.
  singleBoxplot <- function(plotDat, yVar, ...) {
    resPlot <-
      ggplot2::ggplot(plotDat, ggplot2::aes_string(y = yVar)) +
      ggplot2::geom_boxplot(ggplot2::aes(x = factor(yVar)), ...) +
      ggplot2::xlab("") + theme +
      ggplot2::theme(axis.text.x = ggplot2::element_blank(),
                     axis.ticks.x = ggplot2::element_blank())
    if (labelOutliers) {
      resPlot <- resPlot +
        ggrepel::geom_text_repel(ggplot2::aes_string(x = '1',
                                                     label = 'outlier'),
                                 na.rm = TRUE,
                                 color = outlierColor)
    }
    resPlot
  }

  ### Case 1: only a vector of values was provided
  if (is.null(x) && is.null(y)) {
    if (!is.numeric(dat)) {
      ### class() can return several classes; collapse them so that the
      ### message stays readable
      stop("If both arguments 'x' and 'y' are NULL, the first argument, 'dat', ",
           "should be a vector, but instead, it has class '",
           paste(class(dat), collapse = "', '"),
           "'.")
    }
    if (length(dat) < 5) {
      stop("If both arguments 'x' and 'y' are NULL, the first argument, 'dat', ",
           "should be a vector of values, but it's only ", length(dat),
           " elements long, which isn't enough to generate a boxplot.")
    }
    varname <- deparse(substitute(dat))
    ### Take variable only in case a variable in a dataframe was specified
    varname <- extractVarName(varname)
    tmpDf <- data.frame(dat)
    names(tmpDf) <- varname
    tmpDf$outlier <- ifelse(iqrOutlier(tmpDf[[varname]]),
                            seq_len(nrow(tmpDf)),
                            NA_real_)
    return(singleBoxplot(tmpDf, varname, ...))
  }

  ### From here on, 'dat' is used as a dataframe and 'y' is required
  if (is.null(y)) {
    stop("Argument 'y' should be a text string specifying a variable in the ",
         "dataframe specified by 'dat'; instead, no 'y' is specified.")
  }
  if (length(y) > 1) {
    warning("If argument 'y' is specified, it should be the name of a ",
            "variable in the dataframe specified by argument 'dat'. However, ",
            "'y' had ", length(y),
            " elements. Discarding all but the last one.")
    ### Actually discard, as announced; otherwise the scalar tests below
    ### receive a vector
    y <- y[length(y)]
  }
  if (!(y %in% names(dat))) {
    stop("Argument 'y' should be a text string specifying a variable in the ",
         "dataframe specified by 'dat', but '", y, "' isn't among ",
         "names(dat). Please check your spelling, and remember that R is ",
         "case sensitive: it matters whether you use capitals or not!")
  }

  ### Case 2: one box plot for one variable in a dataframe
  if (is.null(x)) {
    dat$outlier <- ifelse(iqrOutlier(dat[[y]]),
                          seq_len(nrow(dat)),
                          NA_real_)
    return(singleBoxplot(dat, y, ...))
  }

  ### Case 3: separate box plots for each level of 'x'
  if (length(x) > 1) {
    warning("If argument 'x' is specified, it should be the name of a ",
            "variable (a factor, normally) in the dataframe",
            " specified by argument 'dat'. However, ",
            "'x' had ", length(x),
            " elements. Discarding all but the last one.")
    x <- x[length(x)]
  }
  if (!(x %in% names(dat))) {
    stop("Argument 'x' should be a text string specifying a variable in the ",
         "dataframe specified by 'dat', but '", x, "' isn't among ",
         "names(dat). Please check your spelling, and remember that R is ",
         "case sensitive: it matters whether you use capitals or not!")
  }
  if (!is.factor(dat[[x]])) {
    dat[[x]] <- factor(dat[[x]])
  }

  ### Store the original row positions before splitting: ddply returns the
  ### rows grouped by 'x', so numbering them afterwards would label outliers
  ### with positions in the regrouped data instead of in the data as passed.
  dat$outlier <- as.numeric(seq_len(nrow(dat)))

  ### Based on JasonAizkalns' answer at
  ### https://stackoverflow.com/questions/33524669/labeling-outliers-of-boxplots-in-r
  dat <-
    plyr::ddply(dat, x,
                function(datF) {
                  ### Keep the row position for outliers within this level
                  ### of 'x'; blank it for all other rows
                  datF$outlier <- ifelse(iqrOutlier(datF[[y]]),
                                         datF$outlier,
                                         NA_real_)
                  datF
                })

  resPlot <- ggplot2::ggplot(dat, ggplot2::aes_string(y = y, x = x)) +
    ggplot2::geom_boxplot(...) +
    theme
  if (labelOutliers) {
    resPlot <- resPlot +
      ggrepel::geom_text_repel(ggplot2::aes_string(label = 'outlier'),
                               na.rm = TRUE,
                               color = outlierColor)
  }
  resPlot
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.