#' Confidence Curves
#'
#' Confidence curves are a way to show the confidence in an estimate computed
#' from sample data. They are useful because they show all confidence levels
#' simultaneously, thereby giving a good sense of the accuracy of the estimate,
#' without forcing the researchers to make a more or less arbitrary choice for
#' one confidence level.
#'
#'
#' @param metric The metric, currently only 'd' (Cohen's \emph{d}) and 'r'
#' (Pearson's \emph{r}) are implemented.
#' @param value The value for which to create the confidence curve plot.
#' @param n The sample size for which to create the confidence curve plot. If
#' \code{n} is specified, the y axis shows confidence levels (i.e. a
#' conventional confidence curve is generated). If \code{n} is set to
#' \code{NULL}, the y axis shows sample sizes. Either \code{n} or
#' \code{conf.level} must be \code{NULL}.
#' @param conf.level The confidence level for which to create the confidence
#' curve plot. If \code{conf.level} is specified, the y axis shows sample
#' sizes. If \code{conf.level} is set to \code{NULL}, the y axis shows
#' confidence levels (i.e. a conventional confidence curve is generated).
#' Either \code{n} or \code{conf.level} must be \code{NULL}.
#' @param wRange The range of 'half-widths', or margins of error, to plot in
#' the confidence curve plot if no sample size is specified (if \code{n=NULL}).
#' @param curveSize The line size of the confidence curve line.
#' @param curveColor The color of the confidence curve line.
#' @param confRange The range of confidence levels to plot.
#' @param confLines,widthLines If a traditional confidence curve is generated,
#' lines can be added to indicate the metric values corresponding to the lower
#' and upper confidence interval bounds. For an inverse confidence curve, lines
#' can be added to inficate the metric values and sample sizes corresponding to
#' specific margins of error (or 'half-widths').
#' @param lineColor If confidence or 'interval width lines' lines are added
#' (see \code{confLines}), this is the color in which they are drawn. Specify a
#' vector (e.g. \code{\link{brewer.pal}(9, 'Set1')}) to have the colors drawn
#' in different colors for each confidence level or width.
#' @param lineSize If confidence lines or 'interval width lines' are added (see
#' \code{confLines} and \code{widthLines}), these arguments specify the color
#' and size in which they are drawn.
#' @param lineAlpha The alpha value (transparency) of the confidence lines or
#' 'interval width lines'.
#' @param xlab The label on the x axis.
#' @param steps The number of steps to use when generating the data for the
#' confidence curves' more steps yield prettier, smoother curves, but take more
#' time.
#' @param theme The \code{\link{ggplot}} theme to use.
#' @param gradient Whether to use a gradient as background to make the
#' confidence more explicit. This is experimental and pretty influential in
#' terms of how the plot looks. The default gradient, used when passing
#' \code{TRUE}, uses black as background color when the confidence is 0
#' percent, and white for 100 percent. If two colors are specified, these are
#' used instead.
#' @param gradientWidth If using a gradient, the width of the
#' \code{\link{geom_tile}} geoms used to create the gradient.
#' @param outputFile A file to which to save the plot.
#' @param outputWidth,outputHeight Width and height of saved plot (specified in
#' centimeters by default, see \code{ggsaveParams}).
#' @param ggsaveParams Parameters to pass to ggsave when saving the plot.
#' @return A \code{\link{ggplot2}} plot.
#' @author Gjalt-Jorn Peters
#'
#' Maintainer: Gjalt-Jorn Peters <gjalt-jorn@@userfriendlyscience.com>
#' @seealso \code{\link{cohensdCI}}, \code{\link{pwr.cohensdCI}},
#' \code{\link{confIntR}}, \code{\link{pwr.confIntR}}
#' @references Bender, R., Berg, G., & Zeeb, H. (2005). Tutorial: Using
#' confidence curves in medical research. \emph{Biometrical Journal}, 47(2),
#' 237-247. http://doi.org/10.1002/bimj.200410104
#'
#' Birnbaum, A. (1961). Confidence curves: An omnibus technique for estimation
#' and testing statistical hypotheses. \emph{Journal of the American
#' Statistical Association}, 56(294), 246-249.
#' http://doi.org/10.1080/01621459.1961.10482107
#' @keywords hplot
#' @examples
#'
#' ggConfidenceCurve(metric='d', value = .5, n = 128);
#' ggConfidenceCurve(metric='d', value = .5, conf.level = .95);
#'
#' @export ggConfidenceCurve
ggConfidenceCurve <- function(metric = 'd', value = NULL, n = NULL,
conf.level=NULL,
wRange = c(.05, .8),
curveSize = 1,
curveColor = 'black',
confRange = c(.0001, .9999),
confLines = c(.50, .80, .95, .99),
widthLines = c(min(wRange), .1, .2, .3, max(wRange)),
lineColor = brewer.pal(9, 'Set1'),
lineSize=1,
lineAlpha = .5,
xlab = metric,
steps=1000,
theme=theme_bw(),
gradient=NULL,
gradientWidth=.01,
outputFile = NULL,
outputWidth = 16,
outputHeight = 16,
ggsaveParams = list(units='cm',
dpi=300,
type="cairo")) {
### Check arguments
if (is.null(conf.level) && is.null(n)) {
stop("Arguments 'conf.level' and 'n' cannot both be NULL!");
}
if (is.null(value)) {
stop("You must always specify a value for your metric.");
}
if (tolower(metric) == 'd') {
if (is.null(n)) {
### So, sample sizes on the y axis, and values on the x axis.
### So we need the values (bounds) corresponding to different
### widths (which then correspond to sample sizes).
minN <- pwr.cohensdCI(d=value, w=max(wRange));
maxN <- pwr.cohensdCI(d=value, w=min(wRange));
df1 <- df2 <- data.frame(n = seq(minN, maxN, length.out=steps));
confInts <- cohensdCI(d=value,
n=seq(minN, maxN, length.out=steps),
conf.level=conf.level);
df1 <- cbind(df1, confInts[, 1]);
df2 <- cbind(df2, confInts[, 2]);
} else {
### So, confidence is on the y axis, and values on the x axis.
### So we need to the confidence interval bounds for the given
### sample size, for different confidence levels.
minConf <- min(confRange);
maxConf <- max(confRange);
df1 <- df2 <- data.frame(confidence = seq(minConf, maxConf, length.out=steps));
confInts <- matrix(unlist(lapply(df1$confidence, cohensdCI, d=value,
n=n)), ncol=2, byrow=TRUE);
df1 <- cbind(df1, confInts[, 1]);
df2 <- cbind(df2, confInts[, 2]);
### If need be, get metric values corresponding to confidence
### interval bounds
if (!is.null(confLines) && (length(confLines) > 0)) {
metricValues <- list();
for (i in seq_along(confLines)) {
metricValues[[i]] <- cohensdCI(d=value, n=n, conf.level=confLines[i]);
}
}
}
} else if (tolower(metric)=='r') {
if (is.null(n)) {
### So, sample sizes on the y axis, and values on the x axis.
### So we need the values (bounds) corresponding to different
### widths (which then correspond to sample sizes).
minN <- pwr.confIntR(r=value, w=max(wRange));
maxN <- pwr.confIntR(r=value, w=min(wRange));
df1 <- df2 <- data.frame(n = seq(minN, maxN, length.out=steps));
confInts <- matrix(unlist(lapply(df1$n, confIntR, r=value,
conf.level=conf.level)),
ncol=2, byrow=TRUE);
df1 <- cbind(df1, confInts[, 1]);
df2 <- cbind(df2, confInts[, 2]);
} else {
### So, confidence is on the y axis, and values on the x axis.
### So we need to the confidence interval bounds for the given
### sample size, for different confidence levels.
minConf <- min(confRange);
maxConf <- max(confRange);
df1 <- df2 <- data.frame(confidence = seq(minConf, maxConf, length.out=steps));
confInts <- matrix(unlist(lapply(df1$confidence, confIntR, r=value, N=n)),
ncol=2, byrow=TRUE);
df1 <- cbind(df1, confInts[, 1]);
df2 <- cbind(df2, confInts[, 2]);
### If need be, get metric values corresponding to confidence
### interval bounds
if (!is.null(confLines) && (length(confLines) > 0)) {
metricValues <- list();
for (i in seq_along(confLines)) {
metricValues[[i]] <- confIntR(r=value, N=n, conf.level=confLines[i]);
}
}
}
} else {
stop("Sorry, metric '", metric, "' is not implemented.");
}
plot <- ggplot();
if (is.null(n)) {
### Add lines to indicate margins of error ('half-widths'), if we need to
if (!is.null(widthLines) && (length(widthLines) > 0)) {
if (tolower(metric) == 'd') {
### Get the required sample sizes to obtain intervals of those widths
### given the specified sample size and confidence level.
yValues <- sapply(widthLines, pwr.cohensdCI, d=value, conf.level=conf.level);
### Then get the metric values
metricValues <- cohensdCI(d=value, conf.level=conf.level, n = yValues);
} else if (tolower(metric) == 'r') {
### Get the required sample sizes to obtain intervals of those widths
### given the specified sample size and confidence level.
yValues <- sapply(widthLines, pwr.confIntR, r=value, conf.level=conf.level);
### Then get the metric values
metricValues <- confIntR(r=value, conf.level=conf.level, N = yValues);
}
metricValueLabels <- ifelseObj(tolower(metric) == 'r',
formatR(metricValues),
round(metricValues, 2));
yValueLabels <- rev(paste0(yValues, " (MoE=", round(widthLines, 2), ", total width=",
round(2*widthLines, 2), ")"));
if (length(lineColor) < length(confLines)) {
lineColor <- rep(lineColor, each=widthLines)[1:length(widthLines)];
}
for (i in seq_along(widthLines)) {
plot <- plot +
geom_hline(yintercept = yValues[i],
color=lineColor[i],
size=lineSize,
alpha=lineAlpha) +
geom_vline(xintercept = metricValues[i, 1],
color=lineColor[i],
size=lineSize,
alpha=lineAlpha) +
geom_vline(xintercept = metricValues[i, 2],
color=lineColor[i],
size=lineSize,
alpha=lineAlpha);
}
}
names(df1) <- names(df2) <- c('n', metric);
plot <- plot +
geom_line(data=df1, aes_string(x=metric, y='n'),
color = curveColor, size = curveSize) +
geom_line(data=df2, aes_string(x=metric, y='n'),
color = curveColor, size = curveSize) +
ggtitle(paste0("Inverse Confidence Curve for ",
metric, " = ", value, " and ", round(100*conf.level, 2), "% confidence"));
} else {
### Add gradient, if desired
if (!is.null(gradient)) {
if (length(gradient) == 1 && isTrue(gradient)) {
gradient <- c("black", "white")
}
if (!(length(gradient)==2)) {
stop("If specifying a gradient, specify exactly two values!");
} else {
if (!all(areColors(gradient))) {
stop("Not all values specified in 'gradient' are colors!");
} else {
names(df1) <- c('confidence', metric);
names(df2) <- c('confidence', metric);
tmpDf <- rbind(df1, df2);
plot <- plot +
geom_tile(data=tmpDf, aes_string(x=metric, y='confidence', fill='confidence'),
height=Inf, width=gradientWidth) +
scale_fill_gradient2(low = gradient[2], mid = gradient[1], high = gradient[2], midpoint = 0);
}
}
}
### Add lines to indicate confidence intervals, if we need to
if (!is.null(confLines) && (length(confLines) > 0)) {
metricValues <- round(sort(unlist(metricValues)), 2);
metricValueLabels <-
ifelseObj((tolower(metric) == 'r'),
noZero(metricValues),
metricValues);
yValues <- confLines;
yValueLabels <- confLines;
if (length(lineColor) < length(confLines)) {
lineColor <- rep(lineColor, each=confLines)[1:length(confLines)];
}
for (i in seq_along(confLines)) {
plot <- plot +
geom_hline(yintercept = confLines[i],
color=lineColor[length(confLines)+1-i],
size=lineSize,
alpha=lineAlpha) +
geom_vline(xintercept = metricValues[i],
color=lineColor[i],
size=lineSize,
alpha=lineAlpha) +
geom_vline(xintercept = metricValues[length(metricValues)+1-i],
color=lineColor[i],
size=lineSize,
alpha=lineAlpha);
}
}
names(df1) <- names(df2) <- c('confidence', metric);
df1 <- rbind(c(1, -Inf), df1);
df2 <- rbind(df2, c(1, Inf));
plot <- plot +
geom_line(data=df1, aes_string(x=metric, y='confidence'),
color = curveColor, size = curveSize) +
geom_line(data=df2, aes_string(x=metric, y='confidence'),
color = curveColor, size = curveSize) +
ggtitle(paste0("Confidence Curve for ",
metric, " = ", value, " and n = ", n));
}
if ((!is.null(n) && !is.null(confLines) && (length(confLines) > 0))
|| (!is.null(conf.level) && !is.null(widthLines) && (length(widthLines) > 0))) {
plot <- plot +
scale_x_continuous(sec.axis = dup_axis(breaks=metricValues,
labels=metricValueLabels,
name="")) +
scale_y_continuous(sec.axis = dup_axis(breaks=round(sort(unlist(yValues)), 2),
labels=yValueLabels,
name=""));
}
plot <- plot + theme + xlab(xlab);
if (!is.null(outputFile)) {
ggsaveParameters <- c(list(filename = outputFile,
plot = plot,
width = outputWidth,
height = outputHeight),
ggsaveParams);
do.call(ggsave, ggsaveParameters);
}
return(plot);
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.