A ggplot2 layer for drawing brackets annotated with p-values and significance testing between barplot samples.
Alex Azzo azzoa@vcu.edu
ggBrackets is a very small R package for adding brackets and significance tests between samples/observations in ggplot2 plots, a simple task which I found difficult and finicky to accomplish natively in ggplot2. ggBrackets was written specifically for use with geom_bar() and has not yet been tested thoroughly with other geometries.
The two primary functions in ggBrackets are:
- gg_bracket_between() - draws the bracket between two samples
- gg_ttest_between() - performs a 2-sample t-test and displays the
  resulting p-value on the plot

A third function, gg_value_between(), allows user-specified text to appear
between bars. This gives the flexibility to use the output of any statistical
method in R with ggBrackets.
Some features of the package are:
- extra_y_space parameter to fine-tune the vertical location of
  the brackets/p-values
- t_test2() function which allows summary statistics
  (mean, sd, n) in the data frame to be used instead of sample vectors

That's about it!
The best way to install ggBrackets right now is by first installing
the devtools
R package.
# Install the devtools package, then attach it for the current session
install.packages('devtools')
library(devtools)
Then install the latest version from the GitHub repository using install_github().
# Install the latest ggBrackets from its GitHub repository (azzoam/ggBrackets)
install_github('azzoam/ggBrackets')
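ggBrackets is designed to work with data that follows the 'tidy data' formatting guidelines, where each variable is a column and each observation is a row.
Data formatted in this way is also the most convenient to use with
ggplot2 anyway. Additionally, gg_ttest_between() requires columns holding the mean, standard deviation and sample size (n) of each sample;
these are passed as mean_col, sd_col and n_col in the examples below.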
library(ggplot2)
library(ggBrackets)
# Data from ToothGrowth built in R dataset
data("ToothGrowth")
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
ToothGrowth$supp <- as.factor(ToothGrowth$supp)

# Build `tg`: one row of summary statistics (mean, sd, n of tooth length)
# for every dose x supplement combination.
# expand.grid() varies its first column fastest, so the rows come out ordered
# by dose, then by supplement within each dose.
combos <- expand.grid(
  supp = levels(ToothGrowth$supp),
  dose = levels(ToothGrowth$dose),
  stringsAsFactors = FALSE
)
summarise_cell <- function(i) {
  supp <- combos$supp[i]
  dose <- combos$dose[i]
  # Subset once and reuse the vector for all three statistics
  len <- ToothGrowth[ToothGrowth$dose == dose & ToothGrowth$supp == supp, "len"]
  data.frame(
    len_mean = mean(len),
    len_sd = sd(len),
    len_n = length(len),
    supp = supp,
    dose = dose
  )
}
# Bind all rows in a single call instead of growing `tg` inside a loop
tg <- do.call(rbind, lapply(seq_len(nrow(combos)), summarise_cell))
# Drop the temporaries so only `ToothGrowth` and `tg` remain
rm(combos, summarise_cell)
# Standard grouped ggplot bargraph: mean tooth length per supplement, filled
# by dose, with error bars spanning mean +/- one standard deviation.
p <- ggplot(tg, aes(x = supp, y = len_mean, fill = dose)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = len_mean - len_sd, ymax = len_mean + len_sd),
    position = position_dodge(width = 0.9),
    width = 0.25
  ) +
  labs(x = "Supplement", y = "Tooth Length") +
  # Minor theme tweaks for viewability
  theme(
    plot.title = element_text(size = 24, hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  )

# Default ggplot2 bargraph
p
# With ggBrackets: a bracket plus its t-test p-value, both lifted by the same
# extra_y_space so they stay together.
p +
  # Between OJ-dose1 and OJ-dose2
  gg_bracket_between(
    data = tg,
    sample_col = "supp", sample1 = "OJ", sample2 = "OJ",
    group_col = "dose", group1 = 1, group2 = 2,
    mean_col = "len_mean",
    extra_y_space = 5
  ) +
  gg_ttest_between(
    data = tg,
    sample_col = "supp", sample1 = "OJ", sample2 = "OJ",
    group_col = "dose", group1 = 1, group2 = 2,
    mean_col = "len_mean", sd_col = "len_sd", n_col = "len_n",
    extra_y_space = 5
  )
# Multiple brackets: each comparison is one gg_bracket_between() layer paired
# with one gg_ttest_between() layer using the same samples and groups.
p +
  # Between OJ-dose1 and OJ-dose 2
  gg_bracket_between(
    data = tg,
    sample_col = "supp", sample1 = "OJ", sample2 = "OJ",
    group_col = "dose", group1 = 1, group2 = 2,
    mean_col = "len_mean",
    extra_y_space = 5
  ) +
  gg_ttest_between(
    data = tg,
    sample_col = "supp", sample1 = "OJ", sample2 = "OJ",
    group_col = "dose", group1 = 1, group2 = 2,
    mean_col = "len_mean", sd_col = "len_sd", n_col = "len_n",
    extra_y_space = 5
  ) +
  # Between VC-dose1 and VC-dose 2
  gg_bracket_between(
    data = tg,
    sample_col = "supp", sample1 = "VC", sample2 = "VC",
    group_col = "dose", group1 = 1, group2 = 2,
    mean_col = "len_mean",
    extra_y_space = 5
  ) +
  gg_ttest_between(
    data = tg,
    sample_col = "supp", sample1 = "VC", sample2 = "VC",
    group_col = "dose", group1 = 1, group2 = 2,
    mean_col = "len_mean", sd_col = "len_sd", n_col = "len_n",
    extra_y_space = 5
  ) +
  # Between OJ-dose2 and VC-dose2 (larger extra_y_space than the two above)
  gg_bracket_between(
    data = tg,
    sample_col = "supp", sample1 = "OJ", sample2 = "VC",
    group_col = "dose", group1 = 2, group2 = 2,
    mean_col = "len_mean",
    extra_y_space = 10
  ) +
  gg_ttest_between(
    data = tg,
    sample_col = "supp", sample1 = "OJ", sample2 = "VC",
    group_col = "dose", group1 = 2, group2 = 2,
    mean_col = "len_mean", sd_col = "len_sd", n_col = "len_n",
    extra_y_space = 10
  )
# With stars instead of p-values: same three comparisons, with
# p_value_star = TRUE on every gg_ttest_between() layer.
# TRUE is spelled out because the alias T is an ordinary variable that can be
# reassigned, which would silently change what is passed here.
p +
  # Between OJ-dose1 and OJ-dose 2
  gg_bracket_between(
    data = tg,
    sample_col = "supp", sample1 = "OJ", sample2 = "OJ",
    group_col = "dose", group1 = 1, group2 = 2,
    mean_col = "len_mean",
    extra_y_space = 5
  ) +
  gg_ttest_between(
    data = tg,
    sample_col = "supp", sample1 = "OJ", sample2 = "OJ",
    group_col = "dose", group1 = 1, group2 = 2,
    mean_col = "len_mean", sd_col = "len_sd", n_col = "len_n",
    extra_y_space = 5, p_value_star = TRUE
  ) +
  # Between VC-dose1 and VC-dose 2
  gg_bracket_between(
    data = tg,
    sample_col = "supp", sample1 = "VC", sample2 = "VC",
    group_col = "dose", group1 = 1, group2 = 2,
    mean_col = "len_mean",
    extra_y_space = 5
  ) +
  gg_ttest_between(
    data = tg,
    sample_col = "supp", sample1 = "VC", sample2 = "VC",
    group_col = "dose", group1 = 1, group2 = 2,
    mean_col = "len_mean", sd_col = "len_sd", n_col = "len_n",
    extra_y_space = 5, p_value_star = TRUE
  ) +
  # Between OJ-dose2 and VC-dose2
  gg_bracket_between(
    data = tg,
    sample_col = "supp", sample1 = "OJ", sample2 = "VC",
    group_col = "dose", group1 = 2, group2 = 2,
    mean_col = "len_mean",
    extra_y_space = 10
  ) +
  gg_ttest_between(
    data = tg,
    sample_col = "supp", sample1 = "OJ", sample2 = "VC",
    group_col = "dose", group1 = 2, group2 = 2,
    mean_col = "len_mean", sd_col = "len_sd", n_col = "len_n",
    extra_y_space = 10, p_value_star = TRUE
  )
ggBrackets also works for single-group bar graphs: simply leave out the group parameters.
# Single group graph: keep only the OJ rows of `tg` and the columns used below
tg_oj <- tg[tg$supp == "OJ", c("dose", "len_mean", "len_sd", "len_n")]

# Ungrouped bar graph of mean tooth length per dose with mean +/- sd error bars
p <- ggplot(tg_oj, aes(x = dose, y = len_mean)) +
  geom_bar(stat = "identity") +
  geom_errorbar(
    aes(ymin = len_mean - len_sd, ymax = len_mean + len_sd),
    width = 0.25
  ) +
  labs(x = "OJ dose", y = "Tooth Length") +
  theme(
    plot.title = element_text(size = 24, hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  )
# No group_col/group1/group2 here: the samples are the dose values themselves
p +
  # Between dose1 and dose2
  gg_bracket_between(
    data = tg_oj,
    sample_col = "dose", sample1 = 1, sample2 = 2,
    mean_col = "len_mean",
    extra_y_space = 5
  ) +
  gg_ttest_between(
    data = tg_oj,
    sample_col = "dose", sample1 = 1, sample2 = 2,
    mean_col = "len_mean", sd_col = "len_sd", n_col = "len_n",
    extra_y_space = 5
  )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.