#' Generate violin plots for a numeric variable over several categorical variables.
#' @param x Tibble. Table containing the categorical variables to cross.
#' @param var1 Character. Name of the main categorical variable (x).
#' @param var2 Character. Name of the numeric variable (y).
#' @param var3 Character. Name of the secondary categorical variable (z).
#' @return A scatter plot with the distributions of the variables.
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 labs
#' @importFrom ggplot2 guides
#' @importFrom ggplot2 guide_legend
#' @importFrom ggplot2 coord_flip
#' @importFrom ggplot2 scale_fill_brewer
#' @importFrom ggplot2 scale_alpha_discrete
#' @importFrom ggplot2 scale_x_discrete
#' @importFrom ggplot2 scale_y_continuous
#' @importFrom ggplot2 scale_fill_gradient
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 element_rect
#' @importFrom ggplot2 element_line
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 xlab
#' @importFrom ggplot2 rel
#' @importFrom ggplot2 facet_grid
#' @importFrom ggplot2 geom_jitter
#' @importFrom ggplot2 position_jitter
#' @importFrom ggplot2 ylab
#' @importFrom ggplot2 geom_violin
#' @importFrom ggplot2 geom_boxplot
#' @importFrom dplyr vars
#' @importFrom gridExtra grid.arrange
#' @importFrom forcats fct_rev
#' @importFrom stats na.omit
#' @importFrom stats ftable
#' @export
datexp_violin <- function(x,
var1,
var2,
var3 = "all") {
# Check that the selected variables have the right format
x <- x %>%
mutate(all = "all") %>%
mutate(all = as.factor(all)) %>%
as.data.frame()
stopifnot(!is.numeric(x[,var1]), is.numeric(x[,var2]), !is.numeric(x[, var3]))
# Bind variables
Freq <- NULL
# Prepare the table for processing
table <- x[, c(var1, var2, var3)]
names(table) <- c("var1", "var2", "var3")
table <- na.omit(table)
table2 <- as.data.frame(ftable(var1~var3, table)) %>%
mutate(var3 <- fct_rev(var3))
# Mosaic chart
plot_mosaic <- ggplot(data = table2, aes(x = var3, y = Freq, fill = var1)) +
geom_bar(position = "fill", stat = "identity", width = 0.95, alpha = 0.75) +
xlab(var1) + ylab("Proportion") +
scale_fill_brewer(palette = "Set1") +
theme(
legend.position = "none",
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(colour = "grey81"),
panel.grid.minor = element_line(colour = "grey81")
)
# Violin plot and box plot of x and y variables
violin <- ggplot(
table,
aes(x = var1, y = var2, fill = var1, color = var1)
) +
geom_jitter(
shape = 16,
position = position_jitter(0.2),
alpha = max(0.01, min(0.3, 10 / nrow(table)))
) +
geom_violin(alpha = 0.2) +
geom_boxplot(
width = 0.25,
alpha = 0.5
) +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
ylim(
min(table$var2),
max(table$var2)
) +
labs(
x = var1,
y = var2
) +
theme(
legend.position = "none",
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(colour = "grey81"),
panel.grid.minor = element_line(colour = "grey81")
)
if (var3 != "all") violin <- violin + facet_grid(var3~.)
# marginal density of x - plot on top
plot_top <- ggplot(
table,
aes(x = var1, fill = var1)
) +
geom_bar(
stat = "count",
alpha = 0.3,
color = "black"
) +
scale_x_discrete(name = var1) +
scale_y_continuous(name = "frequency") +
scale_fill_brewer(palette = "Set1") +
theme(
legend.position = "none",
axis.text = element_text(colour = "black"),
axis.title = element_text(size = 12, colour = "grey", face = "bold"),
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(colour = "grey81"),
panel.grid.minor = element_line(colour = "grey81")
)
# marginal density of y - plot on the right
plot_right <- ggplot(
table,
aes(var2, fill = var1)
) +
geom_density(alpha = 0.3) +
coord_flip() +
theme(
legend.position = "none",
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(colour = "grey81"),
panel.grid.minor = element_line(colour = "grey81")
) +
xlab(var2) +
xlim(
min(table$var2),
max(table$var2)
) +
scale_fill_brewer(palette = "Set1")
if (var3 != "all") plot_right <- plot_right + facet_grid(var3 ~ .)
# arrange the plots together, with appropriate height and width for each row and column
grid.arrange(
plot_top,
plot_mosaic,
violin,
plot_right,
ncol = 2,
nrow = 2,
widths = c(2, 1),
heights = c(1, 2)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.