#' Generate a graph crossing the distributions of two categorical variables.
#' @param x Dataframe. Table containing the categorical variables to cross.
#' @param var1 Character. Name of the first categorical variable (x).
#' @param var2 Character. Name of the second categorical variable (y).
#' @return A mosaic plot crossing the two selected categorical variables.
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 labs
#' @importFrom ggplot2 guides
#' @importFrom ggplot2 guide_legend
#' @importFrom ggplot2 coord_flip
#' @importFrom ggplot2 scale_fill_brewer
#' @importFrom ggplot2 scale_alpha_discrete
#' @importFrom ggplot2 scale_x_discrete
#' @importFrom ggplot2 scale_y_continuous
#' @importFrom ggplot2 scale_fill_gradient
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 element_rect
#' @importFrom ggplot2 element_line
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 xlab
#' @importFrom ggplot2 geom_histogram
#' @importFrom ggplot2 geom_tile
#' @importFrom gridExtra grid.arrange
#' @importFrom ggmosaic product
#' @importFrom ggmosaic geom_mosaic
#' @importFrom ggplot2 theme_minimal
#' @importFrom forcats fct_rev
#' @importFrom stats na.omit
#' @importFrom stats ftable
#' @export
datexp_crosscat <- function(x,
var1,
var2) {
# Check that variables are indeed categorical.
stopifnot(!is.numeric(x[,var1]), !is.numeric(x[,var2]))
# Bind variables
Freq <- NULL
# Prepare the table for processing
table <- x[, c(var1, var2)]
names(table) <- c("var1", "var2")
table <- na.omit(table)
table2 <- as.data.frame(ftable(var1~var2, table))
# Mosaic chart
plot_mosaic <-
ggplot(data = table2) +
ggmosaic::geom_mosaic(
aes(weight = Freq, x = product(var1, fct_rev(var2)), fill = factor(var1)),
na.rm = TRUE,
offset = 0.005,
alpha = 0.5
) +
labs(
x = var2,
y = "",
fill = var1
) +
guides(fill = guide_legend(reverse = TRUE)) +
coord_flip() +
scale_fill_brewer(palette = "Set1") +
theme(
plot.title = element_text(size = rel(1)),
legend.position = "none",
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(colour = "grey81"),
panel.grid.minor = element_line(colour = "grey81")
)
# Stacked bar of x and y variables
stackedbar <-
ggplot(
table,
aes(x = var1, fill = var1, alpha = var2)
) +
geom_histogram(
position = "fill",
stat = "count"
) +
theme(
legend.position = "none",
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(colour = "grey81"),
panel.grid.minor = element_line(colour = "grey81")
) +
scale_fill_brewer(palette = "Set1") +
scale_alpha_discrete(range = c(0.2, 0.7)) +
labs(x = var1, y = "Relative frequency")
# marginal density of x - plot on top
plot_top <-
ggplot(
table,
aes(x = var1, fill = var1)
) +
geom_bar(
stat = "count",
alpha = 0.3,
color = "black"
) +
scale_x_discrete(name = var1) +
scale_y_continuous(name = "frequency") +
scale_fill_brewer(palette = "Set1") +
theme(legend.position = "none") +
theme(
axis.text = element_text(colour = "black"),
axis.title = element_text(size = 12, colour = "grey", face = "bold"),
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(colour = "grey81"),
panel.grid.minor = element_line(colour = "grey81")
)
# marginal density of y - plot on the right
plot_right <-
ggplot(
table,
aes(fct_rev(var2), alpha = var2)
) +
geom_bar(
stat = "count",
color = "black"
) +
scale_x_discrete(name = var2) +
scale_y_continuous(name = "frequency") +
scale_alpha_discrete(range = c(0.2, 0.7)) +
coord_flip() +
theme(
legend.position = "none",
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(colour = "grey81"),
panel.grid.minor = element_line(colour = "grey81")
) +
xlab(var2)
# Heatmap
heatmap <-
ggplot(
table2,
aes(var1, fct_rev(var2))
) +
geom_tile(
aes(fill = Freq),
colour = "white",
alpha = 0.75
) +
scale_fill_gradient(
low = "white",
high = "black"
) +
labs(
x = var1,
y = var2
) +
theme(legend.position = "none") +
theme(
axis.text = element_text(colour = "black"),
axis.title = element_text(size = 12, colour = "grey", face = "bold"),
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(colour = "grey81"),
panel.grid.minor = element_line(colour = "grey81")
)
# arrange the plots together, with appropriate height and width for each row and column
gridExtra::grid.arrange(
plot_top,
ggplot() + theme_minimal(),
stackedbar,
plot_right,
heatmap,
plot_mosaic,
ncol = 3,
nrow = 6,
layout_matrix = cbind(c(1, 3, 3, 3, 5, 5), c(1, 3, 3, 3, 5, 5), c(2, 4, 4, 4, 6, 6))
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.