R/datexp_violin.R

Defines functions datexp_violin

Documented in datexp_violin

#' Generate violin plots for a numeric variable over several categorical variables.
#' @param x    Tibble. Table containing the categorical variables to cross.
#' @param var1 Character. Name of the main categorical variable (x).
#' @param var2 Character. Name of the numeric variable (y).
#' @param var3 Character. Name of the secondary categorical variable (z).
#' @return     A scatter plot with the distributions of the variables.
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 labs
#' @importFrom ggplot2 guides
#' @importFrom ggplot2 guide_legend
#' @importFrom ggplot2 coord_flip
#' @importFrom ggplot2 scale_fill_brewer
#' @importFrom ggplot2 scale_alpha_discrete
#' @importFrom ggplot2 scale_x_discrete
#' @importFrom ggplot2 scale_y_continuous
#' @importFrom ggplot2 scale_fill_gradient
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 element_rect
#' @importFrom ggplot2 element_line
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 xlab
#' @importFrom ggplot2 rel
#' @importFrom ggplot2 facet_grid
#' @importFrom ggplot2 geom_jitter
#' @importFrom ggplot2 position_jitter
#' @importFrom ggplot2 ylab
#' @importFrom ggplot2 geom_violin
#' @importFrom ggplot2 geom_boxplot
#' @importFrom dplyr vars
#' @importFrom gridExtra grid.arrange
#' @importFrom forcats fct_rev
#' @importFrom stats na.omit
#' @importFrom stats ftable
#' @export


datexp_violin <- function(x,
                          var1,
                          var2,
                          var3 = "all") {
  
  # Check that the selected variables have the right format
  x <- x %>%
    mutate(all = "all") %>%
    mutate(all = as.factor(all)) %>%
    as.data.frame()
  stopifnot(!is.numeric(x[,var1]), is.numeric(x[,var2]), !is.numeric(x[, var3]))

  # Bind variables
  Freq <- NULL
  
  # Prepare the table for processing
  table <- x[, c(var1, var2, var3)]
  names(table) <- c("var1", "var2", "var3")
  table <- na.omit(table)
  table2 <- as.data.frame(ftable(var1~var3, table)) %>%
    mutate(var3 <- fct_rev(var3))

  # Mosaic chart
  plot_mosaic <- ggplot(data = table2, aes(x = var3, y = Freq, fill = var1)) +
    geom_bar(position = "fill", stat = "identity", width = 0.95, alpha = 0.75) +
    xlab(var1) + ylab("Proportion") +
    scale_fill_brewer(palette = "Set1") +
    theme(
      legend.position = "none",
      panel.background = element_rect(fill = "white"),
      panel.grid.major = element_line(colour = "grey81"),
      panel.grid.minor = element_line(colour = "grey81")
    )

  # Violin plot and box plot of x and y variables
  violin <- ggplot(
      table,
      aes(x = var1, y = var2, fill = var1, color = var1)
    ) +
    geom_jitter(
      shape = 16,
      position = position_jitter(0.2),
      alpha = max(0.01, min(0.3, 10 / nrow(table)))
    ) +
    geom_violin(alpha = 0.2) +
    geom_boxplot(
      width = 0.25,
      alpha = 0.5
    ) +
    scale_color_brewer(palette = "Set1") +
    scale_fill_brewer(palette = "Set1") +
    ylim(
      min(table$var2),
      max(table$var2)
    ) +
    labs(
      x = var1,
      y = var2
    ) +
    theme(
      legend.position = "none",
      panel.background = element_rect(fill = "white"),
      panel.grid.major = element_line(colour = "grey81"),
      panel.grid.minor = element_line(colour = "grey81")
    )

  if (var3 != "all") violin <- violin + facet_grid(var3~.)

  # marginal density of x - plot on top
  plot_top <- ggplot(
      table,
      aes(x = var1, fill = var1)
    ) +
    geom_bar(
      stat = "count",
      alpha = 0.3,
      color = "black"
    ) +
    scale_x_discrete(name = var1) +
    scale_y_continuous(name = "frequency") +
    scale_fill_brewer(palette = "Set1") +
    theme(
      legend.position = "none",
      axis.text = element_text(colour = "black"),
      axis.title = element_text(size = 12, colour = "grey", face = "bold"),
      panel.background = element_rect(fill = "white"),
      panel.grid.major = element_line(colour = "grey81"),
      panel.grid.minor = element_line(colour = "grey81")
    )

  # marginal density of y - plot on the right
  plot_right <- ggplot(
      table,
      aes(var2, fill = var1)
    ) +
    geom_density(alpha = 0.3) +
    coord_flip() +
    theme(
      legend.position = "none",
      panel.background = element_rect(fill = "white"),
      panel.grid.major = element_line(colour = "grey81"),
      panel.grid.minor = element_line(colour = "grey81")
    ) +
    xlab(var2) +
    xlim(
      min(table$var2),
      max(table$var2)
    ) +
    scale_fill_brewer(palette = "Set1")

  if (var3 != "all") plot_right <- plot_right + facet_grid(var3 ~ .)

  # arrange the plots together, with appropriate height and width for each row and column
  grid.arrange(
    plot_top,
    plot_mosaic,
    violin,
    plot_right,
    ncol = 2,
    nrow = 2,
    widths = c(2, 1),
    heights = c(1, 2)
  )
}
NicolasJBM/datexp documentation built on May 14, 2019, 10:36 a.m.