R/datexp_crosscat.R

Defines functions datexp_crosscat

Documented in datexp_crosscat

#' Generate a graph crossing the distributions of two categorical variables.
#' @param x    Dataframe. Table containing the categorical variables to cross.
#' @param var1 Character. Name of the first categorical variable (x).
#' @param var2 Character. Name of the second categorical variable (y).
#' @return A mosaic plot crossing the two selected categorical variables.
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 labs
#' @importFrom ggplot2 guides
#' @importFrom ggplot2 guide_legend
#' @importFrom ggplot2 coord_flip
#' @importFrom ggplot2 scale_fill_brewer
#' @importFrom ggplot2 scale_alpha_discrete
#' @importFrom ggplot2 scale_x_discrete
#' @importFrom ggplot2 scale_y_continuous
#' @importFrom ggplot2 scale_fill_gradient
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 element_rect
#' @importFrom ggplot2 element_line
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 xlab
#' @importFrom ggplot2 geom_histogram
#' @importFrom ggplot2 geom_tile
#' @importFrom gridExtra grid.arrange
#' @importFrom ggmosaic product
#' @importFrom ggmosaic geom_mosaic
#' @importFrom ggplot2 theme_minimal
#' @importFrom forcats fct_rev
#' @importFrom stats na.omit
#' @importFrom stats ftable
#' @export


datexp_crosscat <- function(x,
                            var1,
                            var2) {

  # Check that variables are indeed categorical.
  stopifnot(!is.numeric(x[,var1]), !is.numeric(x[,var2]))
  
  # Bind variables
  Freq <- NULL
  
  # Prepare the table for processing
  table <- x[, c(var1, var2)]
  names(table) <- c("var1", "var2")
  table <- na.omit(table)
  table2 <- as.data.frame(ftable(var1~var2, table))
  
  # Mosaic chart
  plot_mosaic <-
    ggplot(data = table2) +
    ggmosaic::geom_mosaic(
      aes(weight = Freq, x = product(var1, fct_rev(var2)), fill = factor(var1)),
      na.rm = TRUE,
      offset = 0.005,
      alpha = 0.5
    ) +
    labs(
      x = var2,
      y = "",
      fill = var1
    ) +
    guides(fill = guide_legend(reverse = TRUE)) +
    coord_flip() +
    scale_fill_brewer(palette = "Set1") +
    theme(
      plot.title = element_text(size = rel(1)),
      legend.position = "none",
      panel.background = element_rect(fill = "white"),
      panel.grid.major = element_line(colour = "grey81"),
      panel.grid.minor = element_line(colour = "grey81")
    )

  # Stacked bar of x and y variables
  stackedbar <-
    ggplot(
      table,
      aes(x = var1, fill = var1, alpha = var2)
    ) +
    geom_histogram(
      position = "fill",
      stat = "count"
    ) +
    theme(
      legend.position = "none",
      panel.background = element_rect(fill = "white"),
      panel.grid.major = element_line(colour = "grey81"),
      panel.grid.minor = element_line(colour = "grey81")
    ) +
    scale_fill_brewer(palette = "Set1") +
    scale_alpha_discrete(range = c(0.2, 0.7)) +
    labs(x = var1, y = "Relative frequency")

  # marginal density of x - plot on top
  plot_top <-
    ggplot(
      table,
      aes(x = var1, fill = var1)
    ) +
    geom_bar(
      stat = "count",
      alpha = 0.3,
      color = "black"
    ) +
    scale_x_discrete(name = var1) +
    scale_y_continuous(name = "frequency") +
    scale_fill_brewer(palette = "Set1") +
    theme(legend.position = "none") +
    theme(
      axis.text = element_text(colour = "black"),
      axis.title = element_text(size = 12, colour = "grey", face = "bold"),
      panel.background = element_rect(fill = "white"),
      panel.grid.major = element_line(colour = "grey81"),
      panel.grid.minor = element_line(colour = "grey81")
    )

  # marginal density of y - plot on the right
  plot_right <-
    ggplot(
      table,
      aes(fct_rev(var2), alpha = var2)
    ) +
    geom_bar(
      stat = "count",
      color = "black"
    ) +
    scale_x_discrete(name = var2) +
    scale_y_continuous(name = "frequency") +
    scale_alpha_discrete(range = c(0.2, 0.7)) +
    coord_flip() +
    theme(
      legend.position = "none",
      panel.background = element_rect(fill = "white"),
      panel.grid.major = element_line(colour = "grey81"),
      panel.grid.minor = element_line(colour = "grey81")
    ) +
    xlab(var2)

  # Heatmap
  heatmap <-
    ggplot(
      table2,
      aes(var1, fct_rev(var2))
    ) +
    geom_tile(
      aes(fill = Freq),
      colour = "white",
      alpha = 0.75
    ) +
    scale_fill_gradient(
      low = "white",
      high = "black"
    ) +
    labs(
      x = var1,
      y = var2
    ) +
    theme(legend.position = "none") +
    theme(
      axis.text = element_text(colour = "black"),
      axis.title = element_text(size = 12, colour = "grey", face = "bold"),
      panel.background = element_rect(fill = "white"),
      panel.grid.major = element_line(colour = "grey81"),
      panel.grid.minor = element_line(colour = "grey81")
    )

  # arrange the plots together, with appropriate height and width for each row and column
  gridExtra::grid.arrange(
    plot_top,
    ggplot() + theme_minimal(),
    stackedbar,
    plot_right,
    heatmap,
    plot_mosaic,
    ncol = 3,
    nrow = 6,
    layout_matrix = cbind(c(1, 3, 3, 3, 5, 5), c(1, 3, 3, 3, 5, 5), c(2, 4, 4, 4, 6, 6))
  )
}
NicolasJBM/datexp documentation built on May 14, 2019, 10:36 a.m.