R/datexp_scatter.R

Defines functions datexp_scatter

Documented in datexp_scatter

#' Scatter plot of two numeric variables split by a categorical variable
#'
#' Draws a 2 x 2 panel: the marginal density of \code{var1} (top left), a bar
#' chart of the number of rows per level of \code{var3} (top right), the
#' scatter plot of \code{var2} against \code{var1} with per-group linear fits
#' and 2D density contours (bottom left), and the marginal density of
#' \code{var2} (bottom right).
#'
#' Rows with a missing value in any of the three selected variables are removed
#' before the scatter plot and the densities are drawn. The bar chart counts
#' are computed before that removal, so they describe every row of \code{x}
#' whose \code{var3} is not missing.
#'
#' @param x    Tibble or data frame. Table containing the variables to plot.
#' @param var1 Character. Name of the first numeric variable (x axis).
#' @param var2 Character. Name of the second numeric variable (y axis).
#' @param var3 Character. Name of the categorical variable used for grouping
#'   and coloring. The default, \code{"all"}, puts every row in one group.
#'   Note that a constant column named \code{all} is always added to the
#'   working copy of \code{x}, replacing any existing column of that name.
#' @return The result of \code{gridExtra::grid.arrange()}; the function is
#'   mainly called for its side effect of drawing the combined plot.
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 labs
#' @importFrom ggplot2 guides
#' @importFrom ggplot2 guide_legend
#' @importFrom ggplot2 coord_flip
#' @importFrom ggplot2 scale_fill_brewer
#' @importFrom ggplot2 scale_color_brewer
#' @importFrom ggplot2 scale_alpha_discrete
#' @importFrom ggplot2 scale_x_discrete
#' @importFrom ggplot2 scale_y_continuous
#' @importFrom ggplot2 scale_fill_gradient
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 element_rect
#' @importFrom ggplot2 element_line
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 geom_point
#' @importFrom ggplot2 geom_density
#' @importFrom ggplot2 xlim
#' @importFrom ggplot2 ylim
#' @importFrom ggplot2 xlab
#' @importFrom ggplot2 geom_smooth
#' @importFrom ggplot2 geom_density_2d
#' @importFrom gridExtra grid.arrange
#' @importFrom ggmosaic product
#' @importFrom ggmosaic geom_mosaic
#' @importFrom ggplot2 theme_minimal
#' @importFrom forcats fct_rev
#' @importFrom stats na.omit
#' @importFrom stats ftable
#' @export
datexp_scatter <- function(x,
                           var1,
                           var2,
                           var3 = "all") {

  # Work on a plain data frame and add the constant grouping column that backs
  # the default `var3 = "all"`. Base R is used here so that this step does not
  # depend on dplyr/magrittr being imported.
  x <- as.data.frame(x)
  x$all <- factor(rep("all", nrow(x)))

  # Check that the variable names are usable and refer to existing columns
  stopifnot(
    is.character(var1), length(var1) == 1L,
    is.character(var2), length(var2) == 1L,
    is.character(var3), length(var3) == 1L
  )
  unknown <- setdiff(c(var1, var2, var3), names(x))
  if (length(unknown) > 0) {
    stop(
      "Column(s) not found in `x`: ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  # Check that the selected variables have the appropriate format
  if (!is.numeric(x[[var1]]) || !is.numeric(x[[var2]])) {
    stop("`var1` and `var2` must name numeric columns of `x`.", call. = FALSE)
  }
  if (is.numeric(x[[var3]])) {
    stop(
      "`var3` must name a categorical (non-numeric) column of `x`.",
      call. = FALSE
    )
  }

  # Select the data and prepare the table for processing. From here on the
  # columns are called var1/var2/var3, while the arguments of the same names
  # still hold the original column names and are used as axis labels.
  x <- x[, c(var1, var2, var3)]
  names(x) <- c("var1", "var2", "var3")
  x$var3 <- as.factor(x$var3)

  # Frequencies are taken before incomplete rows are dropped
  freq <- as.data.frame(table(x$var3))
  names(freq) <- c("var3", "freq")

  x <- na.omit(x)
  if (nrow(x) == 0L) {
    stop(
      "No complete observations left after removing missing values.",
      call. = FALSE
    )
  }

  # Axis limits shared by the scatter plot and the marginal densities
  range1 <- range(x$var1)
  range2 <- range(x$var2)

  # Theme elements common to the four panels
  panel_theme <- theme(
    legend.position = "none",
    panel.background = element_rect(fill = "white"),
    panel.grid.major = element_line(colour = "grey81"),
    panel.grid.minor = element_line(colour = "grey81")
  )

  # bar chart of the number of observations per level of the categorical
  # variable
  plot_hist <-
    ggplot(freq, aes(var3, freq, fill = var3)) +
    geom_bar(stat = "identity", alpha = 0.3, color = "black") +
    coord_flip() +
    scale_x_discrete(name = var3) +
    scale_y_continuous(name = "frequency") +
    scale_fill_brewer(palette = "Set1") +
    panel_theme +
    theme(
      axis.text = element_text(colour = "black"),
      axis.title = element_text(size = 12, colour = "grey", face = "bold")
    )

  # In the panels below, `drop = FALSE` keeps levels that have no complete
  # observation in the colour scale, so each level gets the same colour as in
  # the bar chart above (which always shows every level).

  # scatterplot of x and y variables
  scatter <-
    ggplot(x, aes(x = var1, y = var2, color = var3)) +
    geom_point(alpha = 0.3) +
    panel_theme +
    labs(x = var1, y = var2, color = var3) +
    xlim(range1[1], range1[2]) +
    ylim(range2[1], range2[2]) +
    scale_color_brewer(palette = "Set1", drop = FALSE) +
    scale_fill_brewer(palette = "Set1", drop = FALSE) +
    geom_smooth(aes(group = var3, colour = var3), method = "lm") +
    geom_density_2d(alpha = 0.3)

  # marginal density of x - plot on top
  plot_top <-
    ggplot(x, aes(var1, fill = var3)) +
    geom_density(alpha = 0.3) +
    panel_theme +
    xlab(var1) +
    xlim(range1[1], range1[2]) +
    scale_fill_brewer(palette = "Set1", drop = FALSE)

  # marginal density of y - plot on the right; the axes are flipped so that
  # the var2 axis runs vertically, like the y axis of the scatter plot
  plot_right <-
    ggplot(x, aes(var2, fill = var3)) +
    geom_density(alpha = 0.3) +
    coord_flip() +
    panel_theme +
    xlab(var2) +
    xlim(range2[1], range2[2]) +
    scale_fill_brewer(palette = "Set1", drop = FALSE)

  # arrange the plots together, with appropriate height and width for each row
  # and column
  grid.arrange(
    plot_top,
    plot_hist,
    scatter,
    plot_right,
    ncol = 2,
    nrow = 2,
    widths = c(2, 1),
    heights = c(1, 2)
  )
}
NicolasJBM/datexp documentation built on May 14, 2019, 10:36 a.m.