R/raw_code/geom_rbar.R

Defines functions geom_rbar

Documented in geom_rbar

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#' Creates a radial barchart which shows a cluster's average feature values
#'
#' @inheritParams ggplot2::layer
#' @seealso \code{\link[ggplot2]{layer}}
#' @param data The dataset as data frame / tibble. Should be scaled. See ?scale.
#' @param stat  The statistical transformation to use on the data, as a string;
#'   recommendation - `identity` as it keeps the data as it is.
#' @param position Position adjustment, either as a string, or the result of
#'   a call to a position adjustment function, recommendation - `identity` so it doesn't adjust position.
#' @param na.rm A length-one logical vector. Should missing values (including NaN) be removed?
#'   If `FALSE`, the default, missing values are removed with
#'   a warning. If `TRUE`, missing values are silently removed.
#' @param ... Other arguments passed on to [`layer()`]. These are
#'   often aesthetics, used to set an aesthetic to a fixed value, like
#'   `color = "red"` or `size = 3`. They may also be parameters
#'   to the paired geom/stat.
#'
#' @param cluster_idx A length-one integer vector. The Index of cluster of interest. e.g. 1L, 2L etc.
#' @param cluster_assignment A integer vector with the cluster membership assignment. (optional)
#' @param cluster_phase  Character vector with the time point of the recording, e.g there are two unique time points, `"T0"` and `"T1"`. (optional)
#' @param phase_present A length-one logical vector, the default `FALSE`.
#' @param cluster_name Name of the cluster. (optional)
#' @param cluster_abbrev Prefix of cluster_idx. e.g. `"PT1"`, `"PT2"`
#' @param colour_clusters for color clusters
#' @param scale_rng Min and max values to be shown.
#' @param data_dict For internal use.
#' @param delta_threshold For internal use.
#' @param group_names Character vector with group names of features. Group names are displayed in the inner circle. (optional)
#' @param show_group_names A length-one logical vector, the default `FALSE, does not display group_names in inner circle.
#' @param unique_id Numerical vector with unique identifiers for each observation. (optional)
#'
#' @details # Warning
#' * The visualization only works with a polar coordinate system.(set internally)
#' * If phase_present = TRUE, then cluster_phase and unique_id must be provided.
#' * If cluster_assignment is not provided; there will be only one cluster for observations.
#' * If group_names is not provided, there will be only one group for all features.
#'
#' @section Aesthetics:
#' The following aesthetics are understood (required are in bold):
#' \itemize{
#'    \item \strong{`x`}, A variable with names of features
#'    \item \strong{`y`}, A variable with values for the corresponding feature names in `x`
#'    \item `fill`, Affects fill color
#'    \item `size`,
#'    \item `color_inner_circle`,
#'    \item `standard_error`,
#' }
#'
#' @return the radial barchart
#'
#' @name geom_rbar
#'
#' @export
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
geom_rbar <- function(mapping = NULL,
                      data = NULL,
                      stat = "identity",
                      position = "identity",
                      ...,
                      na.rm = TRUE,
                      show.legend = NA,
                      inherit.aes = TRUE,
                      cluster_idx = NULL,
                      cluster_assignment = NULL,
                      phase_present = FALSE,
                      cluster_phase = NULL,
                      cluster_name = NULL,
                      cluster_abbrev = NULL,
                      colour_clusters = NULL,
                      scale_rng = c(-1, 1) * 1.5,
                      data_dict = NULL,
                      delta_threshold = 0.25,
                      group_names = NULL,
                      show_group_names = FALSE,
                      unique_id = NULL) {
  ggplot2::layer(
    data = data,
    mapping = mapping,
    geom = GeomRbar,
    stat = stat,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      na.rm = na.rm,
      cluster_assignment = cluster_assignment,
      cluster_idx = cluster_idx,
      phase_present = phase_present,
      cluster_phase = cluster_phase,
      cluster_name = cluster_name,
      cluster_abbrev = cluster_abbrev,
      colour_clusters = colour_clusters,
      scale_rng = scale_rng,
      data_dict = data_dict,
      delta_threshold = delta_threshold,
      group_names = group_names,
      show_group_names = show_group_names,
      unique_id = unique_id,
      ...
    )
  )
}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#' GeomRbar
#'
#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
#' @export
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
GeomRbar <- ggplot2::ggproto("GeomRbar", ggplot2::Geom,
  required_aes = c("x", "y"),
  default_aes = ggplot2::aes(
    fill = "grey60",
    size = 1.25,
    color_inner_circle = "grey90",
    standard_error = TRUE
  ),

  # setup_params --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  # Used to setup defaults that are needed to complete the dataset, and to inform the user of important choices. returns a list of parameters.


  setup_params = function(data, params) {

    if (is.null(data) || nrow(data) == 0) {
      rlang::inform("No data : Nothing to plot. The dataset must not be empty.")
      return(ggplot2::zeroGrob())
    }

    if (length(data$x) != length(data$y)) {
      rlang::abort("Length of 'x' and 'y' aesthetic is different ")
    }

    # if (is.numeric(data$x)) {
    #   rlang::abort("The required aesthetic 'x' must be character or factor variable of feature names.")
    # }   # bug because x is changed into numerical by setup_params

    if (!is.numeric(data$y)) {
      rlang::abort("The required aesthetic 'y' must contain numerical values for features in 'x'.")
    }

    if (isFALSE(tibble::is_tibble(data))) {
      data <- tibble::as_tibble(data)
      rlang::inform("The dataset was converted to tibble.")
    }

    if (!is.factor(data$x)) {
      data$x <- as.factor(data$x)
      rlang::inform("The required aesthetic 'x' was converted to a factor variable.")
    }

    # df <- tibble::tibble(f = data$x, v= data$y)

    # browser()

    # Checks for Parameters ---------------------------------------------------

    # Strict aborts

    # coordinate system

    # if (is.null(params$coordinate_system) || (!(ggplot2::is.Coord(params$coordinate_system)))) {
    #   rlang::abort("Please provide a coordinate system; only 'coord_polar' is acceptable.")
    # }



     # cluster_assignment, cluster_idx, group_names

    if (!is.null(params$cluster_assignment)) {
      # if (!is.integer(params$cluster_assignment)) {
      #   rlang::abort("'cluster_assignment': wrong datatype. Integer datatype expected.")
      # }

      if (length(params$cluster_assignment) != length(data$x)) {
        rlang::abort("Length of cluster_assignment is different from 'x' and 'y' aesthetic ")
      }

      if (!is.factor(params$cluster_assignment)) {
        params$cluster_assignment <- as.factor(params$cluster_assignemnet)

        rlang::inform("'cluster_assignment' converted to a factor variable, if not already.")
      }
    }

    if (!is.null(params$cluster_idx)) {
      if (!is.integer(params$cluster_idx)) {
        rlang::abort("'cluster_idx' must be a integer value.")
      }
    }

    if (!is.null(params$group_names)) {
      if (is.logical(params$group_names) || is.numeric(params$group_names)) {
        rlang::abort("'group_names' : wrong datatype. Character or factor datatype expected.")
      }

      if (length(params$group_names) != length(data$x)) {
        rlang::abort("Length of 'group_names' is different from 'x' and 'y' aesthetic ")
      }

      if (!is.factor(params$group_names)) {
        params$group_names <- as.factor(params$group_names)

        rlang::inform("'group_names' converted to factor, if it is not already.")
      }
    }

    # show_group_names

    if (!is.logical(params$show_group_names) || length(params$show_group_names) != 1) {
      rlang::abort("'show_group_names' must be logical either 'TRUE' or 'FALSE' and Length must be 1.")
    }

    # phase_present with -  cluster_assignment,  cluster_phase, unique_id

    if (!is.logical(params$phase_present) || length(params$phase_present) != 1) {
      rlang::abort("phase_present must be logical either 'TRUE' or 'FALSE' and Length must be 1.")
    }

    if (isTRUE(params$phase_present) && is.null(params$cluster_assignment)) {
      rlang::abort("If 'phase_present = TRUE', you must provide 'cluster_assignment'.")
    }

    if (isTRUE(params$phase_present) && is.null(params$cluster_phase)) {
      rlang::abort("If 'phase_present = TRUE', you must provide 'cluster_phase'.")
    }

    if (isTRUE(params$phase_present) && is.null(params$unique_id)) {
      rlang::abort("If 'phase_present = TRUE', you must provide 'unique_id's.")
    }

    if (!is.null(params$cluster_phase)) {
      if (!is.character(params$cluster_phase)) {
        rlang::abort("'cluster_phase' : wrong datatype. Character datatype expected.")
      }

      if (length(params$cluster_phase) != length(df$f)) {
        rlang::abort("Length of 'cluster_phase' is different from 'x' and 'y' aesthetic ")
      }

      if (!is.factor(params$cluster_phase)) {
        params$cluster_phase <- as.factor(params$cluster_phase)

        rlang::inform(" 'cluster_phase' converted to factor, if it is not already.")
      }
    }

    if (!is.null(params$unique_id)) {
      if (!is.integer(params$unique_id)) {
        rlang::abort("'unique_id' must be a integer column.")
      }

      if (length(params$unique_id) != length(df$f)) {
        rlang::abort("Length of 'unique_id' is different from 'x' and 'y' aesthetic ")
      }
    }


    # Warnings ----------------------------------------------------------------

    # cluster_name and cluster_abbrev

    if (!is.character(params$cluster_name) || stringr::str_length(params$cluster_name) > 15) {
      rlang::warn("Recommendation : 'Name' in character and length less than or equal to 15; text may not fit.")
    }

    if (!is.character(params$cluster_abbrev) || stringr::str_length(params$cluster_abbrev) > 6) {
      rlang::warn("Recommendation : 'Abbreviation' in character and length less than or equal to 4; text may not fit.")
    }

    list(
      na.rm = params$na.rm,
      cluster_assignment = params$cluster_assignment,
      cluster_idx = params$cluster_idx,
      phase_present = params$phase_present,
      cluster_phase = params$cluster_phase,
      cluster_name = params$cluster_name,
      cluster_abbrev = params$cluster_abbrev,
      color_clusters = params$color_clusters,
      scale_rng = params$scale_rng,
      data_dict = params$data_dict,
      delta_threshold = params$delta_threshold,
      group_names = params$group_names,
      show_group_names = params$show_group_names,
      unique_id = params$unique_id
    )
    #browser()
  },


  # setup_data ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  # returns modified data.
  setup_data = function(data, params,
                        cluster_idx = NULL,
                        cluster_assignment = NULL,
                        phase_present = FALSE,
                        cluster_phase = NULL,
                        cluster_name = NULL,
                        cluster_abbrev = NULL,
                        colour_clusters = NULL,
                        scale_rng = c(-1, 1) * 1.5,
                        data_dict = NULL,
                        delta_threshold = 0.25,
                        group_names = NULL,
                        show_group_names = FALSE,
                        unique_id = NULL) {

    data <- tibble::tibble(f = data$x,
                           v = data$y)

    # Modify Data

    if (!is.null(cluster_assignment)) {
      data <- data %>% dplyr::mutate(.cluster = cluster_assignment)
    } else {
      cluster_values <- c("C1")
      cluster_levels <- c("C1")

      data <- data %>% dplyr::mutate(.cluster = factor(cluster_values, levels = cluster_levels))

      cluster_assignment <- data$.cluster

      rlang::inform("A new factor variable was created, that contains cluster membership,\nSince you did not specify 'cluster_assignement', only one cluster exists.")
    }

    unique_clusters <- sort(unique(cluster_assignment))

    if (is.null(cluster_idx)) cluster_idx <- unique_clusters

    # if (cluster_idx > length(unique_clusters)) rlang::abort("The Cluster value, you specified does not exist.")


    auto_colors <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(9, "Set1"))(length(unique_clusters))


    # color_clusters parameter
    if (is.null(colour_clusters)) {
      colour_clusters <- if (!is.null(cluster_idx)) NA_character_ else auto_colors
    }

    if (!is.null(group_names)) {
      data <- data %>% dplyr::mutate(feature_groups = group_names)
    } else {
      group_values <- c("SingleGroup")
      group_levels <- c("SingleGroup")

      data <- data %>% dplyr::mutate(feature_groups = factor(group_values, levels = group_levels))

      # params$group_names <- data$feature_groups

      rlang::inform("A new factor variable was created, that conatins 'group_names'.\nSince you did not specify group_names, only one group exists.")
    }


    if (!is.null(cluster_phase)) {
      data <- data %>% dplyr::mutate(.phase = cluster_phase)
    } else {
      data <- data %>% dplyr::mutate(.phase := NA_character_)
    }

    if (!is.null(unique_id)) {
      data <- data %>% dplyr::mutate(.id = unique_id)
    } else {
      data <- data %>% mutate(.id := NA_character_)
    }

    # browser()

    ## just for checking; we have required columns in data- for testing purposes only
    vars_dummy <- setdiff(c(".phase", ".id", ".cluster", "feature_groups"), names(data))

    if (length(vars_dummy) > 0) {
      rlang::abort("Data doesn't have required structure to build the plot")
    }



    ####  Calculate cluster average for each feature
    if (!is.null(cluster_idx)) {
      rlang::inform("Calculating cluster average for each feature")

      data <- data %>% dplyr::filter(.cluster == levels(.cluster)[cluster_idx]) # now contains observations from one cluster(cluster of interest) e.g C2 has 23713 observations
    }

    if (!is.null(cluster_idx)) {
      data_cluster <- data %>%
        dplyr::mutate(.cluster = forcats::fct_drop(as.factor(.cluster))) %>% # fct_drop - drops unused levels
        dplyr::mutate(.cluster = factor(.cluster, labels = paste(cluster_abbrev, levels(.cluster)))) # add cluster_abbrev to .cluster e.g : PT C2
    } else {
      data_cluster <- data
    }



    # # Observations that are assigned to the cluster of interest
    # obs_ids_cluster <- which(df$.cluster == cluster_idx)
    #
    # browser()

    # If phase_present, filter observations that have 2 measurements
    if (isTRUE(phase_present)) {
      data_cluster <- data_cluster %>%
        dplyr::group_by(.id) %>%
        dplyr::mutate(n = dplyr::n_distinct(.phase)) %>%
        dplyr::filter(n == 2) %>%
        dplyr::select(-n) %>%
        dplyr::arrange(.id) %>%
        dplyr::ungroup()
    }

    data <- data_cluster %>%
      dplyr::group_by(f) %>%
      dplyr::add_count(.cluster) %>%
      dplyr::ungroup() %>%
      dplyr::group_by(.phase, .cluster, f, feature_groups) %>%
      dplyr::summarize(avg = mean(v), sd = sd(v), n = n[1], .groups = "keep") %>%
      dplyr::ungroup() %>%
      dplyr::mutate(error = stats::qnorm(0.975) * sd / sqrt(n)) %>%
      # winsorize cluster averages
      dplyr::mutate(avg = ifelse(avg > scale_rng[2], scale_rng[2], avg)) %>%
      dplyr::mutate(avg = ifelse(avg < scale_rng[1], scale_rng[1], avg)) %>%
      dplyr::mutate(sd = dplyr::if_else(avg < 0, sd, -sd)) %>%
      dplyr::arrange(feature_groups, f, .cluster)

    browser()
  },





  # draw_group ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

  draw_group = function(self, data, params,
                        cluster_idx = NULL,
                        cluster_assignment = NULL,
                        phase_present = FALSE,
                        cluster_phase = NULL,
                        cluster_name = NULL,
                        cluster_abbrev = NULL,
                        colour_clusters = NULL,
                        scale_rng = c(-1, 1) * 1.5,
                        data_dict = NULL,
                        delta_threshold = 0.25,
                        group_names = NULL,
                        show_group_names = FALSE,
                        unique_id = NULL) {


    # q_id is the group rank
    # f_id is the feature id after inserting a space of 2 units between each q_id
    df_plot <- data %>%
      dplyr::group_by(.cluster) %>%
      dplyr::mutate(id = as.double(dplyr::row_number())) %>%
      dplyr::mutate(q_id = dplyr::dense_rank(feature_groups)) %>%
      dplyr::mutate(f_id = dplyr::if_else(q_id == 1, id, id + 2 * (q_id - 1))) %>%
      dplyr::mutate(f_id_adj = dplyr::case_when(
        isTRUE(phase_present) & .phase == levels(.phase)[1] ~ f_id + 0.15,
        isTRUE(phase_present) & .phase == levels(.phase)[2] ~ f_id - 0.15,
        TRUE ~ f_id
      )) %>%
      tidyr::complete(f_id = -1:(max(f_id) + 2), fill = list(feature = "", avg = NA_real_)) %>%
      dplyr::ungroup()



    # base_data ----
    # position of group lines and labels
    base_data <- df_plot %>%
      tidyr::drop_na(-.phase) %>%
      dplyr::group_by(feature_groups) %>%
      dplyr::summarize(
        start = min(f_id_adj),
        end = max(f_id_adj),
        title = start + 0.5 * (end - start)
      ) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(hjust = dplyr::case_when(
        dplyr::between(title / max(end), 0.075, 0.425) ~ 1,
        dplyr::between(title / max(end), 0.575, 0.925) ~ 0,
        TRUE ~ 0.5
      )) %>%
      dplyr::mutate(hjust = dplyr::if_else(title / max(end) > 0.9, 0, hjust)) %>%
      dplyr::mutate(hjust = dplyr::case_when(
        title / max(end) < 0.5 ~ 1,
        TRUE ~ 0
      )) %>%
      dplyr::mutate(vjust = dplyr::case_when(
        title / max(end) < 0.1 | title / max(end) > 0.9 ~ 1,
        dplyr::between(title / max(end), 0.4, 0.6) ~ 0,
        TRUE ~ 0.5
      ))

    if (isTRUE(params$show_group_names)) {
      base_data <- base_data %>%
        dplyr::mutate(feature_groups = factor(feature_groups, labels = seq_along(levels(feature_groups))))
    }



    # grid_data ----
    # position of grid lines between groups
    grid_data <- df_plot %>%
      # dplyr::filter(.cluster == .cluster[1]) %>%
      dplyr::filter(is.na(id)) %>%
      dplyr::select(f_id) %>%
      dplyr::mutate(diff = f_id - dplyr::lag(f_id, default = f_id[1])) %>%
      dplyr::mutate(g = cumsum(diff > 1) + 1) %>%
      dplyr::select(-diff) %>%
      dplyr::group_by(g) %>%
      dplyr::summarize(start = f_id[1], end = f_id[2]) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(y = list(seq(scale_rng[1], scale_rng[2], 0.5))) %>%
      tidyr::unnest(y)

    # label_data ----
    # position of feature names
    label_data <- df_plot %>%
      dplyr::mutate(f_id_min = min(f_id) - 2, f_id_max = max(f_id) + 2) %>%
      dplyr::filter(!is.na(f)) %>%
      dplyr::mutate(flag = !is.null(cluster_idx) && phase_present) %>%
      dplyr::mutate(y = dplyr::if_else(flag, avg + error, avg)) %>%
      dplyr::group_by(f, f_id_min, f_id_max) %>%
      dplyr::summarize(f_id = mean(f_id), avg = max(avg), y = max(y)) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(rel_pos = f_id / diff(c(f_id_min[1], f_id_max[1]))) %>%
      dplyr::select(f_id_adj, f, avg) %>%
      dplyr::mutate(feature_suffix = stringr::str_trunc(feature_suffix, 12)) %>%
      dplyr::mutate(y = dplyr::if_else(y > 0, y, 0)) %>%
      dplyr::mutate(angle = 90 - 360 * (rel_pos + 0.035)) %>%
      dplyr::mutate(angle = 90 - 360 * ((dplyr::row_number() + 2 - 0.5) / (n() + 4))) %>%
      dplyr::mutate(hjust = dplyr::if_else(angle < -90, 1, 0)) %>%
      dplyr::mutate(angle = dplyr::if_else(angle < -90, angle + 180, angle)) %>%
      tidyr::drop_na() %>%
      dplyr::mutate(size = dplyr::if_else(avg >= 0 & (avg * 5 + nchar(f) > 20), 7 / .pt, 8 / .pt))

    if (!is.null(data_dict)) {
      label_data <- label_data %>%
        dplyr::left_join(data_dict %>% dplyr::filter(!is.na(label)) %>% dplyr::select(f = label, f_desc = description), by = "f")
    } else {
      label_data <- label_data %>% dplyr::mutate(f_desc = NA_character_)
    }


    # browser()
    if (!is.null(cluster_idx)) {
      if (phase_present) {
        cluster_label <- paste0(cluster_abbrev, " ", cluster_idx, " (n=", sum(df$.cluster == cluster_idx), ")")
      } else {
        cluster_label <- paste0(
          cluster_abbrev, " ", cluster_idx, "\n",
          "n=", dplyr::n_distinct(data_cluster$.id), "/",
          sum(cluster_assignment == cluster_idx)
        )
      }

      if (!is.null(cluster_name)) {
        cluster_label <- paste0(cluster_label, "\n", cluster_name)
      }
    }

    x_lim <- c(-2, 2) + range(df_plot$f_id)
    y_lim <- c(
      scale_rng[1] - (scale_rng[2] - scale_rng[1]) * c(0.5, 1)[show_group_names + 1],
      scale_rng[2] + 1.05
    )



    if (isTRUE(phase_present)) {
      teffect_segments <- df_plot %>%
        dplyr::select(.phase, f, avg, f_id_adj) %>%
        dplyr::filter(!is.na(avg)) %>%
        dplyr::pivot_wider(id_cols = f, names_from = .phase, values_from = c(avg, f_id_adj)) %>%
        dplyr::mutate(effect = avg_A - avg_E) %>%
        dplyr::mutate(effect_category = dplyr::case_when(
          effect > delta_threshold ~ 1,
          effect < -delta_threshold ~ 3,
          TRUE ~ 2
        )) %>%
        dplyr::mutate(effect_category = factor(effect_category,
          levels = 1:3,
          labels = c(
            "decreased",
            paste0("unchanged ($\\Delta\\leq\\pm$", delta_threshold, " SD)"),
            "increased"
          )
        ))

      label_data <- label_data %>% dplyr::left_join(teffect_segments %>% dplyr::select(f, effect_category), by = "f")
    }


    ### Start building up the radar plot
    theme_rbar <- ggplot2::theme_minimal() +
      ggplot2::theme(
        legend.position = "top",
        axis.text = element_blank(),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        legend.title = element_text(size = 10, face = "bold"),
        legend.text = element_text(size = 8),
        legend.key.size = unit(0.8, "cm")
      )

    # create ggplot2 object ----

    p <- ggplot2::ggplot(df_plot)
    p <- p + ggplot2::coord_polar()
    p <- p + ggplot2::scale_x_continuous(
      limits = x_lim, expand = c(0, 0),
      breaks = df_plot$f_id, labels = df_plot$f
    )
    p <- p + ggplot2::scale_y_continuous(limits = y_lim)
    p <- p + ggplot2::guides(fill = FALSE)
    p <- p + theme_rbar

    # draw inner circle ----
    p <- p + ggplot2::annotate("rect",
      xmin = x_lim[1], xmax = x_lim[2],
      ymin = y_lim[1], ymax = scale_rng[1] - 0.2,
      color = "transparent", fill = data$color_inner_circle, alpha = 0.2
    )

    if (!is.null(cluster_idx)) {
      # central inner label: cluster id and number of instances ----
      p <- p +
        ggplot2::annotate("text",
          x = x_lim[1], y = y_lim[1] + 0.5,
          label = cluster_label,
          size = 10 / .pt, fontface = "bold", lineheight = 1, vjust = 0.5
        )

      # the actual bars ----
      current_opts <- list(
        data = dplyr::filter(df_plot, !is.na(avg)),
        mapping = ggplot2::aes(x = f_id_adj, y = avg, fill = avg),
        width = 0.7
      )
      p <- p + do.call(geom_col, current_opts)
      p <- p + ggplot2::guides(fill = ggplot2::guide_colorbar(
        title = NULL, ticks.colour = "grey", nbin = 300,
        ticks.linewidth = 1.5
      ))
      p <- p + ggplot2::theme(legend.position = c(0.975, 0.975))
      p <- p + ggplot2::theme(legend.justification = c(1, 1))
      p <- p + ggplot2::theme(legend.direction = "vertical")
      p <- p + ggplot2::theme(legend.key.width = ggplot2::unit(0.5, "lines"))
      p <- p + ggplot2::theme(legend.key.height = ggplot2::unit(1, "lines"))

      p <- p + scale_fill_distiller(
        palette = "RdYlBu",
        limits = c(scale_rng[1], scale_rng[2]),
        breaks = seq(scale_rng[1], scale_rng[2], length.out = 7),
        labels = c(
          "-1.5 SD", "-1.0 SD", "-0.5 SD", "Average",
          "+0.5 SD", "+1.0 SD", "+1.5 SD"
        )
      )

      # p <- p + ggplot2::scale_fill_distiller(
      #   palette = "RdYlBu",
      #   limits = c(scale_rng[1], scale_rng[2]),
      #   breaks = c(scale_rng[1], 0, scale_rng[2]),
      #   labels = c("-1.5 SD", "Average", "+1.5 SD")
      # )
    } else {

      # add radar lines ----
      current_opts <- list(
        mapping = ggplot2::aes(x = f_id_adj, y = avg, group = .cluster, color = .cluster),
        na.rm = TRUE
      )

      current_opts <- list(
        mapping = ggplot2::aes(x = f_id_adj, y = avg, color = .cluster),
        size = 1.25,
        na.rm = TRUE
      )

      p <- p + do.call(geom_point, current_opts)

      p <- p + ggplot2::scale_color_manual(values = colour_clusters)
      p <- p + ggplot2::guides(color = ggplot2::guide_legend(title = NULL, ncol = 1))
      p <- p + ggplot2::theme(legend.position = c(0.975, 0.975))
      p <- p + ggplot2::theme(legend.justification = c(1, 1))
    }


    # add some lines between feature groups ----
    p <- p + ggplot2::geom_segment(
      data = grid_data, ggplot2::aes(x = start, xend = end, y = y, yend = y),
      color = "grey", size = 0.3, linetype = 1
    )
    # y axis text ----
    p <- p + ggplot2::annotate("text",
      x = rep(x_lim[2], 7),
      y = seq(scale_rng[1], scale_rng[2], length.out = 7),
      label = stringr::str_replace(sprintf("%+.1f", seq(scale_rng[1], scale_rng[2], length.out = 7)), "\\+0.0", "0"),
      color = "black", size = 9 / .pt, hjust = 0.5, angle = 0
    )
    # add 0 line ----
    p <- p + ggplot2::geom_segment(
      data = base_data, ggplot2::aes(
        x = start - 0.5, y = 0,
        xend = end + 0.5, yend = 0
      ),
      color = "black", size = 0.6
    )
    # add baseline ----
    p <- p + ggplot2::geom_segment(
      data = base_data, ggplot2::aes(
        x = start - 0.5, y = -1.7,
        xend = end + 0.5, yend = -1.7
      ),
      color = "black", size = 0.6
    )

    if (isTRUE(show_group_names)) {
      # add group ticks ----
      p <- p + ggplot2::geom_segment(data = base_data, ggplot2::aes(
        x = title, xend = title,
        y = -1.8, yend = -1.7
      ), color = "black")
      # add group names ----
      p <- p + ggplot2::geom_text(
        data = base_data, ggplot2::aes(
          x = title, y = scale_rng[1] - 0.15 * (scale_rng[2] - scale_rng[1]),
          label = group, hjust = hjust, vjust = vjust
        ),
        lineheight = 0.85,
        colour = "black", alpha = 0.8, size = 8 / .pt
      )
    }

    if (isTRUE(data$standard_error)) {
      # add standard error as error bar ----
      p <- p + ggplot2::geom_errorbar(ggplot2::aes(x = f_id_adj, ymin = avg - error, ymax = avg + error),
        color = "grey60", size = 0.4, width = 0.5, # alpha = 0.5,
        na.rm = TRUE
      )
      # add small horizontal line on top of error bar
      p <- p + ggplot2::geom_segment(ggplot2::aes(x = f_id_adj - 0.2, y = -sd, xend = f_id_adj + 0.2, yend = -sd),
        color = "grey60", size = 0.4, alpha = 0.5, na.rm = TRUE
      )
    }
    # add labels on top of each bar ----
    current_opts <- list(
      data = label_data,
      mapping = ggplot2::aes(
        x = f_id, y = y + 0.1,
        label = f, hjust = hjust,
        angle = angle
      ), size = 8 / .pt * c(1, 0.8)[1 + phase_present],
      alpha = 0.6, show.legend = FALSE
    )
    if (isTRUE(phase_present)) {
      current_opts$mapping <- ggplot2:::rename_aes(modifyList(
        current_opts$mapping,
        ggplot2::aes(color = effect_category)
      ))
    }

    p <- p + do.call(geom_text, current_opts)


    p <- p + ggplot2::scale_size_identity()

    if (isTRUE(phase_present)) {
      # add treatment effect arrows ----
      p <- p +
        ggplot2::geom_segment(
          data = teffect_segments, aes(
            x = f_id_adj_A - 0.35, xend = f_id_adj_E + 0.35,
            color = effect_category
          ),
          y = scale_rng[1], yend = scale_rng[1],
          size = 1
        ) +
        ggplot2::guides(color = ggplot2::guide_legend(title = "Treatment effect")) +
        ggplot2::scale_color_manual(
          values = c("darkgreen", "black", "red"), drop = FALSE,
          labels = unname(latex2exp::TeX(levels(teffect_segments$effect_category)))
        )
    }

    p
  }
)
Ashish-Soni08/ggradialbar documentation built on April 15, 2021, 4:11 a.m.