R/upset_plot.R

Defines functions separate_longer_delim print.upsetPlotData process_upset_data theme_upset_left theme_upset_top theme_upset_main show_numbers_x show_numbers_y upsetplot_left upsetplot_top upsetplot_main plot_upset

Documented in plot_upset print.upsetPlotData process_upset_data separate_longer_delim

## Aims
#
# 1. Reproduce upsetplot
# 2. Expand the function of upsetplot
#   2.1 Allow non-unique intersections
#   2.2 More applicable subplot types (boxplot, points), useful to show subsets properties


## Structures
#
# 1. plot and subplots
# 2. themes
# 3. subsets processing [see regions.R]



## (PART) Plot

#' Plot a upset plot
#'
#' This function generate a upset plot by creating a composite plot which contains subplots generated by ggplot2.
#'
#' @param venn a class Venn object
#' @param nintersects number of intersects. If NULL, all intersections will show.
#' @param order.intersect.by 'size', 'name', or "none"
#' @param order.set.by 'size', 'name', or "none"
#' @param relative_height the relative height of top panel in upset plot
#' @param relative_width the relative width of left panel in upset plot
#' @param top.bar.color default is "grey30"
#' @param top.bar.y.label default is NULL
#' @param top.bar.show.numbers default is TRUE
#' @param top.bar.numbers.size text size of numbers
#' @param sets.bar.color default is "grey30"
#' @param sets.bar.show.numbers default is FALSE
#' @param sets.bar.x.label default is "Set Size"
#' @param intersection.matrix.color default is "grey30"
#' @param specific whether only include specific items in subsets, default is TRUE.
#' @param ... useless
#' @return an upset plot
#'
#' @export
#' @name upset-plot
#' @examples
#'  list = list(A = sample(LETTERS, 20),
#'              B = sample(LETTERS, 22),
#'              C = sample(LETTERS, 14),
#'              D = sample(LETTERS, 30, replace = TRUE))
#'  venn = Venn(list)
#'  plot_upset(venn)
#'  plot_upset(venn, order.intersect.by = "name")
#'  plot_upset(venn, nintersects = 6)
plot_upset = function(venn,
                      nintersects = NULL,
                      order.intersect.by = c("size","name","none"),
                      order.set.by = c("size","name","none"),
                      relative_height = 3,
                      relative_width = 0.3,
                      top.bar.color = "grey30",
                      top.bar.y.label = NULL,
                      top.bar.show.numbers = TRUE,
                      top.bar.numbers.size = 3,
                      sets.bar.color = "grey30",
                      sets.bar.show.numbers = FALSE,
                      sets.bar.x.label = "Set Size",
                      intersection.matrix.color = "grey30",
                      specific = TRUE,
                      ...){
  # process arguments
  order.intersect.by = match.arg(order.intersect.by)
  order.set.by = match.arg(order.set.by)

  # subplot main
  data = process_upset_data(venn,
                           nintersects = nintersects,
                           order.intersect.by = order.intersect.by,
                           order.set.by = order.set.by,
                           specific = specific)
  p_main = upsetplot_main(data$main_data,
                          intersection.matrix.color = intersection.matrix.color)

  # subplot top
  p_top = upsetplot_top(data$top_data,
                        top.bar.color = top.bar.color,
                        top.bar.y.label = top.bar.y.label,
                        top.bar.show.numbers = top.bar.show.numbers,
                        top.bar.numbers.size = top.bar.numbers.size)

  # subplot left
  p_left = upsetplot_left(data$left_data,
                          sets.bar.color = sets.bar.color,
                          sets.bar.x.label = sets.bar.x.label,
                          sets.bar.show.numbers = sets.bar.show.numbers)

  # combine into a plot
  pp = aplot::insert_top(p_main, p_top, height = relative_height) |>
    aplot::insert_left(p_left, width = relative_width)
  class(pp) = c("upset_plot", class(pp))

  return(pp)
}

upsetplot_main = function(data, ...){
  param = list(...)
  ggplot2::ggplot(data, aes(.data$id, .data$set)) +
    ggplot2::geom_point(size = 4, color = param$intersection.matrix.color, na.rm = FALSE) +
    ggplot2::geom_path(aes(group = .data$id), linewidth = 1.5, color = param$intersection.matrix.color, na.rm = FALSE) +
    ggplot2::labs(x = "Set Intersection", y = NULL) +
    theme_upset_main()
}

upsetplot_top = function(data, ...){
  param = list(...)
  p = ggplot2::ggplot(data, aes(.data$id, .data$size)) +
    ggplot2::geom_col(fill = param$top.bar.color) +
    ggplot2::labs(x = NULL, y = param$top.bar.y.label) +
    scale_y_continuous(expand = ggplot2::expansion(mult = c(0.01, 0.05))) +
    theme_upset_top()
  if (param$top.bar.show.numbers) {
    p = p + ggplot2::geom_text(aes(label = .data$size,
                               y = .data$size + diff(range(.data$size)) * 0.03),
                               size = param$top.bar.numbers.size)
  }
  return(p)
}

upsetplot_left = function(data, ...){
  param = list(...)
  p = ggplot2::ggplot(data, aes(x = .data$size, y = .data$set)) +
    ggplot2::geom_col(orientation = "y", fill = param$sets.bar.color) +
    ggplot2::scale_y_discrete(position = "right") +
    ggplot2::scale_x_reverse() +
    ggplot2::labs(x = param$sets.bar.x.label, y = NULL) +
    theme_upset_left()
  if (param$sets.bar.show.numbers) p = show_numbers_x(p, value = "size")
  return(p)
}

show_numbers_y = function(p, value){

}

show_numbers_x = function(p, value){
  p + ggplot2::geom_text(aes(label = .data[[value]]),
                         vjust = 0.5)
}

## (PART) Theme

theme_upset_main = function(){
  ggplot2::theme_bw() +
    ggplot2::theme(
      axis.title.y = element_blank(),
      axis.ticks.x.bottom = element_blank(),
      axis.text.x.bottom = element_blank(),
      # panel.border = element_blank(),
      plot.margin = margin(t = -20)
    )
}

theme_upset_top = function(){
  ggplot2::theme_bw() +
    ggplot2::theme(
      axis.ticks.x.bottom = element_blank(),
      axis.text.x.bottom = element_blank(),
      # panel.border = element_blank(),
      plot.margin = margin(b = -20, unit = "pt")
    )
}

theme_upset_left = function(){
  ggplot2::theme_bw() +
    ggplot2::theme(
      axis.ticks.y = ggplot2::element_blank(),
      axis.title.y = ggplot2::element_blank(),
      axis.text.y = ggplot2::element_blank(),
      panel.border = ggplot2::element_blank(),
      panel.grid.major = ggplot2::element_blank(),
      plot.margin = margin(r = -60)
    )
}


## (PART) retrieve tidy data from primary subset datasets


#' process upset data
#'
#' @inheritParams upset-plot
#' @param specific whether return ONLY specific items for a subset, default is TRUE
#' @details
#'  ggVennDiagram, by default, only return the specific subsets of a region.
#'  However, sometimes, we want to show all the overlapping items for two or more sets.
#'  For example: https://github.com/gaospecial/ggVennDiagram/issues/64
#'  Therefore, we add a 'specific' switch to this function. While 'specific = FALSE',
#'  the seperator will be changed from "/" to "~", and all the overlapping items
#'  will be returned. This feature is useful in plotting upset plot.
#'
#' @return a upsetPlotData object
process_upset_data = function(venn,
                              nintersects = 30,
                              order.intersect.by = "size",
                              order.set.by = "name",
                              specific = TRUE){
  set_name = venn@names
  name_separator = ifelse(specific, "/", "~")

  # region data
  data = process_region_data(venn, sep = name_separator, specific = specific)
  data$size = data$count

  # top data
  top_data = data |> dplyr::select(c('id', 'name', 'item', 'size'))
  if (order.intersect.by %in% colnames(top_data)) {
    top_data = dplyr::mutate(top_data,
                             id = forcats::fct_reorder(.data$id, .data[[order.intersect.by]], .desc = TRUE))
  } else {
    top_data$id = forcats::as_factor(top_data$id)
  }

  # left data
  left_data = dplyr::tibble(set = set_name,
                            name = set_name,
                            size = lengths(venn@sets))
  if (order.set.by %in% colnames(left_data)) {
    left_data = dplyr::mutate(left_data,
                              set = forcats::fct_reorder(.data$set, .data[[order.set.by]], .desc = TRUE))
  } else {
    left_data$set = forcats::as_factor(left_data$set) |> forcats::fct_rev()
  }

  # main data
  main_data = data |> dplyr::select(c("id", "name", "size"))
  main_data$set_id = main_data$id
  main_data = separate_longer_delim(main_data, "set_id", delim = name_separator)
  main_data$set = factor(set_name[as.integer(main_data$set_id)],
                         levels = levels(left_data$set))
  main_data$id = factor(main_data$id, levels = levels(top_data$id))

  # filter intersections
  if (is.numeric(nintersects)) {
    keep_id = utils::head(levels(top_data$id), nintersects)
    main_data = main_data |> dplyr::filter(.data$id %in% keep_id)
    top_data = top_data |> dplyr::filter(.data$id %in% keep_id)
  }

  # return result as a list
  ret = list(top_data = top_data,
             left_data = left_data,
             main_data = main_data)
  class(ret) = 'upsetPlotData'
  return(ret)
}


#' S3 method for `upsetPlotData`
#'
#' @param x a upsetPlotData object
#' @param ... useless
#'
#' @method print upsetPlotData
#' @docType methods
#' @name print
#' @rdname print
#' @md
#' @export
print.upsetPlotData = function(x, ...){
  cat(sprintf("Class upsetPlotData.\n"))
  cat(sprintf("  Type: list; No. slots: %d; Slot names: %s.\n", length(x), paste(names(x), collapse = ", ")))
  cat(sprintf("  To view the data interactively, use 'obj[[\"slotName\"]]'.\n"))
}




#' Implement of `tidyr::separate_longer_delim`
#'
#' @param df a data.frame
#' @param col column
#' @param delim delimeter
#'
#' @return a data.frame
#' @md
separate_longer_delim <- function(df, col, delim) {
  # 将要拆分的列按照分隔符拆分成字符向量
  split_values <- strsplit(df[[col]], delim)

  # 扩展数据框
  result <- df[rep(seq_len(nrow(df)), times = lengths(split_values)), ]

  # 将拆分后的值填充到新的列中
  result[[col]] <- unlist(split_values)

  return(result)
}

Try the ggVennDiagram package in your browser

Any scripts or data that you put into this service are public.

ggVennDiagram documentation built on May 29, 2024, 10:21 a.m.