R/chordDiagramR.R

Defines functions chordDiagramR

Documented in chordDiagramR

# This function was written by James B Dorey on the 29th of September 2022
# Its purpose is to visualise duplicate occurrence data by using a chord diagram
# Please contact jbdorey[at]me.com for help

#' Build a chord diagram of duplicate occurrence links
#' 
#' This function outputs a figure which shows the relative size and direction of occurrence points 
#' duplicated between data providers, such as, SCAN, GBIF, ALA, etc. This function requires the 
#' outputs generated by [BeeBDC::dupeSummary()].
#' 
#'
#' @param dupeData A tibble or data frame. The duplicate file produced by [BeeBDC::dupeSummary()].
#' @param outPath Character. The path to a directory (folder) in which the output should be saved.
#' @param fileName Character. The name of the output file, ending in '.pdf'.
#' @param width Numeric. The width of the figure to save (in inches). Default = 7.
#' @param height Numeric. The height of the figure to save (in inches). Default = 6.
#' @param bg The plot's background colour. Default = "white".
#' @param smallGrpThreshold Numeric. The upper threshold of sub-dataSources to be listed as "other".
#' Default = 3.
#' @param title A character string. The figure title. Default = "Duplicated record sources".
#' @param palettes A vector of the palettes to be used. One palette for each major dataSource and "other"
#' using the `paletteer` package. Default = c("cartography::blue.pal", "cartography::green.pal", 
#' "cartography::sand.pal", "cartography::orange.pal", "cartography::red.pal",
#' "cartography::purple.pal", "cartography::brown.pal")
#' @param canvas.ylim Canvas limits from [circlize::circos.par()]. Default = c(-1.0,1.0).
#' @param canvas.xlim Canvas limits from [circlize::circos.par()]. Default = c(-0.6, 0.25).
#' @param text.col A character string. Text colour
#' @param legendX The x position of the legends, as measured in current viewport.
#'  Passed to ComplexHeatmap::draw(). Default = grid::unit(6, "mm").
#' @param legendY The y position of the legends, as measured in current viewport.
#'  Passed to ComplexHeatmap::draw(). Default = grid::unit(18, "mm").
#' @param legendJustify A character vector declaring the justification of the legends. 
#' Passed to ComplexHeatmap::draw(). Default = c("left", "bottom").
#' @param niceFacing TRUE/FALSE. The niceFacing option automatically adjusts the text facing 
#' according to their positions in the circle. Passed to [circlize::highlight.sector()].
#' @param self.link 1 or 2 (numeric). Passed to [circlize::chordDiagram()]:	
#' if there is a self link in one sector, 1 means the link will be degenerated as a 'mountain' and the width corresponds to the value for this connection. 2 means the width of the starting root and the ending root all have the width that corresponds to the value for the connection.
#'
#'
#' @return Saves a figure to the provided file path.
#' 
#' @importFrom circlize circos.clear circos.par chordDiagram mm_h circos.trackPlotRegion get.cell.meta.data highlight.sector circos.clear
#' @importFrom stringr str_replace str_c
#' @importFrom dplyr bind_cols full_join mutate select group_by if_else arrange n filter %>%
#' @importFrom paletteer paletteer_dynamic
#' @importFrom grid unit
#' 
#' 
#' 
#' @export
#'
#' @examples
#' \dontrun{
#'   # Create a basic example dataset of duplicates to visualise
#' basicData <- dplyr::tribble(
#'                             ~dataSource,    ~dataSource_keep,
#'                       "GBIF_Halictidae",         "USGS_data",
#'                       "GBIF_Halictidae",         "USGS_data",
#'                       "GBIF_Halictidae",         "USGS_data",
#'                       "GBIF_Halictidae",         "USGS_data",
#'                       "GBIF_Halictidae",         "USGS_data",
#'                       "GBIF_Halictidae",         "USGS_data",
#'                       "SCAN_Halictidae",   "GBIF_Halictidae",
#'                    "iDigBio_halictidae",   "GBIF_Halictidae",
#'                    "iDigBio_halictidae",   "SCAN_Halictidae",
#'                    "iDigBio_halictidae",   "SCAN_Halictidae",
#'                       "SCAN_Halictidae",   "GBIF_Halictidae",
#'                        "iDigBio_apidae",       "SCAN_Apidae",
#'                           "SCAN_Apidae",    "Ecd_Anthophila",
#'                        "iDigBio_apidae",    "Ecd_Anthophila",
#'                           "SCAN_Apidae",    "Ecd_Anthophila",
#'                        "iDigBio_apidae",    "Ecd_Anthophila",
#'                     "SCAN_Megachilidae", "SCAN_Megachilidae",
#'                       "CAES_Anthophila",   "CAES_Anthophila",
#'                       "CAES_Anthophila",   "CAES_Anthophila"
#'  )
#'
#' 
#'  chordDiagramR(
# # The duplicate data from the dupeSummary function output  
#' dupeData = basicData,
#' outPath = tempdir(),
#' fileName = "ChordDiagram.pdf",
#' # These can be modified to help fit the final pdf that's exported.
#' width = 9,
#' height = 7.5,
#' bg = "white",
#' # How few distinct dataSources should a group have to be listed as "other"
#' smallGrpThreshold = 3,
#' title = "Duplicated record sources",
#' # The default list of colour palettes to choose from using the paleteer package
#' palettes = c("cartography::blue.pal", "cartography::green.pal", 
#'              "cartography::sand.pal", "cartography::orange.pal", "cartography::red.pal",
#'              "cartography::purple.pal", "cartography::brown.pal"),
#' canvas.ylim = c(-1.0,1.0), 
#' canvas.xlim = c(-0.6, 0.25),
#' text.col = "black",
#' legendX = grid::unit(6, "mm"),
#' legendY = grid::unit(18, "mm"),
#' legendJustify = c("left", "bottom"),
#' niceFacing = TRUE)}
chordDiagramR <- function(
    # The duplicate data from the dupeSummary function output  
  dupeData = NULL,
  outPath = NULL,
  fileName = NULL,
  width = 7,
  height = 6,
  bg = "white",
  # How few distinct dataSources should a group have to be listed as "other"
  smallGrpThreshold = 3,
  title = "Duplicated record sources",
  # The default list of colour palettes to choose from 
  palettes = c("cartography::blue.pal", "cartography::green.pal", 
               "cartography::sand.pal", "cartography::orange.pal", "cartography::red.pal",
               "cartography::purple.pal", "cartography::brown.pal"),
  canvas.ylim = c(-1.0,1.0), 
  canvas.xlim = c(-0.6, 0.25),
  text.col = "black",
  legendX = grid::unit(6, "mm"),
  legendY = grid::unit(18, "mm"),
  legendJustify = c("left", "bottom"),
  niceFacing = TRUE,
  self.link = 2){
  
  # locally bind variabls to the function
  Frequency <- Frequency_dupe <- sourceName <- . <- sourceCategories <- groupCount <- cur_group_id <-
    groupNumber <- groupPalette <- groupColours <- par <- NULL
  error_func_BCM <- CHtest <- error_func_CH <- input <- instructions <- NULL
  
    requireNamespace("circlize")
  requireNamespace("dplyr")
  requireNamespace("paletteer")
  requireNamespace("grid")
  
  #### 0.0 Prep ####
  ##### 0.1 errors ####
  ###### a. FATAL errors ####
  if(is.null(dupeData)){
    stop(" - Please provide an argument for dupeData. I'm a program not a magician.")
  }
  if(is.null(outPath)){
    stop(" - Please provide an argument for outPath Seems reckless to let me just guess.")
  }
  if(is.null(fileName)){
    stop(" - Please provide an argument for fileName Seems reckless to let me just guess.")
  }
  if(nrow(dupeData) == 0){
    stop(" - There are no duplicates in the dupeData object. Stopping process.")
  }
  
    ##### 0.2 maintain par ####
  # Make sure to maintain prior par on exit from the function
  oldpar <- par(no.readonly = TRUE) 
  on.exit(oldpar)
  
  ##### 0.3 BcM + ComplexHeatmap ####
    ###### a. test ####
  # Check if BiocManager is installed
  # TRUE if BiocManager is found
  suppressWarnings(
    BcMtest <- system.file(package='BiocManager') %>% 
      stringr::str_count() > 0 
  )
    # Check if ComplexHeatmap is installed
      # TRUE if ComplexHeatmap is found
  suppressWarnings(
  CHtest <- system.file(package='ComplexHeatmap') %>% 
    stringr::str_count() > 0 
  )

    ###### b. BiocManager ####
  if(CHtest == FALSE){
    if(BcMtest == FALSE){
    # Set up instructions for download on fail
    instructions <- paste(" Please try installing the package for yourself", 
                          "using the following command: \n", 
                          " install.packages(\"BiocManager\")")
    # Set up fail function for tryCatch
    error_func_BCM <- function(e){
      stop(paste("Failed to install the BiocManager package.\n", 
                 instructions))
    }
    # Begin interactive input
    input <- 1
    if (interactive()){
      input <- utils::menu(c("Yes", "No"), 
                           title = paste0("Install the BiocManager package? \n",
                            "NOTE: if you need to install BiocManager, you may need to restart R",
                            " before installing ComplexHeatmap."))
    }
    if(input == 1){
      # Check for BiocManager
      if( suppressWarnings(system.file(package='BiocManager')) %>% stringr::str_count() == 0){
        message("Installing the BiocManager package.")
        tryCatch(
          utils::install.packages("BiocManager"), 
          error = error_func_BCM, warning = error_func_BCM)
      }# END BiocManager check
    
    else{
      stop(writeLines(paste("The ComplexHeatmap package is necessary for BeeBDC::chordDiagramR.\n", 
                            instructions)))
    } # END else
    } # END input == 1
    }# END BcMtest == FALSE
    } # END CHtest == FALSE
  
  
  
    ###### c. ComplexHeatmap ####
  if(CHtest == FALSE){
      # Set up instructions for download on fail
    instructions <- paste(" Please try installing the package for yourself", 
                          "using the following command: \n",
                          "BiocManager::install(\"ComplexHeatmap\")")
      # Set up fail function for tryCatch
    error_func_CH <- function(e){
      stop(paste("Failed to install the ComplexHeatmap package.\n", 
                 instructions))
    }
      # Begin interactive input
    input <- 1
    if (interactive()){
      input <- utils::menu(c("Yes", "No"), 
                        title = paste0("Install the ComplexHeatmap package? \n"))
    }
    if(input == 1){
        # Start ComplexHeatmap install
      message("Installing the ComplexHeatmap package.")
      tryCatch(
        BiocManager::install("ComplexHeatmap"), 
        error = error_func_CH, warning = error_func_CH)
      } # END input == 1
      
    else{
      stop(writeLines(paste("The ComplexHeatmap package is necessary for BeeBDC::chordDiagramR.\n", 
                 instructions)))
    } # END else
  } # END CHtest == FALSE
  

    #### 1.0 Data prep ####
# Create a table to go into chord diagram
  suppressMessages(
chordData <- table(dplyr::bind_cols(dupeData$dataSource, dupeData$dataSource_keep)),
classes = "message")


  # Create tables of the counts of kept source and duplicate source
keptSource <- table(dupeData$dataSource) %>%
  as.data.frame() %>% dplyr::tibble() %>% 
  stats::setNames(c("sourceName", "Frequency")) 
dupeSource <- table(dupeData$dataSource_keep) %>%
  as.data.frame() %>% dplyr::tibble() %>% 
  stats::setNames(c("sourceName", "Frequency_dupe")) 
  # Merge the sources and get their sum (for a total frequency count to order by)
colourTable <- dplyr::full_join(keptSource, dupeSource, by = "sourceName") %>%
  dplyr::mutate(Frequency = (Frequency + Frequency_dupe)) %>%
    # Drop the Frequency_dupe column
  dplyr::select(!Frequency_dupe) %>%
  # Get broad source (before first underscore)
  dplyr::mutate( sourceCategories = (sourceName %>%
                                       stringr::str_replace(
                                         string = .,
                                         pattern = "_.*",
                                         replacement = ""
                                       ))) %>%
  dplyr::group_by(sourceCategories) %>%
  dplyr::mutate(  # Count group number
    groupCount = dplyr::n(),
    # Combine small groups (< smallGrpThreshold)
    sourceCategories = dplyr::if_else(
      groupCount < smallGrpThreshold, "Other", sourceCategories)) %>%
  dplyr::arrange(sourceName, .by_group = TRUE) %>%
  # Re-group
  dplyr::group_by(sourceCategories) %>%
  dplyr::mutate(groupNumber = dplyr::cur_group_id(),
                # Re-count
                groupCount = dplyr::n(),
                groupPalette = palettes[groupNumber]) %>%
  # assign colours
  dplyr::mutate(groupColours = 
                  paletteer::paletteer_dynamic(
                    palette = groupPalette[[1]],
                    n = groupCount[[1]]) %>% list(),
                colour = unlist(groupColours)[dplyr::row_number()])


  #### 2.0 Build plot ####
circlize::circos.clear()
  circlize::circos.par(canvas.ylim = canvas.ylim, canvas.xlim = canvas.xlim)
  
# Create the chord diagrame
circlize::chordDiagram(
  x = chordData,
  order = colourTable$sourceName,
  directional = 1,
  direction.type = c("arrows"),
  link.arr.type = "big.arrow",
  reduce = 0,
    # self links fold directly back onto themselves instead of going to far side
  self.link = self.link,
  grid.col = colourTable$colour,
  keep.diagonal = TRUE,
  # name, grid, axis
  annotationTrack = c("grid"),
  preAllocateTracks = list(
    track.height = circlize::mm_h(4),
    track.margin = c(circlize::mm_h(1), 0)),
  scale = FALSE
)
circlize::circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
  xlim = circlize::get.cell.meta.data("xlim")
  ylim = circlize::get.cell.meta.data("ylim")
  sector.name = circlize::get.cell.meta.data("sector.index")
}, bg.border = NA)
# Highlight inputs
for(i in 1:length(unique(colourTable$sourceCategories))){
  loopCat <- colourTable %>%
    dplyr::filter(sourceCategories == unique(colourTable$sourceCategories)[i])
  circlize::highlight.sector(stringr::str_c(loopCat$sourceName), 
                             track.index = 1, col = loopCat$colour[[1]], 
                             text = unique(loopCat$sourceCategories), cex = 0.8, 
                             text.col = text.col, niceFacing = niceFacing)
}
legendList <- c()
# Make legends by creating a list of legends for each sourceCategory
for(i in 1:length(unique(colourTable$sourceCategories))){
  loopCat <- colourTable %>%
    dplyr::filter(sourceCategories == unique(colourTable$sourceCategories)[i])
  legendList[[i]] <- ComplexHeatmap::Legend(labels = stringr::str_c(loopCat$sourceName), 
                                            title = unique(stringr::str_c(loopCat$sourceCategories)), 
                                            legend_gp = grid::gpar(fill = c(loopCat$colour)))
} # END legend loop

lgd_list <- ComplexHeatmap::packLegend(list = legendList)
ComplexHeatmap::draw(lgd_list, x = legendX, 
                     y = legendY, just = legendJustify)

circlize::circos.clear()

title(title)

grDevices::dev.copy2pdf(file = paste(outPath, "/", fileName, sep = ""),
                        height = height, width = width, bg = bg)

#dev.off()
} # END function

Try the BeeBDC package in your browser

Any scripts or data that you put into this service are public.

BeeBDC documentation built on Nov. 4, 2024, 9:06 a.m.