Nothing
# This function was written by James B Dorey on the 29th of September 2022
# Its purpose is to visualise duplicate occurrence data by using a chord diagram
# Please contact jbdorey[at]me.com for help
#' Build a chord diagram of duplicate occurrence links
#'
#' This function outputs a figure which shows the relative size and direction of occurrence points
#' duplicated between data providers, such as, SCAN, GBIF, ALA, etc. This function requires the
#' outputs generated by [BeeBDC::dupeSummary()].
#'
#'
#' @param dupeData A tibble or data frame. The duplicate file produced by [BeeBDC::dupeSummary()].
#' @param outPath Character. The path to a directory (folder) in which the output should be saved.
#' @param fileName Character. The name of the output file, ending in '.pdf'.
#' @param width Numeric. The width of the figure to save (in inches). Default = 7.
#' @param height Numeric. The height of the figure to save (in inches). Default = 6.
#' @param bg The plot's background colour. Default = "white".
#' @param smallGrpThreshold Numeric. The upper threshold of sub-dataSources to be listed as "other".
#' Default = 3.
#' @param title A character string. The figure title. Default = "Duplicated record sources".
#' @param palettes A vector of the palettes to be used. One palette for each major dataSource and "other"
#' using the `paletteer` package. Default = c("cartography::blue.pal", "cartography::green.pal",
#' "cartography::sand.pal", "cartography::orange.pal", "cartography::red.pal",
#' "cartography::purple.pal", "cartography::brown.pal")
#' @param canvas.ylim Canvas limits from [circlize::circos.par()]. Default = c(-1.0,1.0).
#' @param canvas.xlim Canvas limits from [circlize::circos.par()]. Default = c(-0.6, 0.25).
#' @param text.col A character string. Text colour
#' @param legendX The x position of the legends, as measured in current viewport.
#' Passed to ComplexHeatmap::draw(). Default = grid::unit(6, "mm").
#' @param legendY The y position of the legends, as measured in current viewport.
#' Passed to ComplexHeatmap::draw(). Default = grid::unit(18, "mm").
#' @param legendJustify A character vector declaring the justification of the legends.
#' Passed to ComplexHeatmap::draw(). Default = c("left", "bottom").
#' @param niceFacing TRUE/FALSE. The niceFacing option automatically adjusts the text facing
#' according to their positions in the circle. Passed to [circlize::highlight.sector()].
#' @param self.link 1 or 2 (numeric). Passed to [circlize::chordDiagram()]:
#' if there is a self link in one sector, 1 means the link will be degenerated as a 'mountain' and the width corresponds to the value for this connection. 2 means the width of the starting root and the ending root all have the width that corresponds to the value for the connection.
#'
#'
#' @return Saves a figure to the provided file path.
#'
#' @importFrom circlize circos.clear circos.par chordDiagram mm_h circos.trackPlotRegion get.cell.meta.data highlight.sector circos.clear
#' @importFrom stringr str_replace str_c
#' @importFrom dplyr bind_cols full_join mutate select group_by if_else arrange n filter %>%
#' @importFrom paletteer paletteer_dynamic
#' @importFrom grid unit
#'
#'
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Create a basic example dataset of duplicates to visualise
#' basicData <- dplyr::tribble(
#' ~dataSource, ~dataSource_keep,
#' "GBIF_Halictidae", "USGS_data",
#' "GBIF_Halictidae", "USGS_data",
#' "GBIF_Halictidae", "USGS_data",
#' "GBIF_Halictidae", "USGS_data",
#' "GBIF_Halictidae", "USGS_data",
#' "GBIF_Halictidae", "USGS_data",
#' "SCAN_Halictidae", "GBIF_Halictidae",
#' "iDigBio_halictidae", "GBIF_Halictidae",
#' "iDigBio_halictidae", "SCAN_Halictidae",
#' "iDigBio_halictidae", "SCAN_Halictidae",
#' "SCAN_Halictidae", "GBIF_Halictidae",
#' "iDigBio_apidae", "SCAN_Apidae",
#' "SCAN_Apidae", "Ecd_Anthophila",
#' "iDigBio_apidae", "Ecd_Anthophila",
#' "SCAN_Apidae", "Ecd_Anthophila",
#' "iDigBio_apidae", "Ecd_Anthophila",
#' "SCAN_Megachilidae", "SCAN_Megachilidae",
#' "CAES_Anthophila", "CAES_Anthophila",
#' "CAES_Anthophila", "CAES_Anthophila"
#' )
#'
#'
#' chordDiagramR(
# # The duplicate data from the dupeSummary function output
#' dupeData = basicData,
#' outPath = tempdir(),
#' fileName = "ChordDiagram.pdf",
#' # These can be modified to help fit the final pdf that's exported.
#' width = 9,
#' height = 7.5,
#' bg = "white",
#' # How few distinct dataSources should a group have to be listed as "other"
#' smallGrpThreshold = 3,
#' title = "Duplicated record sources",
#' # The default list of colour palettes to choose from using the paleteer package
#' palettes = c("cartography::blue.pal", "cartography::green.pal",
#' "cartography::sand.pal", "cartography::orange.pal", "cartography::red.pal",
#' "cartography::purple.pal", "cartography::brown.pal"),
#' canvas.ylim = c(-1.0,1.0),
#' canvas.xlim = c(-0.6, 0.25),
#' text.col = "black",
#' legendX = grid::unit(6, "mm"),
#' legendY = grid::unit(18, "mm"),
#' legendJustify = c("left", "bottom"),
#' niceFacing = TRUE)}
chordDiagramR <- function(
# The duplicate data from the dupeSummary function output
dupeData = NULL,
outPath = NULL,
fileName = NULL,
width = 7,
height = 6,
bg = "white",
# How few distinct dataSources should a group have to be listed as "other"
smallGrpThreshold = 3,
title = "Duplicated record sources",
# The default list of colour palettes to choose from
palettes = c("cartography::blue.pal", "cartography::green.pal",
"cartography::sand.pal", "cartography::orange.pal", "cartography::red.pal",
"cartography::purple.pal", "cartography::brown.pal"),
canvas.ylim = c(-1.0,1.0),
canvas.xlim = c(-0.6, 0.25),
text.col = "black",
legendX = grid::unit(6, "mm"),
legendY = grid::unit(18, "mm"),
legendJustify = c("left", "bottom"),
niceFacing = TRUE,
self.link = 2){
# locally bind variabls to the function
Frequency <- Frequency_dupe <- sourceName <- . <- sourceCategories <- groupCount <- cur_group_id <-
groupNumber <- groupPalette <- groupColours <- par <- NULL
error_func_BCM <- CHtest <- error_func_CH <- input <- instructions <- NULL
requireNamespace("circlize")
requireNamespace("dplyr")
requireNamespace("paletteer")
requireNamespace("grid")
#### 0.0 Prep ####
##### 0.1 errors ####
###### a. FATAL errors ####
if(is.null(dupeData)){
stop(" - Please provide an argument for dupeData. I'm a program not a magician.")
}
if(is.null(outPath)){
stop(" - Please provide an argument for outPath Seems reckless to let me just guess.")
}
if(is.null(fileName)){
stop(" - Please provide an argument for fileName Seems reckless to let me just guess.")
}
if(nrow(dupeData) == 0){
stop(" - There are no duplicates in the dupeData object. Stopping process.")
}
##### 0.2 maintain par ####
# Make sure to maintain prior par on exit from the function
oldpar <- par(no.readonly = TRUE)
on.exit(oldpar)
##### 0.3 BcM + ComplexHeatmap ####
###### a. test ####
# Check if BiocManager is installed
# TRUE if BiocManager is found
suppressWarnings(
BcMtest <- system.file(package='BiocManager') %>%
stringr::str_count() > 0
)
# Check if ComplexHeatmap is installed
# TRUE if ComplexHeatmap is found
suppressWarnings(
CHtest <- system.file(package='ComplexHeatmap') %>%
stringr::str_count() > 0
)
###### b. BiocManager ####
if(CHtest == FALSE){
if(BcMtest == FALSE){
# Set up instructions for download on fail
instructions <- paste(" Please try installing the package for yourself",
"using the following command: \n",
" install.packages(\"BiocManager\")")
# Set up fail function for tryCatch
error_func_BCM <- function(e){
stop(paste("Failed to install the BiocManager package.\n",
instructions))
}
# Begin interactive input
input <- 1
if (interactive()){
input <- utils::menu(c("Yes", "No"),
title = paste0("Install the BiocManager package? \n",
"NOTE: if you need to install BiocManager, you may need to restart R",
" before installing ComplexHeatmap."))
}
if(input == 1){
# Check for BiocManager
if( suppressWarnings(system.file(package='BiocManager')) %>% stringr::str_count() == 0){
message("Installing the BiocManager package.")
tryCatch(
utils::install.packages("BiocManager"),
error = error_func_BCM, warning = error_func_BCM)
}# END BiocManager check
else{
stop(writeLines(paste("The ComplexHeatmap package is necessary for BeeBDC::chordDiagramR.\n",
instructions)))
} # END else
} # END input == 1
}# END BcMtest == FALSE
} # END CHtest == FALSE
###### c. ComplexHeatmap ####
if(CHtest == FALSE){
# Set up instructions for download on fail
instructions <- paste(" Please try installing the package for yourself",
"using the following command: \n",
"BiocManager::install(\"ComplexHeatmap\")")
# Set up fail function for tryCatch
error_func_CH <- function(e){
stop(paste("Failed to install the ComplexHeatmap package.\n",
instructions))
}
# Begin interactive input
input <- 1
if (interactive()){
input <- utils::menu(c("Yes", "No"),
title = paste0("Install the ComplexHeatmap package? \n"))
}
if(input == 1){
# Start ComplexHeatmap install
message("Installing the ComplexHeatmap package.")
tryCatch(
BiocManager::install("ComplexHeatmap"),
error = error_func_CH, warning = error_func_CH)
} # END input == 1
else{
stop(writeLines(paste("The ComplexHeatmap package is necessary for BeeBDC::chordDiagramR.\n",
instructions)))
} # END else
} # END CHtest == FALSE
#### 1.0 Data prep ####
# Create a table to go into chord diagram
suppressMessages(
chordData <- table(dplyr::bind_cols(dupeData$dataSource, dupeData$dataSource_keep)),
classes = "message")
# Create tables of the counts of kept source and duplicate source
keptSource <- table(dupeData$dataSource) %>%
as.data.frame() %>% dplyr::tibble() %>%
stats::setNames(c("sourceName", "Frequency"))
dupeSource <- table(dupeData$dataSource_keep) %>%
as.data.frame() %>% dplyr::tibble() %>%
stats::setNames(c("sourceName", "Frequency_dupe"))
# Merge the sources and get their sum (for a total frequency count to order by)
colourTable <- dplyr::full_join(keptSource, dupeSource, by = "sourceName") %>%
dplyr::mutate(Frequency = (Frequency + Frequency_dupe)) %>%
# Drop the Frequency_dupe column
dplyr::select(!Frequency_dupe) %>%
# Get broad source (before first underscore)
dplyr::mutate( sourceCategories = (sourceName %>%
stringr::str_replace(
string = .,
pattern = "_.*",
replacement = ""
))) %>%
dplyr::group_by(sourceCategories) %>%
dplyr::mutate( # Count group number
groupCount = dplyr::n(),
# Combine small groups (< smallGrpThreshold)
sourceCategories = dplyr::if_else(
groupCount < smallGrpThreshold, "Other", sourceCategories)) %>%
dplyr::arrange(sourceName, .by_group = TRUE) %>%
# Re-group
dplyr::group_by(sourceCategories) %>%
dplyr::mutate(groupNumber = dplyr::cur_group_id(),
# Re-count
groupCount = dplyr::n(),
groupPalette = palettes[groupNumber]) %>%
# assign colours
dplyr::mutate(groupColours =
paletteer::paletteer_dynamic(
palette = groupPalette[[1]],
n = groupCount[[1]]) %>% list(),
colour = unlist(groupColours)[dplyr::row_number()])
#### 2.0 Build plot ####
circlize::circos.clear()
circlize::circos.par(canvas.ylim = canvas.ylim, canvas.xlim = canvas.xlim)
# Create the chord diagrame
circlize::chordDiagram(
x = chordData,
order = colourTable$sourceName,
directional = 1,
direction.type = c("arrows"),
link.arr.type = "big.arrow",
reduce = 0,
# self links fold directly back onto themselves instead of going to far side
self.link = self.link,
grid.col = colourTable$colour,
keep.diagonal = TRUE,
# name, grid, axis
annotationTrack = c("grid"),
preAllocateTracks = list(
track.height = circlize::mm_h(4),
track.margin = c(circlize::mm_h(1), 0)),
scale = FALSE
)
circlize::circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
xlim = circlize::get.cell.meta.data("xlim")
ylim = circlize::get.cell.meta.data("ylim")
sector.name = circlize::get.cell.meta.data("sector.index")
}, bg.border = NA)
# Highlight inputs
for(i in 1:length(unique(colourTable$sourceCategories))){
loopCat <- colourTable %>%
dplyr::filter(sourceCategories == unique(colourTable$sourceCategories)[i])
circlize::highlight.sector(stringr::str_c(loopCat$sourceName),
track.index = 1, col = loopCat$colour[[1]],
text = unique(loopCat$sourceCategories), cex = 0.8,
text.col = text.col, niceFacing = niceFacing)
}
legendList <- c()
# Make legends by creating a list of legends for each sourceCategory
for(i in 1:length(unique(colourTable$sourceCategories))){
loopCat <- colourTable %>%
dplyr::filter(sourceCategories == unique(colourTable$sourceCategories)[i])
legendList[[i]] <- ComplexHeatmap::Legend(labels = stringr::str_c(loopCat$sourceName),
title = unique(stringr::str_c(loopCat$sourceCategories)),
legend_gp = grid::gpar(fill = c(loopCat$colour)))
} # END legend loop
lgd_list <- ComplexHeatmap::packLegend(list = legendList)
ComplexHeatmap::draw(lgd_list, x = legendX,
y = legendY, just = legendJustify)
circlize::circos.clear()
title(title)
grDevices::dev.copy2pdf(file = paste(outPath, "/", fileName, sep = ""),
height = height, width = width, bg = bg)
#dev.off()
} # END function
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.