Nothing
## ----libraryChunk, load-packages, include=FALSE-------------------------------
# markdown packages
library(rgnparser)
library(magrittr)
library(knitr)
library(rmarkdown)
library(rmdformats)
library(prettydoc)
library(htmltools)
library(pkgdown)
# Load core packages
library(devtools)
library(BiocManager)
library(purrr)
library(here)
library(renv)
library(bdc)
library(CoordinateCleaner)
library(dplyr)
library(readr)
library(stringr)
library(lubridate)
library(tidyselect)
library(R.utils)
library(tidyr)
library(ggplot2)
library(forcats)
library(emld)
library(rlang)
library(xml2)
library(mgsub)
library(rvest)
library(rnaturalearth)
library(rnaturalearthdata)
library(countrycode)
library(janitor)
library(circlize)
library(paletteer)
library(cowplot)
library(igraph)
library(ggspatial)
library(sf)
library(parallel)
library(terra)
# Dont detect cores to avoid GitHbub error
old <- options() # code line i
on.exit(options(old)) # code line i+1
options(mc.cores = parallel::detectCores())
## ----secretRootPath, include=FALSE--------------------------------------------
# Set the RootPath to tempdir
RootPath <- tempdir()
if (!dir.exists(paste0(RootPath, "/Data_acquisition_workflow"))) {
dir.create(paste0(RootPath, "/Data_acquisition_workflow"), recursive = TRUE)
}
## ----global-options, include=FALSE--------------------------------------------
knitr::opts_chunk$set(error = TRUE,
eval = TRUE,
tidy = TRUE,
warning = FALSE,
root.dir = normalizePath(tempdir()))
## ----falseRootPath, eval=FALSE------------------------------------------------
# RootPath <- paste0("/your/path/here")
## ----CreateRootPath, warning=FALSE, collapse = TRUE---------------------------
# Create the working directory in the RootPath if it doesn't exist already
if (!dir.exists(paste0(RootPath, "/Data_acquisition_workflow"))) {
dir.create(paste0(RootPath, "/Data_acquisition_workflow"), recursive = TRUE)
}
# Set the working directory
setwd(paste0(RootPath,"/Data_acquisition_workflow"))
## ----activate, collapse = TRUE------------------------------------------------
renv::activate(project = paste0(RootPath,"/Data_acquisition_workflow"))
## ----installPackages, message=FALSE, warning=FALSE, results=FALSE, collapse = TRUE, eval = FALSE----
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager", repos = "http://cran.us.r-project.org")
#
# BiocManager::install("ComplexHeatmap")
## ----rnaturalearthhires, eval=FALSE-------------------------------------------
# # Install remotes if needed
# if (!require("remotes", quietly = TRUE))
# install.packages("remotes", repos = "http://cran.us.r-project.org")
# # Download and then load rnaturalearthhires
# remotes::install_github("ropensci/rnaturalearthhires")
# install.packages("rnaturalearthhires", repos = "https://ropensci.r-universe.dev", type = "source")
# library(rnaturalearthhires)
## ----installBeeBDC, results=TRUE, message=TRUE, eval = FALSE, collapse = TRUE----
# install.packages("BeeBDC")
# library(BeeBDC)
## ----snapshot, collapse = TRUE------------------------------------------------
renv::snapshot(project = paste0(RootPath,"/Data_acquisition_workflow"),
prompt = FALSE)
## ----dirMaker, collapse = TRUE, eval = FALSE----------------------------------
# BeeBDC::dirMaker(
# RootPath = RootPath,
# RDoc = "vignettes/BeeBDC_main.Rmd") %>%
# # Add paths created by this function to the environment()
# list2env(envir = parent.env(environment()))
## ----dirMakerSECRETELY, include = FALSE---------------------------------------
# For the sake of this tutorial, we will not use here::i_am in dirMaker, because we aren't allowed
# to mess with package directories in this way. This will work-around to use the tempdir()
DataPath <- paste0(RootPath, "/Data_acquisition_workflow")
OutPath_Check <- paste0(RootPath, "/Data_acquisition_workflow/Output/Check")
OutPath_Figures <- paste0(RootPath, "/Data_acquisition_workflow/Output/Figures")
OutPath_Intermediate <- paste0(RootPath, "/Data_acquisition_workflow/Output/Intermediate")
OutPath_Report <- paste0(RootPath, "/Data_acquisition_workflow/Output/Report")
# Create these files
if (!dir.exists(DataPath)) {
dir.create(DataPath, recursive = TRUE)}
if (!dir.exists(OutPath_Check)) {
dir.create(OutPath_Check, recursive = TRUE)}
if (!dir.exists(OutPath_Figures)) {
dir.create(OutPath_Figures, recursive = TRUE)}
if (!dir.exists(OutPath_Intermediate)) {
dir.create(OutPath_Intermediate, recursive = TRUE)}
if (!dir.exists(OutPath_Report)) {
dir.create(OutPath_Report, recursive = TRUE)}
## ----lapply_library, results=FALSE, collapse = TRUE---------------------------
lapply(c("ComplexHeatmap", "magrittr"),
library, character.only = TRUE)
## ----2.0, eval = FALSE--------------------------------------------------------
# # Load some package data — the taxonomy and a flagged example dataset
# # Download the full beesTaxonomy file
# taxonomyFile <- BeeBDC::beesTaxonomy()
## ----2.0secret, collapse = TRUE, eval = TRUE----------------------------------
# load in the small test dataset in the background
system.file("extdata", "testTaxonomy.rda", package="BeeBDC") |>
load()
# Rename the file
taxonomyFile <- testTaxonomy
rm(testTaxonomy)
## ----2.0ii--------------------------------------------------------------------
# Load the example beesFlagged dataset
beesFlagged <- BeeBDC::beesFlagged
selectedGenera <- taxonomyFile %>%
# Select only tribe anthophorini (for example)
dplyr::filter(tolower(tribe) == tolower("anthophorini")) %>%
distinct(genus)
# Filter the data
taxonData <- beesFlagged %>%
dplyr::filter(genus %in% selectedGenera$genus)
# View the data
taxonData
## ----3.0----------------------------------------------------------------------
# Select your study area
studyArea <- c("Canada", "United states", "Mexico", "Guatemala")
# Filter the data to that area
countryData <- beesFlagged %>%
dplyr::filter(country %in% studyArea)
# View the data
countryData
## ----4.1----------------------------------------------------------------------
filteredData <-
BeeBDC::summaryFun(data = beesFlagged,
# Choose the columns to NOT filter (or NULL to filter all columns)
dontFilterThese = c(".gridSummary", ".lonFlag", ".latFlag", ".uncer_terms",
".uncertaintyThreshold"),
# In the output, do you want to REMOVE all filtering columns (TRUE), or keep them (FALSE)
removeFilterColumns = TRUE,
# In the output, do you want to only keep clean data according to your filtering (TRUE),
# Or keep all data and simply update the .summary column (FALSE)
filterClean = TRUE)
## ----4.2----------------------------------------------------------------------
filteredData <- beesFlagged %>%
# Remove any exiting .uncertaintyThreshold column
dplyr::select(!tidyselect::any_of(".uncertaintyThreshold")) %>%
# Chose the coordinate uncertainty to filter to...
BeeBDC::coordUncerFlagR(data = .,
uncerColumn = "coordinateUncertaintyInMeters",
# 10 km here
threshold = 10000) %>%
# Now re-do the .summary column and filter the data using this new value
BeeBDC::summaryFun(
data = .,
dontFilterThese = c(".gridSummary", ".lonFlag", ".latFlag", ".uncer_terms"),
removeFilterColumns = TRUE,
filterClean = TRUE)
## ----4.2a---------------------------------------------------------------------
filteredData <- beesFlagged %>%
# Remove any exisitng .year_outOfRange column
dplyr::select(!".year_outOfRange") %>%
# Chose the minimum year to filter to...
bdc::bdc_year_outOfRange(data = .,
eventDate = "year",
year_threshold = 1970) %>%
# Now re-do the .summary column and filter the data using this new value
BeeBDC::summaryFun(
data = .,
dontFilterThese = c(".gridSummary", ".lonFlag", ".latFlag", ".uncer_terms",
".uncertaintyThreshold"),
removeFilterColumns = TRUE,
filterClean = TRUE)
## ----4.2b---------------------------------------------------------------------
filteredData <-
# The input dataset
beesFlagged %>%
# Chose the year range...
dplyr::filter(year > 1950 & year < 1970) %>%
# Now re-do the .summary column and filter the data using this new value
BeeBDC::summaryFun(
# Select the input dataset to filter
data = .,
# Choose the columns to NOT filter (or NULL to filter all columns)
dontFilterThese = c(".gridSummary", ".lonFlag", ".latFlag", ".uncer_terms",
".uncertaintyThreshold"),
# In the output, do you want to REMOVE all filtering columns (TRUE), or keep them (FALSE)
removeFilterColumns = TRUE,
# In the output, do you want to only keep clean data according to your filtering (TRUE),
# Or keep all data and simply update the .summary column (FALSE)
filterClean = TRUE)
## ----5.1, eval = FALSE--------------------------------------------------------
# if(!require("BiocManager", quietly = TRUE)){
# install.packages("BiocManager")}
# BiocManager::install("ComplexHeatmap", force = TRUE)
# renv::snapshot()
## ----5.1ii, eval = FALSE------------------------------------------------------
# duplicates <- fileFinder(path = "PATH TO A FOLDER CONTAINING THE duplicateRun_ — could be supp. materials folder",
# fileName = "duplicateRun_") %>%
# readr::read_csv() %>%
# # Select only the stingless bee data
# dplyr::filter(database_id %in% stinglessData$database_id |
# database_id_match %in% stinglessData$database_id)
## ----5.1on.exit, include = FALSE----------------------------------------------
oldpar <- par(no.readonly = TRUE)
on.exit(oldpar)
## ----5.1iii, eval = FALSE-----------------------------------------------------
# # Choose the global figure parameters
# par(mar = c(2, 2, 2, 2)/2, mfrow = c(1,1))
#
# # Create the chorDiagram. You can leave many of the below values out but we show here
# # the defaults
#
# BeeBDC::chordDiagramR(
# # The duplicate data from the dupeSummary function output
# dupeData = duplicates,
# outPath = OutPath_Figures,
# fileName = "ChordDiagram.pdf",
# # These can be modified to help fit the final pdf that's exported.
# width = 9,
# height = 7.5,
# bg = "white",
# # How few distinct dataSources should a group have to be listed as "other"
# smallGrpThreshold = 3,
# title = "Duplicated record sources",
# # The default list of colour palettes to choose from usign the paleteer package
# palettes = c("cartography::blue.pal", "cartography::green.pal",
# "cartography::sand.pal", "cartography::orange.pal", "cartography::red.pal",
# "cartography::purple.pal", "cartography::brown.pal"),
# canvas.ylim = c(-1.0,1.0),
# canvas.xlim = c(-0.6, 0.25),
# text.col = "black",
# legendX = grid::unit(6, "mm"),
# legendY = grid::unit(18, "mm"),
# legendJustify = c("left", "bottom"),
# niceFacing = TRUE)
## ----5.2----------------------------------------------------------------------
data("beesFlagged", package = "BeeBDC")
# Create a figure shoring the total number of duplicates, kept duplicates, and unique
# records for each datasource (simplified to the text before the first underscore) and
# the proportion of the above for each data source
BeeBDC::dupePlotR(
data = beesFlagged,
# The outPath to save the plot as
outPath = tempdir(),
fileName = "Fig3_duplicatePlot.pdf",
# Colours in order: duplicate, kept duplicate, unique
dupeColours = c("#F2D2A2","#B9D6BC", "#349B90"),
# Plot size and height
base_height = 7, base_width = 7,
legend.position = c(0.85, 0.8),
# Extra variables can be fed into forcats::fct_recode() to change names on plot
GBIF = "GBIF", SCAN = "SCAN", iDigBio = "iDigBio", USGS = "USGS", ALA = "ALA",
ASP = "ASP",
returnPlot = TRUE
)
## ----5.3b---------------------------------------------------------------------
# Visualise all flags for each dataSource (simplified to the text before the first underscore)
# A clever user might also realise the potential to summarise and produce outputs in other columns
BeeBDC::plotFlagSummary(
# WARNING: alternate path if wanting to produce figures for the selected taxonData (2.0 above)
# Select only the taxonData data
data = beesFlagged,
# Colours in order of pass (TRUE), fail (FALSE), and NA
flagColours = c("#127852", "#A7002D", "#BDBABB"),
fileName = paste0("FlagsPlot_Amell", Sys.Date(),".pdf"),
outPath = tempdir(),
width = 15, height = 9,
# OPTIONAL:
# # Filter to species
speciesName = "Apis mellifera Linnaeus, 1758",
# column to look in
nameColumn = "scientificName",
# Save the filtered data
saveFiltered = FALSE,
# Filter column to display on map
filterColumn = ".summary",
plotMap = TRUE,
# amount to jitter points if desired, e.g. 0.25 or NULL
jitterValue = NULL,
# Map opacity value for points between 0 and 1
mapAlpha = 1,
returnPlot = TRUE,
# Extra variables can be fed into forcats::fct_recode() to change names on plot
GBIF = "GBIF", SCAN = "SCAN", iDigBio = "iDigBio", USGS = "USGS", ALA = "ALA",
ASP = "ASP", CAES = "CAES", 'B. Mont.' = "BMont", 'B. Minkley' = "BMin", Ecd = "Ecd",
Gaiarsa = "Gai", EPEL = "EPEL"
)
## ----5.4----------------------------------------------------------------------
BeeBDC::summaryMaps(
data = beesFlagged,
width = 10, height = 10,
class_n = 3,
class_Style = "jenks",
outPath = tempdir(),
fileName = "CountryMaps_jenks.pdf",
returnPlot = TRUE
)
## ----6.0, eval = FALSE--------------------------------------------------------
# mapData %>%
# readr::write_excel_csv(paste0(DataPath, "/Output/Intermediate/", "cleanTaxon_",
# Sys.Date(), ".csv"))
## ----cleanup, include=FALSE, collapse = TRUE----------------------------------
# Remove the webpage folder
unlink(paste0(dirname(getwd()), "/inst/extdata/WebDir"), recursive = TRUE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.