Nothing
# This function was written by James Dorey to load and output updated summary information from flagging
# This function was written from the 9th of June 2022. For questions, please email James
# at jbdorey[at]me.com
#' Loads, appends, and saves occurrence flag data
#'
#' This function is used to save the flag data for your occurrence data as you run the BeeBDC script.
#' It will read and append existing files, if asked to. Your flags should also be saved in the occurrence
#' file itself automatically.
#'
#' @param data A data frame or tibble. Occurrence records as input.
#' @param outPath A character path. Where the file should be saved.
#' @param fileName Character. The name of the file to be saved
#' @param idColumns A character vector. The names of the columns that are to be kept along with the
#' flag columns. These columns should be useful for identifying unique records with flags.
#' Default = c("database_id", "id", "catalogNumber", "occurrenceID", "dataSource").
#' @param append Logical. If TRUE, this will find and append an existing file generated by this function.
#' @param printSummary Logical. If TRUE, print a [summary()] of all filter columns - i.e. those which
#' tidyselect::starts_with(".")
#'
#' @return Saves a file with id and flag columns and returns this as an object.
#' @export
#'
#' @importFrom dplyr %>%
#'
#' @examples
#' # Load the example data
#' data("beesFlagged")
#'
#' # Run the function
#' OutPath_Report <- tempdir()
#' flagFile <- flagRecorder(
#' data = beesFlagged,
#' outPath = paste(OutPath_Report, sep =""),
#' fileName = paste0("flagsRecorded_", Sys.Date(), ".csv"),
#' # These are the columns that will be kept along with the flags
#' idColumns = c("database_id", "id", "catalogNumber", "occurrenceID", "dataSource"),
#' # TRUE if you want to find a file from a previous part of the script to append to
#' append = FALSE)
flagRecorder <- function(
data = NULL,
outPath = NULL,
fileName = NULL,
idColumns = c("database_id", "id", "catalogNumber", "occurrenceID", "dataSource"),
append = NULL,
printSummary = FALSE){
# locally bind variables to the function
database_id <- . <- rowSum <- .summary <- NULL
#### 0.0 Prep ####
##### 0.1 Packages ####
requireNamespace("dplyr")
requireNamespace("tidyselect")
requireNamespace("lubridate")
requireNamespace("readr")
##### 0.2 Warnings ####
if(is.null(data)){
warning(" - Please provide a dataset.")
}
if(is.null(outPath)){
warning(" - Please provide an outPath to where the bdc folders are dataset.")
}
if(is.null(idColumns)){
warning(paste(" - No ID columns were selected! We will keep the following id columns intact:\n",
"database_id, id, catalogNumber, and occurrenceID.",
sep = ""))
idColumns = c("database_id", "id", "catalogNumber", "occurrenceID")
}
if(is.null(append)){
warning(" - Please provide a append argument - TRUE (find existing file) or FALSE (start from scratch).")
}
##### 0.3 Data in ####
###### a. Occurrence dataset ####
# Select the columns from the input occurrence dataset
data <- data %>%
dplyr::select( dplyr::all_of(idColumns), tidyselect::starts_with("."))
###### b. Existing data ####
if(append == TRUE){
# Find an existing file
flagPath <- fileFinder(path = outPath, fileName = "flagsRecorded_")
# Read it in
flagColumns <- readr::read_csv(flagPath)
# Find the new columns that need adding in
newColumns <- setdiff(colnames(data), colnames(flagColumns))
# Merge the new columns to the data tibble
data <- flagColumns %>%
dplyr::left_join(
# Select only the new columns to add, from the new tibble.
dplyr::select(data, c(database_id, dplyr::all_of(newColumns))),
# Merge by database_id
by = "database_id", keep = FALSE)
# Remove the spent dataframe
rm(flagColumns)
}
#### 1.0 Yes .summary ####
if(".summary" %in% colnames(data)){
# Update .summary column
summaryCol <- data %>%
# Select all columns starting with "."
dplyr::select(tidyselect::starts_with(".")) %>%
# Delete the summary column if it's there
dplyr::select(!tidyselect::starts_with(".summary")) %>%
# Make FALSE == 1 and TRUE == 0
dplyr::mutate_if(is.logical, ~abs(as.numeric(.) - 1)) %>%
# IF rowSum > 0 then there is at least one flag
dplyr::mutate(rowSum = rowSums(., na.rm = TRUE)) %>%
# Add the .summary column
dplyr::mutate(.summary = dplyr::if_else(rowSum > 0,
FALSE, TRUE)) %>%
dplyr::select(.summary)
# Add this column in
data <- data %>%
dplyr::mutate(.summary = summaryCol$.summary)
# User output
message(" - .summary column detected. This will be over-written.")
}
#### 2.0 No .summary ####
if(!".summary" %in% colnames(data)){
# Update .summary column
summaryCol <- data %>%
# Select al lcolumns starting with "."
dplyr::select(tidyselect::starts_with(".")) %>%
# Delete the summary column if it's there
dplyr::select(!tidyselect::starts_with(".summary")) %>%
# Make FALSE == 1 and TRUE == 0
dplyr::mutate_if(is.logical, ~abs(as.numeric(.) - 1)) %>%
# IF rowSum > 0 then there is at least one flag
dplyr::mutate(rowSum = rowSums(., na.rm = TRUE)) %>%
# Add the .summary column
dplyr::mutate(.summary = dplyr::if_else(rowSum > 0,
FALSE, TRUE)) %>%
dplyr::select(.summary)
# Add this column in
data <- data %>%
dplyr::mutate(.summary = summaryCol$.summary)
# User output
message(" - NO .summary column detected. This will added to the data.")
}
#### 3.0 Save ####
# Save this information as the csv flagsRecorded_DATE.csv
readr::write_excel_csv(data,
paste0(outPath, "/", fileName, sep = ""))
# User output
message(paste(
" - Data saved to ", paste0(outPath, "/", fileName, sep = ""),
sep = ""))
# User output
writeLines(paste(
" - Selected ", ncol(data), " columns. These include:\n",
paste(colnames(data)[1:ncol(data)-1], collapse = ", "),
", and ", paste(colnames(data)[ncol(data)]),
sep = ""
))
# Print summary if requested
if(printSummary == TRUE){
summary(dplyr::select(data, tidyselect::starts_with(".")))
}
# Return this file
return(data)
} # END flagRecorder
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.