#' Secondary function to transform data generated with powoSpecies
#'
#' @author Debora Zuanny & Domingos Cardoso
#'
#' @description Produces a spreadsheet with extra data about the
#' publication, adding the corrected year of publication and the counting of
#' synonyms of each species. It is an optional function to be used after run
#' expowo's \code{powoSpecies} and to use as input data in the expowo's
#' \code{accGraph}.
#'
#' @usage
#' get_year_pubs(inputdf = NULL,
#' verbose = TRUE,
#' save = FALSE,
#' dir = "results_POWO_with_year_pubs",
#' filename = "POWO_with_year_pubs")
#'
#' @param inputdf A dataframe generated using \code{powoSpecies} function with
#' the argument synonyms set as \code{TRUE} and containing the genus and species
#' column and the associated information about the protologue of each species.
#' The species name must be binomial, i.e. must contain both the genus name and
#' specific epithet.
#'
#' @param verbose Logical, if \code{FALSE}, a message showing each step during
#' the POWO search will not be printed in the console in full.
#' @param save Logical, if \code{TRUE}, the transformed input POWO database
#' will be saved on disk.
#'
#' @param dir Pathway to the computer's directory, where the file will be saved
#' provided that the argument \code{save} is set up in \code{TRUE}. The default
#' is to create a directory named **results_POWO_with_year_pubs** and the search
#' results will be saved within a subfolder named after the current date.
#'
#' @param filename Name of the output file to be saved. The default is to create
#' a file entitled **POWO_with_year_pubs**.
#'
#' @return Objects of class dataframe and saves the output on disk.
#'
#' @seealso \code{\link{megaGen}}
#' @seealso \code{\link{powoSpecies}}
#' @seealso \code{\link{powoFam}}
#' @seealso \code{\link{powoGenera}}
#' @seealso \code{\link{accGraph}}
#'
#' @examples
#' \dontrun{
#'
#' library(expowo)
#'
#' output <- powoSpecies(family = "Fabaceae",
#' genus = NULL,
#' synonyms = TRUE,
#' country = NULL,
#' save = FALSE,
#' dir = "Fabaceae_results_powoSpecies",
#' filename = "Fabaceae_spp")
#'
#'
#' output_year_pubs <- get_year_pubs(inputdf = output,
#' verbose = TRUE,
#' save = FALSE,
#' dir = "results_POWO_with_year_pubs",
#' filename = "POWO_with_year_pubs")
#' }
#'
#' @importFrom magrittr "%>%"
#' @importFrom tibble add_column
#'
#' @export
#'
get_year_pubs <- function(inputdf = NULL,
verbose = TRUE,
save = FALSE,
dir = "results_POWO_with_year_pubs",
filename = "POWO_with_year_pubs") {
# inputdf check if it is a dataframe and if was generated by powoSpecies
.arg_check_inputdf(inputdf)
# dir check
dir <- .arg_check_dir(dir)
# Get name of folder and file name if the results are intended to be saved
foldername <- paste0(dir, "/", format(Sys.time(), "%d%b%Y"))
# Fill in with NAs the empty cells
df <- data.frame(apply(inputdf, 2, function(x) gsub("^$", NA, x)))
# Creating new columns for the year and corrected year of publication
df <- df %>% tibble::add_column(year = NA, .after = "publication")
df <- df %>% tibble::add_column(year_basionym = NA, .after = "year")
df <- df %>% tibble::add_column(number_synonyms = NA, .after = "accepted_name")
# Extracting the year from the full publication information
# Clean examples such as "non C. caerulea Jacq.", ", non Engelh. (1898)"
df$publication <- gsub(",\\snon\\s[[:upper:]].*", "", df$publication)
# Get examples of not validly published names
tf <- grepl("[0-9][)],\\s", df$publication)
p <- unique(gsub(".*[)],\\s", "", df$publication[tf]))
tf_inval <- !grepl(paste0(p, collapse = "|"), df$publication)
if (any(tf_inval)) {
df$year[tf_inval] <- gsub(",\\scontrary.*", "", df$publication[tf_inval])
df$year[tf_inval] <- gsub(".*[(]|[)].*|.*.\\s|.*[.]", "", df$year[tf_inval])
df$year[tf_inval] <- gsub("unknown\\spublication|publication", "unknown",
df$year[tf_inval])
}
df$year[!tf_inval] <- gsub(".*[(]|,.*", "", df$publication[!tf_inval])
df$year[!tf_inval] <- gsub(".*[.]\\s|[)].*", "", df$year[!tf_inval])
df$year[!tf_inval] <- gsub("unknown\\spublication|publication", "unknown",
df$year[!tf_inval])
# Extracting the correct year for species with synonyms
# Fill corrected year for species without synonyms
tf_accepted <- df$status %in% "Accepted"
# Get the accepted names for species without any synonym
tftf <- !df$scientific_name[tf_accepted] %in% df$accepted_name
df$year_basionym[tf_accepted][tftf] <- df$year[tf_accepted][tftf]
# Count the number of synonyms
df$number_synonyms[tf_accepted][tftf] <- 0
df$number_synonyms[tf_accepted][!tftf] <- table(df$accepted_name)
# Extracting the correct year for species with synonyms
n_au <- unique(df$accepted_name[!is.na(df$accepted_name)])
for (i in seq_along(n_au)) {
tf <- df$scientific_name[tf_accepted] %in% n_au[i]
year_acc <- df$year[tf_accepted][tf]
if (any(tf_inval)) {
tftf <- df$accepted_name[tf_inval] %in% n_au[i]
temp <- df[tf_inval, ][tftf, ]
} else {
tftf <- df$accepted_name %in% n_au[i]
temp <- df[tftf, ]
}
temp <- temp[!grepl("\\svar[.]|\\sf[.]\\s|\\ssubsp[.]\\s",
temp$scientific_name), ]
if (length(temp$year) == 0) {
year_syn <- NA
} else {
year_syn <- min(temp$year)
}
if (year_acc < year_syn |
is.na(year_syn)) {
df$year_basionym[tf_accepted][tf] <- year_acc
} else {
df$year_basionym[tf_accepted][tf] <- year_syn
}
}
# Correct the publication year for "nomina conservanda" examples
tf <- grepl(", nom. cons.", df$publication[tf_accepted])
df$year[tf_accepted][tf] <- gsub(".*[(]|[)],\\snom[.] cons[.]*", "",
df$publication[tf_accepted][tf])
df$year_basionym[tf_accepted][tf] <- gsub(".*[(]|[)],\\snom[.] cons[.]*", "",
df$publication[tf_accepted][tf])
# Identify each change in the corrected year of species discovery
df$year <- as.numeric(df$year)
tf <- is.na(df$year_basionym)
df$year_basionym[!tf] <- as.numeric(df$year_basionym[!tf])
df$year <- as.numeric(df$year)
df$year_basionym <- as.numeric(df$year_basionym)
tf <- df$year == df$year_basionym
df <- df %>% tibble::add_column(year_changed = NA, .after = "year_basionym")
df$year_changed[tf] <- "no"
df$year_changed[!tf] <- "yes"
if (save) {
if (!dir.exists(dir)) {
dir.create(dir)
}
if (!dir.exists(foldername)) {
dir.create(foldername)
}
# Saving the new spreadsheet
saveCSV(df,
dir = dir,
filename = filename,
verbose = verbose,
save = save,
foldername = foldername)
}
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.