#' Create taxa map
#'
#' @description
#' Create the taxa table that will map the resolved taxa back to the
#' raw taxa in the original data table, and which will be populated with
#' provenance information about the taxa cleaning process.
#'
#' @usage
#' create_taxa_map(path, x, col)
#'
#' @param path
#' A character string specifying the path to which the taxa table will be
#' written.
#' @param x
#' A data frame containing the vector of taxa names to be cleaned.
#' @param col
#' A character string specifying the column in x containing taxa names to
#' be cleaned.
#'
#' @return
#' (data frame; taxa_map.csv) With the fields:
#' \itemize{
#' \item{'taxa_raw'} Unique taxa names listed in x.
#' \item{'taxa_trimmed'} The contents of taxa_raw, but with white space
#' and common abbreviations (e.g. "Spp.", "C.f.") trimmed. Column
#' contents are outputs from `trim_taxon`.
#' \item{'taxa_replacement'} The taxa name used as a replacement for
#' taxa_raw. Column contents are outputs from `replace_taxon`.
#' \item{'taxa_removed'} A logical value indicating whether the
#' corresponding taxa_raw should be removed. Column contents are
#' outputs from `remove_taxon`.
#' \item{'taxa_clean'} Cleaned taxa names that have been resolved to a
#' taxonomic authority. Column contents are outputs from `resolve_taxa`
#' and `resolve_common`.
#' \item{'rank'} Taxonomic rank for resolved taxon. Column contents
#' are outputs from `resolve_taxa` and `resolve_common`.
#' \item{'authority'} Taxonomic authorities against which taxa_clean
#' was resolved. Column contents are outputs from `resolve_taxa` and
#' `resolve_common`.
#' \item{'authority_id'} Unique identification numbers within each
#' authority. Column contents are outputs from `resolve_taxa` and
#' `resolve_common`.
#' \item{'score'} A numeric score, supplied by the authority,
#' indicating the strength of match between taxa_raw and taxa_clean.
#' Column contents are outputs from `resolve_taxa` and `resolve_common`.
#' \item{'difference'} A logical value indicating whether the contents
#' of taxa_clean differ from those of taxa_raw.
#' }
#'
#' @export
#'
create_taxa_map <- function(path, x, col) {

  # Check arguments ---------------------------------------------------------

  # A missing path is allowed: the table is then returned but not written.
  if (missing(path)) {
    warning('Input argument "path" is missing. Include a path if you want results written to file.')
  }
  if (missing(x)) {
    stop('Input argument "x" is missing!')
  }
  if (!inherits(x, "data.frame")) {
    stop('Input argument "x" must be a data frame!')
  }
  if (missing(col)) {
    stop('Input argument "col" is missing!')
  }
  # Selecting several columns would turn x[, col] into a data frame, so that
  # length(unique(...)) counts columns rather than taxa. Reject it up front.
  if (length(col) != 1L || is.na(col)) {
    stop('Input argument "col" must specify a single column of "x"!')
  }
  if (is.character(col) && !(col %in% names(x))) {
    stop('Input argument "col" must be the name of a column in "x"!')
  }

  # Coerce data frame subclasses to a base data frame so that the
  # single-column extraction below yields a vector.
  x <- as.data.frame(x)

  # Create taxon cleaning table ---------------------------------------------

  # Unique raw names, in order of first appearance (NA is kept if present).
  taxa_raw <- unique(x[, col])

  # Every column other than taxa_raw starts out as missing character values;
  # they are filled in by later steps of the cleaning process.
  blank <- rep(NA_character_, length(taxa_raw))
  data_out <- data.frame(
    taxa_raw = blank,
    taxa_trimmed = blank,
    taxa_replacement = blank,
    taxa_removed = blank,
    taxa_clean = blank,
    rank = blank,
    authority = blank,
    authority_id = blank,
    score = blank,
    difference = blank,
    stringsAsFactors = FALSE
  )

  # Populate taxa_raw -------------------------------------------------------

  data_out[, "taxa_raw"] <- taxa_raw

  # Write taxon cleaning table to file --------------------------------------

  if (!missing(path)) {
    write_taxa_map(
      x = data_out,
      path = path
    )
  }

  data_out
}
#' Read taxa map
#'
#' @description
#' Read taxa_map.csv generated by \code{create_taxa_map()}.
#'
#' @param path
#' (character) The path to the directory containing taxa_map.csv.
#'
#' @return
#' (data.table; data.frame) taxa_map.csv as an R object.
#'
#' @examples
#' mypath <- dirname(system.file("taxa_map.csv", package = "taxonomyCleanr"))
#' taxa_map <- read_taxa_map(mypath)
#' taxa_map
#'
#' @export
#'
read_taxa_map <- function(path) {
  # The map is always stored under the fixed name taxa_map.csv inside `path`.
  map_file <- paste(path, "taxa_map.csv", sep = "/")

  # fill = TRUE pads short rows and blank.lines.skip = TRUE ignores empty
  # lines while reading.
  taxa_map <- data.table::fread(
    file = map_file,
    fill = TRUE,
    blank.lines.skip = TRUE
  )
  taxa_map
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.