R/create_taxa_map.R

Defines functions read_taxa_map create_taxa_map

Documented in create_taxa_map read_taxa_map

#' Create taxa map
#'
#' @description
#'     Create the taxa table that will map the resolved taxa back to the
#'     raw taxa in the original data table, and which will be populated with
#'     provenance information about the taxa cleaning process.
#'
#' @usage
#'     create_taxa_map(path, x, col)
#'
#' @param path
#'     A character string specifying the path to which the taxa table will be
#'     written.
#' @param x
#'     A data frame containing the vector of taxa names to be cleaned.
#' @param col
#'     A character string specifying the column in x containing taxa names to
#'     be cleaned.
#'
#' @return
#'     (data frame; taxa_map.csv) With the fields:
#'     \itemize{
#'         \item{'taxa_raw'} Unique taxa names listed in x.
#'         \item{'taxa_trimmed'} The contents of taxa_raw, but with white space
#'         and common abbreviations (e.g. "Spp.", "C.f.") trimmed. Column
#'         contents are outputs from `trim_taxon`.
#'         \item{'taxa_replacement'} The taxa name used as a replacement for
#'         taxa_raw. Column contents are outputs from `replace_taxon`.
#'         \item{'taxa_removed'} A logical value indicating whether the
#'         corresponding taxa_raw should be removed. Column contents are
#'         outputs from `remove_taxon`.
#'         \item{'taxa_clean'} Cleaned taxa names that have been resolved to a
#'         taxonomic authority. Column contents are outputs from `resolve_taxa`
#'         and `resolve_common`.
#'         \item{'rank'} Taxonomic rank for resolved taxon. Column contents
#'         are outputs from `resolve_taxa` and `resolve_common`.
#'         \item{'authority'} Taxonomic authorities against which taxa_clean
#'         was resolved. Column contents are outputs from `resolve_taxa` and
#'         `resolve_common`.
#'         \item{'authority_id'} Unique identification numbers within each
#'         authority. Column contents are outputs from `resolve_taxa` and
#'         `resolve_common`.
#'         \item{'score'} A numeric score, supplied by the authority,
#'         indicating the strength of match between taxa_raw and taxa_clean.
#'         Column contents are outputs from `resolve_taxa` and `resolve_common`.
#'         \item{'difference'} A logical value indicating whether the contents
#'         of taxa_clean differ from taxa_raw.
#'     }
#'
#' @export
#'

create_taxa_map <- function(path, x, col){

# Check arguments ---------------------------------------------------------

  # "path" is optional: without it the table is returned but not written.
  if (missing(path)){
    warning('Input argument "path" is missing. Include a path if you want results written to file.')
  }
  if (missing(x)){
    stop('Input argument "x" is missing!')
  }
  if (!inherits(x, 'data.frame')){
    stop('Input argument "x" must be a data frame!')
  }
  if (missing(col)){
    stop('Input argument "col" is missing!')
  }

  # Coerce data frame subclasses to a plain data frame so that x[ , col]
  # drops to a vector.
  x <- as.data.frame(x)

  # Fail with a clear message rather than "undefined columns selected".
  if (is.character(col) && !all(col %in% colnames(x))){
    stop('Input argument "col" must be the name of a column in "x"!')
  }

# Create taxon cleaning table ---------------------------------------------

  # Unique raw names, computed once and reused for sizing and populating.
  taxa_raw <- unique(
    x[ , col]
  )

  # Every column of the map is declared as character, so a factor input
  # must not leak a factor into taxa_raw.
  if (is.factor(taxa_raw)){
    taxa_raw <- as.character(taxa_raw)
  }

  n_taxa <- length(taxa_raw)
  blank <- rep(NA_character_, n_taxa)

  # One row per unique raw name; all fields start out as missing and are
  # filled in by later steps of the cleaning process.
  data_out <- data.frame(
    taxa_raw = blank,
    taxa_trimmed = blank,
    taxa_replacement = blank,
    taxa_removed = blank,
    taxa_clean = blank,
    rank = blank,
    authority = blank,
    authority_id = blank,
    score = blank,
    difference = blank,
    stringsAsFactors = FALSE
  )

# Populate taxa_raw -------------------------------------------------------

  data_out[ , 'taxa_raw'] <- taxa_raw

# Write taxon cleaning table to file --------------------------------------

  # Only write when the caller supplied a destination.
  if (!missing(path)){
    write_taxa_map(
      x = data_out,
      path = path
    )
  }

  data_out

}






#' Read taxa map
#'
#' @description
#'     Read taxa_map.csv generated by \code{create_taxa_map()}.
#'
#' @param path
#'     (character) The path to the directory containing taxa_map.csv.
#'
#' @return
#'     (data.table; data.frame) taxa_map.csv as an R object.
#'
#' @examples
#' mypath <- dirname(system.file("taxa_map.csv", package = "taxonomyCleanr"))
#' taxa_map <- read_taxa_map(mypath)
#' taxa_map
#'
#' @export
#'
read_taxa_map <- function(path) {
  # The map is looked up under its fixed file name inside the given directory.
  taxa_map_file <- paste(path, "taxa_map.csv", sep = "/")

  # Read with fill and blank.lines.skip both enabled.
  taxa_map <- data.table::fread(
    file = taxa_map_file,
    fill = TRUE,
    blank.lines.skip = TRUE
  )

  taxa_map
}
EDIorg/taxonomyCleanr documentation built on April 9, 2023, 2:43 a.m.