R/pm_fuzzy_match.R
In perumammals: Taxonomic Backbone and Name Validation Tools for Mammals of Peru

Documented in fuzzy_match_genus fuzzy_match_species_within_genus fuzzy_match_species_within_genus_helper get_ambiguous_matches

#' Fuzzy Match Genus Name Against Peru Mammals Database
#'
#' @description
#' Performs fuzzy matching of genus names against the peru_mammals database
#' using string distance (Levenshtein) to account for slight spelling variations.
#' Maximum distance is set to 1 character difference.
#'
#' This implementation uses a two-step approach to avoid warnings when no matches
#' are found:
#' 1. Perform stringdist_left_join to get all candidates
#' 2. Split into valid (finite distance) and invalid (NA distance)
#' 3. Process only valid matches to find best candidates
#'
#' @param df A data frame containing the genus names to be matched.
#'   Must include column: Orig.Genus
#' @param target_df A data frame representing peru_mammals database.
#'   Must include column: genus
#'
#' @return
#' A tibble with two additional columns:
#' - `fuzzy_match_genus`: Logical indicating if genus was matched
#' - `fuzzy_genus_dist`: Numeric distance for each match (lower = better)
#' - `Matched.Genus`: The matched genus name
#'
#' @details
#' If multiple genera match with the same string distance (ambiguous matches),
#' a warning is issued and the first match is automatically selected. To
#' examine ambiguous matches, use \code{get_ambiguous_matches(result, type = "genus")}.
#'
#' Ambiguous match information is stored as an attribute and includes:
#' - Original genus
#' - All matched genera with tied distances
#' - Family information from peru_mammals
#' - Number of species per genus
#'
#'
#' @keywords internal
fuzzy_match_genus <- function(df, target_df = NULL) {

  # ==========================================================================
  # SECTION 1: Input Validation
  # ==========================================================================

  assertthat::assert_that(
    'Orig.Genus' %in% colnames(df),
    msg = "fuzzy_match_genus() requires column: Orig.Genus"
  )

  # Validate target_df
  if (is.null(target_df)) {
    stop("target_df (peru_mammals) must be provided", call. = FALSE)
  }

  assertthat::assert_that(
    'genus' %in% colnames(target_df),
    msg = "target_df must include column: genus"
  )

  # Handle empty input
  if (nrow(df) == 0) {
    if (!all(c('fuzzy_match_genus', 'fuzzy_genus_dist') %in% colnames(df))) {
      return(tibble::add_column(df,
                                fuzzy_match_genus = logical(0),
                                fuzzy_genus_dist = numeric(0),
                                Matched.Genus = character(0)))
    } else {
      return(df)
    }
  }

  # Remove existing fuzzy_genus_dist if present (for sequential matching)
  if ('fuzzy_genus_dist' %in% colnames(df)) {
    df <- df |>
      dplyr::mutate(fuzzy_genus_dist = NULL)
  }

  # ==========================================================================
  # SECTION 2: Prepare Target Genera
  # ==========================================================================

  # Get unique genera from peru_mammals (case-insensitive)
  target_genera <- target_df |>
    dplyr::distinct(genus) |>
    dplyr::mutate(genus_upper = toupper(genus))

  # ==========================================================================
  # SECTION 3: Fuzzy Matching (Two-Step Approach)
  # ==========================================================================

  # Step 1: Perform join to get all candidates with distances
  matched_all <- df |>
    fuzzyjoin::stringdist_left_join(
      target_genera,
      by = c('Orig.Genus' = 'genus_upper'),
      max_dist = 1,
      distance_col = 'fuzzy_genus_dist'
    ) |>
    dplyr::mutate(Matched.Genus = genus_upper) |>
    dplyr::select(-c('genus', 'genus_upper'))

  # Step 2: Split into those with valid distances and those without
  matched_valid <- matched_all |>
    dplyr::filter(!is.na(fuzzy_genus_dist))

  matched_invalid <- matched_all |>
    dplyr::filter(is.na(fuzzy_genus_dist))

  # Step 3: Process valid matches to find best candidates
  if (nrow(matched_valid) > 0) {
    matched_temp <- matched_valid |>
      dplyr::group_by(Orig.Genus) |>
      dplyr::filter(fuzzy_genus_dist == min(fuzzy_genus_dist)) |>
      dplyr::ungroup()
  } else {
    # No valid matches found - return empty tibble with correct structure
    matched_temp <- matched_valid
  }

  # ==========================================================================
  # SECTION 4: Handle Ambiguous Matches
  # ==========================================================================

  # Detect ambiguous matches (multiple genera with same distance)
  ambiguous_matches <- matched_temp |>
    dplyr::filter(dplyr::n() > 1)

  if (nrow(ambiguous_matches) > 0) {
    # Count unique genera with ambiguous matches
    n_ambiguous_genera <- ambiguous_matches |>
      dplyr::distinct(Orig.Genus) |>
      nrow()

    # Issue informative warning
    warning(
      "Found ", n_ambiguous_genera, " genera with multiple fuzzy matches ",
      "(tied string distances).\n",
      "  The algorithm will automatically select the first match.\n",
      "  To examine ambiguous matches, use: ",
      "get_ambiguous_matches(result, type = 'genus')\n",
      "  Consider manual curation for critical applications.",
      call. = FALSE,
      immediate. = TRUE
    )

    # Store enriched ambiguous match information
    attr(matched_temp, "ambiguous_genera") <- ambiguous_matches |>
      dplyr::ungroup() |>
      dplyr::select(Orig.Genus, Matched.Genus, fuzzy_genus_dist) |>
      dplyr::distinct() |>
      # Join with target_df to get additional database information
      dplyr::left_join(
        target_df |>
          dplyr::mutate(genus_upper = toupper(genus)) |>
          dplyr::distinct(genus_upper, family) |>
          dplyr::rename(Matched.Genus = genus_upper, Matched.Family = family),
        by = "Matched.Genus"
      ) |>
      dplyr::distinct() |>
      # Add species count per genus
      dplyr::left_join(
        target_df |>
          dplyr::group_by(genus) |>
          dplyr::summarise(
            n_species_in_db = dplyr::n_distinct(species),
            example_species = paste(head(unique(species), 2), collapse = ", ")
          ) |>
          dplyr::mutate(genus_upper = toupper(genus)) |>
          dplyr::rename(Matched.Genus = genus_upper) |>
          dplyr::select(-genus),
        by = "Matched.Genus"
      ) |>
      dplyr::distinct() |>
      dplyr::arrange(Orig.Genus, fuzzy_genus_dist, Matched.Genus)
  }

  # ==========================================================================
  # SECTION 5: Select First Match for Ambiguous Cases
  # ==========================================================================

  # When there are multiple candidate matches with the same distance for a
  # given input row, we need to select one. However, we must preserve ALL
  # input rows - the grouping should be by the original input row identity,
  # not just by Orig.Genus.

  # Strategy: Group by all original columns (which includes unique identifiers
  # like 'sorter'), then take the first match candidate for each input row.

  if (nrow(matched_temp) > 0) {
    # Identify columns that were in the original input df
    original_cols <- intersect(colnames(df), colnames(matched_temp))

    # Group by original columns to preserve each input row
    matched <- matched_temp |>
      dplyr::group_by(dplyr::across(dplyr::all_of(original_cols))) |>
      dplyr::slice_head(n = 1) |>
      dplyr::ungroup()
  } else {
    matched <- matched_temp
  }

  # Preserve ambiguous match attribute
  if (!is.null(attr(matched_temp, "ambiguous_genera"))) {
    attr(matched, "ambiguous_genera") <- attr(matched_temp, "ambiguous_genera")
  }

  # ==========================================================================
  # SECTION 6: Identify Unmatched and Combine Results
  # ==========================================================================

  unmatched <- df |>
    fuzzyjoin::stringdist_anti_join(
      target_genera,
      by = c('Orig.Genus' = 'genus_upper'),
      max_dist = 1
    )

  assertthat::assert_that(
    nrow(df) == (nrow(matched) + nrow(unmatched)),
    msg = paste0(
      "Row count mismatch in fuzzy_match_genus():\n",
      "Input: ", nrow(df), " rows\n",
      "Matched: ", nrow(matched), " rows\n",
      "Unmatched: ", nrow(unmatched), " rows"
    )
  )

  res <- dplyr::bind_rows(
    matched,
    unmatched,
    .id = 'fuzzy_match_genus'
  ) |>
    dplyr::mutate(fuzzy_match_genus = (fuzzy_match_genus == 1)) |>
    dplyr::arrange(Orig.Genus) |>
    dplyr::relocate(Orig.Genus)

  # Preserve ambiguous match attribute in final result
  if (!is.null(attr(matched, "ambiguous_genera"))) {
    attr(res, "ambiguous_genera") <- attr(matched, "ambiguous_genera")
  }

  return(res)
}


#' Fuzzy Match Species within Genus in Peru Mammals Database
#'
#' @description
#' Performs fuzzy matching of species names within a matched genus using
#' string distance to account for spelling variations. Peru mammals database
#' does not include infraspecific taxa.
#'
#' @param df A data frame containing species data to be matched.
#'   Must include columns: Orig.Species, Matched.Genus
#' @param target_df A data frame representing peru_mammals database.
#'   Must include columns: genus, species
#'
#' @return
#' A tibble with additional columns:
#' - `fuzzy_match_species_within_genus`: Logical indicating match success
#' - `fuzzy_species_dist`: Numeric distance for each match
#' - `Matched.Species`: The matched species name
#'
#' @details
#' This function processes each matched genus separately for efficiency.
#' If multiple species match with the same distance, a warning is issued
#' and the first match is selected. Use \code{get_ambiguous_matches(result, type = "species")}
#' to examine ambiguous cases.
#'
#' Special handling for "sp." cases:
#' - "Akodon sp. Ancash" is treated as a complete specific epithet
#' - Fuzzy matching will work on the entire "SP. ANCASH" string
#'
#' @keywords internal
fuzzy_match_species_within_genus <- function(df, target_df = NULL) {

  # ==========================================================================
  # SECTION 1: Input Validation
  # ==========================================================================

  assertthat::assert_that(
    all(c('Orig.Species', 'Matched.Genus') %in% colnames(df)),
    msg = "fuzzy_match_species_within_genus() requires: Orig.Species, Matched.Genus"
  )

  if (is.null(target_df)) {
    stop("target_df (peru_mammals) must be provided", call. = FALSE)
  }

  assertthat::assert_that(
    all(c('genus', 'species') %in% colnames(target_df)),
    msg = "target_df must include columns: genus, species"
  )

  # Handle empty input
  if (nrow(df) == 0) {
    if (!all(c('fuzzy_match_species_within_genus', 'fuzzy_species_dist') %in% colnames(df))) {
      return(tibble::add_column(df,
                                fuzzy_match_species_within_genus = logical(0),
                                fuzzy_species_dist = numeric(0),
                                Matched.Species = character(0)))
    } else {
      return(df)
    }
  }

  # Remove existing fuzzy_species_dist if present
  if ('fuzzy_species_dist' %in% colnames(df)) {
    df <- df |>
      dplyr::mutate(fuzzy_species_dist = NULL)
  }

  # ==========================================================================
  # SECTION 2: Process by Matched Genus
  # ==========================================================================

  # Split by genus and process each group
  res_list <- df |>
    dplyr::group_by(Matched.Genus) |>
    dplyr::group_split()

  # Process each genus group
  res_with_attrs <- lapply(res_list, function(chunk) {
    fuzzy_match_species_within_genus_helper(chunk, target_df)
  })

  # ==========================================================================
  # SECTION 3: Consolidate Ambiguous Matches
  # ==========================================================================

  # Extract ambiguous attributes from all chunks
  all_ambiguous <- lapply(res_with_attrs, function(chunk_result) {
    attr(chunk_result, "ambiguous_species")
  })

  # Remove NULLs
  all_ambiguous <- Filter(Negate(is.null), all_ambiguous)

  # Combine all ambiguous matches
  consolidated_ambiguous <- if (length(all_ambiguous) > 0) {
    dplyr::bind_rows(all_ambiguous)
  } else {
    NULL
  }

  # ==========================================================================
  # SECTION 4: Combine Results
  # ==========================================================================

  res <- dplyr::bind_rows(res_with_attrs) |>
    dplyr::relocate(c('Orig.Genus', 'Orig.Species'))

  # Re-attach consolidated ambiguous attribute
  if (!is.null(consolidated_ambiguous) && nrow(consolidated_ambiguous) > 0) {
    attr(res, "ambiguous_species") <- consolidated_ambiguous
  }

  return(res)
}


#' Helper: Fuzzy Match Species within Genus
#'
#' @description
#' Helper function that performs fuzzy matching for a single genus.
#'
#' This implementation uses a two-step approach to avoid issues with empty
#' groups when filtering NAs:
#' 1. Perform stringdist_left_join to get all candidates
#' 2. Split into matched (finite distance) and unmatched (NA distance)
#' 3. Process matched candidates to find best matches
#' 4. Recombine for final output
#'
#' @param df Data frame for a single matched genus
#' @param target_df Peru mammals database
#'
#' @return Data frame with fuzzy match results
#'
#' @keywords internal
fuzzy_match_species_within_genus_helper <- function(df, target_df) {

  # Get the matched genus
  genus <- df |>
    dplyr::distinct(Matched.Genus) |>
    dplyr::pull()

  # Get species for this genus from peru_mammals
  genus_formatted <- .str_to_simple_cap(genus)

  database_subset <- target_df |>
    dplyr::filter(genus == genus_formatted) |>
    dplyr::mutate(species_upper = toupper(species)) |>
    dplyr::select(species_upper, species, scientific_name, common_name)

  if (nrow(database_subset) == 0) {
    # No species found for this genus (shouldn't happen after genus_match)
    return(df |>
             dplyr::mutate(
               fuzzy_match_species_within_genus = FALSE,
               fuzzy_species_dist = NA_real_
             ))
  }

  # ==========================================================================
  # Fuzzy Match Species (Two-Step Approach)
  # ==========================================================================

  # Step 1: Perform join to get all candidates with distances
  matched_all <- df |>
    fuzzyjoin::stringdist_left_join(
      database_subset,
      by = c('Orig.Species' = 'species_upper'),
      distance_col = 'fuzzy_species_dist'
    ) |>
    dplyr::mutate(Matched.Species = species_upper)

  # Step 2: Split into those with valid distances and those without
  matched_valid <- matched_all |>
    dplyr::filter(!is.na(fuzzy_species_dist))

  matched_invalid <- matched_all |>
    dplyr::filter(is.na(fuzzy_species_dist))

  # Step 3: Process valid matches to find best candidates
  if (nrow(matched_valid) > 0) {
    matched <- matched_valid |>
      dplyr::group_by(Orig.Genus, Orig.Species) |>
      dplyr::filter(fuzzy_species_dist == min(fuzzy_species_dist)) |>
      dplyr::ungroup()
  } else {
    # No valid matches found
    matched <- matched_valid  # Empty tibble with correct structure
  }

  # ==========================================================================
  # Handle Ambiguous Matches
  # ==========================================================================

  ambiguous_matches <- matched |>
    dplyr::group_by(Orig.Genus, Orig.Species) |>
    dplyr::filter(dplyr::n() > 1) |>
    dplyr::ungroup()

  if (nrow(ambiguous_matches) > 0) {
    n_ambiguous_species <- ambiguous_matches |>
      dplyr::distinct(Orig.Genus, Orig.Species) |>
      nrow()

    warning(
      "Found ", n_ambiguous_species, " species with multiple fuzzy matches ",
      "within genus '", genus_formatted, "' (tied string distances).\n",
      "  The algorithm will automatically select the first match.\n",
      "  To examine ambiguous matches, use: ",
      "get_ambiguous_matches(result, type = 'species')\n",
      "  Consider manual curation for critical applications.",
      call. = FALSE,
      immediate. = TRUE
    )

    # Store enriched information
    attr(matched, "ambiguous_species") <- ambiguous_matches |>
      dplyr::select(Orig.Genus, Orig.Species, Matched.Species,
                    fuzzy_species_dist, scientific_name, common_name) |>
      dplyr::distinct() |>
      dplyr::arrange(Orig.Genus, Orig.Species, fuzzy_species_dist, Matched.Species)
  }

  # ==========================================================================
  # Select First Match for Ambiguous Cases
  # ==========================================================================

  #matched_final <- matched |>
  #  dplyr::group_by(Orig.Genus, Orig.Species) |>
  #  dplyr::arrange(fuzzy_species_dist) |>
  #  dplyr::slice_head(n = 1) |>
  #  dplyr::ungroup()
#
  ## Clean up extra columns before combining
  #matched_final <- matched_final |>
  #  dplyr::select(-dplyr::any_of(c('species', 'species_upper',
  #                                 'scientific_name', 'common_name')))
#
  ## Preserve attribute
  #if (!is.null(attr(matched, "ambiguous_species"))) {
  #  attr(matched_final, "ambiguous_species") <- attr(matched, "ambiguous_species")
  #}
  # When there are multiple candidate matches with the same distance for a
  # given input row, we need to select one. However, we must preserve ALL
  # input rows - the grouping should be by the original input row identity.

  # The issue: grouping by (Orig.Genus, Orig.Species) loses rows when multiple
  # input rows have the same genus+species but different sorter values.
  #
  # Example:
  # Input has: sorter=1 "AKODON TORQES", sorter=2 "AKODON TORQES", sorter=3 "AKODON TORQES"
  # After fuzzy match, all three might match to "AKODON TORQUES"
  # Old code: groups by (AKODON, TORQES), takes 1 row -> LOSES sorter 2 & 3
  # New code: groups by (sorter, AKODON, TORQES), takes 1 row per sorter -> PRESERVES all

  if (nrow(matched) > 0) {
    # Identify columns that were in the original input df
    # These typically include: sorter, Orig.Genus, Orig.Species, Matched.Genus, etc.
    original_cols <- intersect(colnames(df), colnames(matched))

    # Group by original columns to preserve each input row's identity
    matched_final <- matched |>
      dplyr::group_by(dplyr::across(dplyr::all_of(original_cols))) |>
      dplyr::arrange(fuzzy_species_dist) |>
      dplyr::slice_head(n = 1) |>
      dplyr::ungroup()
  } else {
    matched_final <- matched
  }

  # Clean up extra columns before combining
  matched_final <- matched_final |>
    dplyr::select(-dplyr::any_of(c('species', 'species_upper',
                                   'scientific_name', 'common_name')))

  # Preserve attribute
  if (!is.null(attr(matched, "ambiguous_species"))) {
    attr(matched_final, "ambiguous_species") <- attr(matched, "ambiguous_species")
  }

  # ==========================================================================
  # Identify Unmatched (using anti_join on original df)
  # ==========================================================================

  unmatched <- fuzzyjoin::stringdist_anti_join(
    df,
    database_subset,
    by = c('Orig.Species' = 'species_upper')
  )

  # ==========================================================================
  # Validate Row Counts
  # ==========================================================================

  assertthat::assert_that(
    nrow(df) == (nrow(matched_final) + nrow(unmatched)),
    msg = paste0(
      "Row count mismatch in fuzzy_match_species_within_genus_helper:\n",
      "Input: ", nrow(df), " rows\n",
      "Matched: ", nrow(matched_final), " rows\n",
      "Unmatched: ", nrow(unmatched), " rows\n",
      "Genus: ", genus_formatted
    )
  )

  # ==========================================================================
  # Combine and Return
  # ==========================================================================

  combined <- dplyr::bind_rows(
    matched_final,
    unmatched,
    .id = 'fuzzy_match_species_within_genus'
  ) |>
    dplyr::mutate(
      fuzzy_match_species_within_genus = (fuzzy_match_species_within_genus == "1")
    ) |>
    dplyr::relocate(c('Orig.Genus', 'Orig.Species'))

  # Preserve attribute
  if (!is.null(attr(matched_final, "ambiguous_species"))) {
    attr(combined, "ambiguous_species") <- attr(matched_final, "ambiguous_species")
  }

  return(combined)
}


#' Retrieve Ambiguous Match Information for Peru Mammals
#'
#' @description
#' Extracts information about ambiguous matches (multiple candidates with
#' tied distances) from matching results. Useful for quality control and
#' manual curation. Adapted for peru_mammals (genus and species only).
#'
#' @param match_result A tibble returned by matching functions.
#' @param type Character. Type of ambiguous matches to retrieve:
#'   \itemize{
#'     \item \code{"genus"} (default): Ambiguous genus-level matches
#'     \item \code{"species"}: Ambiguous species-level matches
#'     \item \code{"all"}: Both types
#'   }
#' @param save_to_file Logical. If TRUE, saves results to CSV.
#'   Default is FALSE (CRAN compliant).
#' @param output_dir Character. Directory to save file if save_to_file = TRUE.
#'   Defaults to \code{tempdir()}.
#'
#' @return
#' A tibble with ambiguous match details, or NULL if none exist.
#' Includes original names, matched names, distances, and database metadata.
#'
#' @details
#' During fuzzy matching, multiple candidates may have identical string
#' distances. The matching algorithm automatically selects the first candidate,
#' but this function allows you to review all alternatives for quality control.
#'
#'
#' @export
get_ambiguous_matches <- function(match_result,
                                  type = c("genus", "species", "all"),
                                  save_to_file = FALSE,
                                  output_dir = tempdir()) {
  type <- match.arg(type)

  if (!inherits(match_result, "data.frame")) {
    stop("match_result must be a data frame or tibble.", call. = FALSE)
  }

  # ==========================================================================
  # Helper to Extract and Standardize
  # ==========================================================================

  extract_ambiguous <- function(attr_name, match_type_label) {
    data <- attr(match_result, attr_name)
    if (is.null(data) || !is.data.frame(data) || nrow(data) == 0) {
      return(NULL)
    }
    tibble::as_tibble(data) |>
      dplyr::mutate(Match_Type = match_type_label, .before = 1)
  }

  # ==========================================================================
  # Extract Based on Type
  # ==========================================================================

  result_list <- list()

  if (type %in% c("genus", "all")) {
    result_list$genus <- extract_ambiguous("ambiguous_genera", "Genus")
  }

  if (type %in% c("species", "all")) {
    result_list$species <- extract_ambiguous("ambiguous_species", "Species")
  }

  # Remove NULLs
  result_list <- purrr::compact(result_list)

  if (length(result_list) == 0) {
    message("No ambiguous ", type, " matches found.")
    return(invisible(NULL))
  }

  # ==========================================================================
  # Combine Results
  # ==========================================================================

  result <- if (type == "all") {
    dplyr::bind_rows(result_list)
  } else {
    result_list[[type]]
  }

  # Remove duplicates
  result <- result |>
    dplyr::distinct()

  # ==========================================================================
  # Summary
  # ==========================================================================

  n_rows <- nrow(result)
  orig_cols <- grep("^Orig\\.", names(result), value = TRUE)
  n_orig <- if (length(orig_cols) > 0) {
    result |>
      dplyr::distinct(dplyr::across(dplyr::all_of(orig_cols))) |>
      nrow()
  } else {
    NA_integer_
  }

  message(
    "Found ", n_rows, " ambiguous match(es)",
    if (!is.na(n_orig)) paste0(" for ", n_orig, " unique input name(s)") else "",
    "."
  )

  # ==========================================================================
  # Optional File Save
  # ==========================================================================

  if (save_to_file) {
    if (!dir.exists(output_dir)) {
      dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
    }

    filename <- sprintf(
      "perumammals_ambiguous_%s_%s.csv",
      type,
      format(Sys.time(), "%Y%m%d_%H%M%S")
    )
    filepath <- file.path(output_dir, filename)

    tryCatch({
      readr::write_csv(result, filepath)
      message("Saved to: ", filepath)
    }, error = function(e) {
      warning("Failed to save file: ", e$message, call. = FALSE)
    })
  }

  return(result)
}
Any scripts or data that you put into this service are public.
perumammals documentation built on Jan. 6, 2026, 5:06 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
perumammals
Taxonomic Backbone and Name Validation Tools for Mammals of Peru

R/pm_fuzzy_match.R
In perumammals: Taxonomic Backbone and Name Validation Tools for Mammals of Peru

Defines functions get_ambiguous_matches fuzzy_match_species_within_genus_helper fuzzy_match_species_within_genus fuzzy_match_genus

Documented in fuzzy_match_genus fuzzy_match_species_within_genus fuzzy_match_species_within_genus_helper get_ambiguous_matches

Try the perumammals package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

perumammals Taxonomic Backbone and Name Validation Tools for Mammals of Peru

R/pm_fuzzy_match.R In perumammals: Taxonomic Backbone and Name Validation Tools for Mammals of Peru

Defines functions get_ambiguous_matches fuzzy_match_species_within_genus_helper fuzzy_match_species_within_genus fuzzy_match_genus

Documented in fuzzy_match_genus fuzzy_match_species_within_genus fuzzy_match_species_within_genus_helper get_ambiguous_matches

Try the perumammals package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

perumammals
Taxonomic Backbone and Name Validation Tools for Mammals of Peru

R/pm_fuzzy_match.R
In perumammals: Taxonomic Backbone and Name Validation Tools for Mammals of Peru