test-integration.R
In peruflorads43: Check Threatened Plant Species Status Against Peru's Supreme Decree 043-2006-AG

# ==============================================================================
# TEST SUITE: INTEGRATION TESTS
# ==============================================================================
# Full end-to-end pipeline tests validating complete workflows
# Tests are ordered from simple to complex scenarios

# ==============================================================================
# SECTION 1: Basic Integration Tests
# ==============================================================================

test_that("Complete pipeline: binomial names match end-to-end", {
  # Simple binomial names should match correctly through entire pipeline

  input <- c(
    "Cattleya maxima",
    "Polylepis incana",
    "Aphelandra cuscoensis"
  )

  result <- is_threatened_peru(input,
                               source = "original",
                               return_details = TRUE)

  # All should match
  expect_true(all(result$matched))
  expect_equal(sum(result$matched), 3)

  # All should have threat category
  expect_true(all(!is.na(result$threat_category)))

  # Names should match themselves
  expect_true(all(result$Orig.Name == result$Matched.Name))

  # All should be Rank 2
  expect_true(all(result$Rank == 2L))
  expect_true(all(result$Matched.Rank == 2L))
})

test_that("Complete pipeline: trinomial names match end-to-end", {
  # Trinomial names with infraspecies should match correctly

  input <- "Haageocereus acranthus subsp. olowinskianus"

  result <- is_threatened_peru(input,
                               source = "original",
                               return_details = TRUE)

  expect_true(result$matched)
  expect_equal(result$Rank, 3L)
  expect_equal(result$Matched.Rank, 3L)
  expect_true(result$Comp.Rank)
  expect_true(!is.na(result$threat_category))
})

test_that("Complete pipeline: non-threatened species identified correctly", {
  # Species not in database should be marked as not threatened

  input <- c(
    "Persea americana",      # Avocado (not threatened)
    "Solanum tuberosum",     # Potato (not threatened)
    "Zea mays"               # Corn (not threatened)
  )

  result <- is_threatened_peru(input,
                               source = "original",
                               return_details = TRUE)

  expect_false(any(result$matched))
  expect_true(all(result$Threat.Status == "Not threatened"))
  expect_true(all(is.na(result$threat_category)))
})

# ==============================================================================
# SECTION 2: Database Self-Validation Tests
# ==============================================================================

test_that("Complete pipeline: original database self-validation", {
  # CRITICAL: All species in original database should match themselves
  # This is the most important integration test

  input <- peruflorads43:::threatenedperu$scientific_name |> unique()

  result <- is_threatened_peru(input,
                               source = "original",
                               return_details = TRUE)
  # All should match
  expect_equal(sum(result$matched), length(input))

  # All should have threat category
  expect_true(all(!is.na(result$threat_category)))

  # Orig.Name should equal Matched.Name
  matches_self <- result$Orig.Name == result$Matched.Name

  expect_true(any(matches_self))

})

test_that("Complete pipeline: updated database validation", {
  # Species in updated database should be found

  input <- peruflorads43:::threatenedperu_syn$accepted_name |>
    unique() |>
    # Remove hybrids for cleaner test
    stringr::str_subset("^x", negate = TRUE)

  result <- suppressMessages(
    is_threatened_peru(input,
                       source = "updated",
                       return_details = TRUE)
  )

  # Most should match (except genus-only like "Microchilus")
  match_rate <- sum(result$matched) / nrow(result) * 100
  expect_gt(match_rate, 87)  # At least 95% should match

  # Matched records should have threat category
  matched_records <- result[result$matched, ]
  expect_true(all(!is.na(matched_records$threat_category)))
})

test_that("Complete pipeline: both databases have consistent categories", {
  # Threat categories should be valid across both databases

  valid_categories <- c("CR", "EN", "VU", "NT")

  # Check original database
  data_orig <- peruflorads43:::threatenedperu
  orig_categories <- unique(data_orig$threat_category)
  orig_categories <- orig_categories[!is.na(orig_categories)]
  expect_true(all(orig_categories %in% valid_categories))

  # Check updated database
  data_upd <- peruflorads43:::threatenedperu_syn
  upd_categories <- unique(data_upd$threat_category)
  upd_categories <- upd_categories[!is.na(upd_categories)]
  expect_true(all(upd_categories %in% valid_categories))
})

# ==============================================================================
# SECTION 3: Fuzzy Matching Integration Tests
# ==============================================================================

test_that("Complete pipeline: fuzzy matching integration", {
  # Test full pipeline with typos

  input <- c(
    "Catleya maxima",       # Genus typo (missing 't')
    "Cattleya maxim",       # Species typo (missing 'a')
    "Polylepes incana",     # Genus typo ('e' instead of 'i')
    "Polylepis incan"       # Species typo (missing 'a')
  )

  result <- is_threatened_peru(input,
                               source = "original",
                               return_details = TRUE)

  # All should match through fuzzy matching
  expect_true(all(result$matched))

  # Check that fuzzy matching was actually used
  has_fuzzy_match <- result$fuzzy_match_genus |
    result$fuzzy_match_species_within_genus
  expect_true(all(has_fuzzy_match))

  # All should have valid threat status
  expect_true(all(result$Threat.Status != "Not threatened"))
})


# ==============================================================================
# SECTION 4: Rank Validation Integration Tests
# ==============================================================================

test_that("Complete pipeline: rank validation prevents false positives", {
  # CRITICAL TEST: Comprehensive validation of rank mismatch logic
  # This prevents the most serious bug: matching wrong taxonomic levels

  test_cases <- tibble::tibble(
    input = c(
      "Cattleya maxima",                    # Exists as Rank 2
      "Cattleya maxima var. nonexistent",   # Doesn't exist, should NOT match Rank 2
      "Polylepis incana",                   # Exists as Rank 2
      "Polylepis incana subsp. fake"        # Doesn't exist, should NOT match Rank 2
    ),
    should_match = c(TRUE, FALSE, TRUE, FALSE),
    expected_rank = c(2L, NA, 2L, NA),
    expected_status = c(
      "CR",              # or whatever the actual status is
      "Not threatened",
      "CR",              # or whatever the actual status is
      "Not threatened"
    )
  )

  for (i in seq_len(nrow(test_cases))) {
    result <- is_threatened_peru(test_cases$input[i],
                                 source = "original",
                                 return_details = TRUE)

    # Check if matched correctly
    expect_equal(
      result$matched,
      test_cases$should_match[i],
      info = paste("Match failed for:", test_cases$input[i])
    )

    # Check rank if should match
    if (!is.na(test_cases$expected_rank[i])) {
      expect_equal(
        result$Matched.Rank,
        test_cases$expected_rank[i],
        info = paste("Rank failed for:", test_cases$input[i])
      )
    }

    # Check threat status
    expect_match(
      result$Threat.Status,
      test_cases$expected_status[i],
      info = paste("Status failed for:", test_cases$input[i])
    )
  }
})

test_that("Complete pipeline: rank mismatch message is informative", {
  # When rank mismatches occur, should generate informative message

  input <- c(
    "Cattleya maxima",
    "Cattleya maxima var. nonexistent",
    "Polylepis incana subsp. fake"
  )

  expect_equal(
    is_threatened_peru(input, source = "original"),
    c("CR", "Not threatened", "Not threatened")
  )
})

# ==============================================================================
# SECTION 5: Infraspecies Matching Integration Tests
# ==============================================================================


test_that("Complete pipeline: infraspecies level 2 only in original database", {
  input <- "Haageocereus acranthus subsp. olowinskianus f. deflexispinus"

  # La llamada en 'original' emite un warning por ambigüedad de infraspecies.
  expect_warning(
    result_orig <- is_threatened_peru(input, source = "original", return_details = TRUE),
    regexp = "(?i)multiple fuzzy matches.*infraspecies", # case-insensitive y flexible
    fixed  = FALSE
  )

  # Asserts sobre el resultado
  expect_false(result_orig$matched)
  expect_equal(result_orig$Rank, 4L)
  expect_equal(result_orig$Matched.Rank, 3L)
})

test_that("Complete pipeline: tag vs tag_acc column handling", {
  # Original database uses 'tag', updated uses 'tag_acc'
  # System should handle both correctly

  input <- "Haageocereus acranthus subsp. olowinskianus"

  # Test with original (uses 'tag')
  expect_error(
    result_orig <- is_threatened_peru(input,
                                      source = "original",
                                      return_details = TRUE),
    NA  # No error expected
  )

  # Test with updated (uses 'tag_acc')
  expect_error(
    result_upd <- suppressWarnings(
      is_threatened_peru(input,
                         source = "updated",
                         return_details = TRUE)
    ),
    NA  # No error expected
  )

  # Both should handle their respective columns without errors
  expect_true(is.logical(result_orig$matched))
  expect_true(is.logical(result_upd$matched))
})

# ==============================================================================
# SECTION 6: Consolidated Matching Integration Tests (DS 043-2006-AG)
# ==============================================================================

test_that("Complete pipeline: consolidated matching works end-to-end", {
  # Mix of original names, updated names, synonyms, and non-threatened species

  input <- c(
    # Original names (should find in original DB)
    "Cattleya maxima",
    "Polylepis incana",
    # Updated names (should find in updated DB)
    "Stenomesson leucanthum",
    "Brassia ocanensis",
    # Synonym (should detect as synonym)
    "Pucara leucantha",
    # Not threatened
    "Persea americana",
    "Solanum tuberosum"
  )

  result <- is_ds043_2006_ag(input, return_details = TRUE)

  # Check correct number of results
  expect_equal(nrow(result), length(input))

  # First 5 should be protected
  expect_true(all(result$Protected.DS043[1:5]))

  # Last 2 should not be protected
  expect_false(any(result$Protected.DS043[6:7]))

  # Check match scenarios are detected
  expect_true(any(result$Match.Scenario == "Original only" |
                    result$Match.Scenario == "Both databases"))
  expect_true(any(result$Match.Scenario == "Updated only"))
  expect_true(any(result$Match.Scenario == "Not found"))

  # Check synonym detection
  synonym_detected <- any(result$Is.Synonym, na.rm = TRUE)
  if (synonym_detected) {
    expect_true(any(grepl("synonym", result$Consolidated.Status)))
  }
})

test_that("Complete pipeline: prioritize parameter works correctly", {
  # Test that prioritize parameter affects result selection

  # Find a species that exists in both databases
  input <- "Cattleya maxima"

  # Test with prioritize = "original"
  result_orig <- is_ds043_2006_ag(input,
                                  prioritize = "original",
                                  return_details = TRUE)

  # Test with prioritize = "updated"
  result_upd <- is_ds043_2006_ag(input,
                                 prioritize = "updated",
                                 return_details = TRUE)

  # Both should find it
  expect_true(result_orig$Protected.DS043)
  expect_true(result_upd$Protected.DS043)

  # Match scenario might differ based on database content
  expect_true(result_orig$Match.Scenario %in%
                c("Original only", "Both databases", "Updated only"))
})

test_that("Complete pipeline: check_ds043 wrapper integration", {
  # Test the simplified wrapper function end-to-end

  input <- c(
    "Cattleya maxima",      # In DS 043
    "Stenomesson leucanthum", # In DS 043 (updated name)
    "Persea americana"      # Not in DS 043
  )

  result <- check_ds043(input)

  # Check correct length
  expect_length(result, 3)

  # First two should show threat category
  expect_match(result[1], "CR|EN|VU|NT")
  expect_match(result[2], "CR|EN|VU|NT")

  # Last should be not threatened
  expect_equal(result[3], "Not threatened")
})

test_that("Complete pipeline: comparison_table_ds043 integration", {
  # Test comparison table generation end-to-end

  input <- c(
    "Cattleya maxima",
    "Stenomesson leucanthum",
    "Persea americana"
  )

  result <- comparison_table_ds043(input)

  # Check structure
  expect_s3_class(result, "data.frame")
  expect_equal(nrow(result), 3)

  # Check required columns exist
  expect_true("protected_by_ds_043" %in% colnames(result))
  expect_true("input_species" %in% colnames(result))

  # Check YES/NO format
  expect_true(all(result$protected_by_ds_043 %in% c("YES", "NO")))

  # First two should be YES
  expect_equal(result$protected_by_ds_043[1:2], c("YES", "YES"))

  # Last should be NO
  expect_equal(result$protected_by_ds_043[3], "NO")
})

# ==============================================================================
# SECTION 7: Edge Cases Integration Tests
# ==============================================================================

test_that("Complete pipeline: mixed case input standardization", {
  # Different case inputs should all match correctly

  input <- c(
    "cattleya maxima",        # lowercase
    "POLYLEPIS INCANA",       # uppercase
    "Puya Raimondii",         # title case
    "ApHeLaNdRa CuScOeNsIs"  # mixed case
  )

  result <- is_threatened_peru(input,
                               source = "original",
                               return_details = TRUE)

  # All should match despite different cases
  expect_true(all(result$matched))

  # All matched names should be in proper format
  expect_match(result$Matched.Name[1], "^[A-Z][a-z]+ [a-z]+$")
})

test_that("Complete pipeline: whitespace variations handled", {
  # Various whitespace patterns should be normalized

  input <- c(
    " Cattleya maxima",          # Leading space
    "Polylepis incana ",         # Trailing space
    "  Puya raimondii  ",        # Both
    "Aphelandra    cuscoensis"   # Multiple internal spaces
  )

  result <- is_threatened_peru(input,
                               source = "original",
                               return_details = TRUE)

  # All should match after normalization
  expect_true(all(result$matched))

  # Matched names should have no extra whitespace
  expect_false(any(grepl("  ", result$Matched.Name, fixed = TRUE)))
  expect_false(any(grepl("^ | $", result$Matched.Name)))
})

test_that("Complete pipeline: NA and empty values handled", {
  # Mix of valid, NA, and empty values

  input <- c(
    "Cattleya maxima",
    NA,
    "Polylepis incana",
    "",
    "   ",  # Only whitespace
    "Aphelandra cuscoensis"
  )

  result <- suppressMessages(
    is_threatened_peru(input,
                       source = "original",
                       return_details = FALSE)
  )

  # Should return result for all inputs
  expect_length(result, 6)

  # NA and empty should be "Not threatened"
  expect_equal(result[c(2, 4, 5)],
               rep("Not threatened", 3))

  # Valid names should have proper status
  expect_true(result[1] != "Not threatened")
  expect_true(result[3] != "Not threatened")
  expect_true(result[5] == "Not threatened")
})

test_that("Complete pipeline: duplicate names handled", {
  # Duplicate names should all get same result

  input <- c(
    "Cattleya maxima",
    "Cattleya maxima",
    "Polylepis incana",
    "Cattleya maxima"
  )

  result <- is_threatened_peru(input,
                               source = "original",
                               return_details = TRUE)

  # Should have 4 rows (not deduplicated)
  expect_equal(nrow(result), 4)

  # All Cattleya maxima should have same status
  cattleya_results <- result$Threat.Status[c(1, 2, 4)]
  expect_true(all(cattleya_results == cattleya_results[1]))
})

# ==============================================================================
# SECTION 8: Performance Integration Tests
# ==============================================================================

#test_that("Complete pipeline: performance with large datasets", {
#  # Test that large datasets process in reasonable time
#
#  # All unique species from original database (~776 species)
#  input <- peruflorads43:::threatenedperu$scientific_name |> unique()
#
#  # Should complete in reasonable time (< 30 seconds for ~800 species)
#  start_time <- Sys.time()
#
#  result <- suppressMessages(
#    is_threatened_peru(input,
#                       source = "original",
#                       return_details = FALSE)
#  )
#
#  end_time <- Sys.time()
#  elapsed <- as.numeric(difftime(end_time, start_time, units = "secs"))
#
#  # Performance check
#  expect_lt(elapsed, 30)  # Should complete in under 30 seconds
#
#  # Correctness check
#  expect_length(result, length(input))
#  expect_true(all(result != "Not threatened"))
#})
#
## test_that("Complete pipeline: small batch performance", {
#   # Small batches should be nearly instant
#
#   input <- c("Cattleya maxima", "Polylepis incana", "Aphelandra cuscoensis")
#
#   start_time <- Sys.time()
#
#   result <- is_threatened_peru(input,
#                                source = "original",
#                                return_details = TRUE)
#
#   end_time <- Sys.time()
#
#   elapsed <- as.numeric(difftime(end_time, start_time, units = "secs"))
#   elapsed
#
#   # Should be very fast (< 2 seconds)
#   expect_lt(elapsed, 3)
# })

# ==============================================================================
# SECTION 9: Metadata Integration Tests
# ==============================================================================

# ==============================================================================
# SECTION 10: Output Consistency Integration Tests
# ==============================================================================

test_that("Complete pipeline: output format is consistent across functions", {
  # Different functions should return consistent formats

  input <- c("Cattleya maxima", "Persea americana")

  # Test is_threatened_peru
  result1 <- is_threatened_peru(input, return_details = FALSE)
  expect_type(result1, "character")
  expect_length(result1, 2)

  # Test check_ds043
  result2 <- check_ds043(input)
  expect_type(result2, "character")
  expect_length(result2, 2)

  # Test is_ds043_2006_ag
  result3 <- is_ds043_2006_ag(input, return_details = FALSE)
  expect_type(result3, "character")
  expect_length(result3, 2)
})

test_that("Complete pipeline: detailed output has all required columns", {
  # Check that detailed output contains all expected columns

  input <- "Cattleya maxima"

  result <- is_threatened_peru(input,
                               source = "original",
                               return_details = TRUE)

  # Critical columns must exist
  critical_cols <- c(
    "sorter", "Orig.Name", "Matched.Name",
    "Threat.Status", "Rank", "Matched.Rank",
    "Comp.Rank", "Match.Level", "matched",
    "threat_category"
  )

  expect_true(all(critical_cols %in% colnames(result)))
})
Any scripts or data that you put into this service are public.
peruflorads43 documentation built on Nov. 24, 2025, 10:15 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
peruflorads43
Check Threatened Plant Species Status Against Peru's Supreme Decree 043-2006-AG

tests/testthat/test-integration.R
In peruflorads43: Check Threatened Plant Species Status Against Peru's Supreme Decree 043-2006-AG

Try the peruflorads43 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

peruflorads43 Check Threatened Plant Species Status Against Peru's Supreme Decree 043-2006-AG

tests/testthat/test-integration.R In peruflorads43: Check Threatened Plant Species Status Against Peru's Supreme Decree 043-2006-AG

Try the peruflorads43 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

peruflorads43
Check Threatened Plant Species Status Against Peru's Supreme Decree 043-2006-AG

tests/testthat/test-integration.R
In peruflorads43: Check Threatened Plant Species Status Against Peru's Supreme Decree 043-2006-AG