tests/testthat/test-pm_integration.R

# =============================================================================
# TEST SUITE 1: Tests de Integración Completa
# =============================================================================

test_that("Flujo completo de matching funciona end-to-end", {
  data("peru_mammals", package = "perumammals")

  # Test 1.1: Tomar muestra de especies del dataset y hacer matching
  sample_species <- c(
    "Platyrrhinus brachycephalus",
    "Monodelphis ronaldi",
    "Eptesicus furinalis",
    "Nephelomys levipes",
    "Leopardus jacobita",
    "Nectomys rattus",
    "Trinycteris nicefori",
    "Puma yagouaroundi",
    "Saimiri sciureus",
    "Akodon boliviensis",
    "Lycalopex sechurae",
    "Anoura aequatoris",
    "Neusticomys peruviensis",
    "Hylaeamys yunganus",
    "Chrotopterus auritus",
    "Coendou prehensilis",
    "Gardnerycteris koepckeae",
    "Cyclopes ida",
    "Thomasomys pyrrhonotus",
    "Monodelphis gardeni",
    "Myotis albescens",
    "Hadrosciurus sp. 3",
    "Myoprocta pratti",
    "Chibchanomys orcesi",
    "Oxymycterus inca",
    "Cabassous unicinctus",
    "Monodelphis handleyi",
    "Lagidium viscacia",
    "Megaptera novaeangliae",
    "Dasyprocta variegata",
    "Myotis riparius",
    "Myotis oxyotus",
    "Amphinectomys savamis",
    "Akodon orophilus",
    "Platyrrhinus ismaeli",
    "Caenolestes caniventer",
    "Odocoileus peruvianus",
    "Proechimys simonsi",
    "Cynomops abrasus",
    "Potos flavus",
    "Cynomops kuizha",
    "Nyctinomops laticaudatus",
    "Arctocephalus australis",
    "Mesomys leniceps",
    "Thomasomys macrotis",
    "Monodelphis glirina",
    "Centronycteris maximiliani",
    "Isothrix barbarabrownae",
    "Neotomys ebriosus",
    "Marmosa simonsi",
    "Oligoryzomys sp. C"
  )

  # Verificar
  length(sample_species)  # Debe ser 50
  result <- validate_peru_mammals(sample_species, quiet = TRUE)
  sample_size <- nrow(result)
  expect_equal(nrow(result), sample_size)
  expect_true(all(result$matched))
  expect_true(all(result$Matched.Rank == 2L))

  # Test 1.2: Verificar que los matches son correctos
  for (i in 1:min(10, sample_size)) {
    original <- sample_species[i]
    matched <- result$Matched.Name[i]
    expect_equal(original, matched)
  }
})

test_that("Matching preserva orden de input", {
  species_ordered <- c(
    "Mus musculus",
    "Akodon torques",
    "Rattus rattus",
    "Thomasomys kalinowskii"
  )

  result <- validate_peru_mammals(species_ordered, quiet = TRUE)

  # Test 1.3: Verificar que el orden se mantiene
  expect_equal(result$Orig.Name, species_ordered)
  expect_equal(nrow(result), length(species_ordered))
})


# =============================================================================
# TEST SUITE 2: Edge Cases - Nombres Problemáticos
# =============================================================================

test_that("Manejo de caracteres especiales", {
  # Test 2.1: Nombres con números
  result_num <- validate_peru_mammals("Species123", quiet = TRUE)
  expect_false(result_num$matched)

  # Test 2.2: Nombres con guiones
  result_hyphen <- validate_peru_mammals("Genus-species", quiet = TRUE)
  expect_s3_class(result_hyphen, "data.frame")

  # Test 2.3: Nombres con puntos
  result_dot <- validate_peru_mammals("Genus sp.", quiet = TRUE)
  expect_s3_class(result_dot, "data.frame")

  # Test 2.4: Nombres con paréntesis
  result_paren <- validate_peru_mammals("Genus (species)", quiet = TRUE)
  expect_s3_class(result_paren, "data.frame")
})


test_that("Manejo de caracteres Unicode y acentos", {
  # Test 2.9: Nombres con tildes
  result_accent <- validate_peru_mammals("Génus spécies", quiet = TRUE)
  expect_s3_class(result_accent, "data.frame")

  # Test 2.10: Caracteres no ASCII
  result_unicode <- validate_peru_mammals("Genüs spëcies", quiet = TRUE)
  expect_s3_class(result_unicode, "data.frame")
})


# =============================================================================
# TEST SUITE 3: Edge Cases - Vectores Especiales
# =============================================================================

test_that("Manejo de vectores edge case", {
  # Test 3.1: Vector de un solo elemento
  result_single <- validate_peru_mammals("Akodon torques", quiet = TRUE)
  result_single
  expect_equal(nrow(result_single), 1)

  # Test 3.2: Vector con muchos NAs
  many_nas <- c(NA, NA, "Akodon torques", NA, "Mus musculus", NA)
  result_nas <- validate_peru_mammals(many_nas, quiet = TRUE)
  expect_equal(nrow(result_nas), length(many_nas))

  # Test 3.4: Vector con strings vacíos
  empties <- c("", "", "Akodon torques", "")
  result_empties <- validate_peru_mammals(empties, quiet = TRUE)
  expect_equal(nrow(result_empties), 4)

  # Test 3.5: Vector con todo el mismo valor repetido
  repeated <- rep("Akodon torques", 20)
  result_repeated <- validate_peru_mammals(repeated, quiet = TRUE)
  expect_equal(nrow(result_repeated), 20)
  expect_true(all(result_repeated$matched))
})



# =============================================================================
# TEST SUITE 4: Edge Cases - Fuzzy Matching Límites
# =============================================================================

test_that("Fuzzy matching con distancias extremas", {
  # Test 4.1: Solo un caracter diferente
  result_1char <- validate_peru_mammals("Akodn torques", quiet = TRUE)  # Falta una 'o'
  expect_s3_class(result_1char, "data.frame")

  # Test 4.2: Dos caracteres diferentes
  result_2char <- validate_peru_mammals("Akdn torques", quiet = TRUE)  # Faltan 'o' y 'o'
  expect_s3_class(result_2char, "data.frame")

  # Test 4.3: Nombre completamente diferente (no debe hacer match)
  result_different <- validate_peru_mammals("Xxxxx yyyyy", quiet = TRUE)
  expect_false(result_different$matched)

  # Test 4.4: Transposición de letras
  result_transpose <- validate_peru_mammals("Akdoon torques", quiet = TRUE)
  expect_s3_class(result_transpose, "data.frame")
})

test_that("Fuzzy matching con nombres similares", {
  data("peru_mammals", package = "perumammals")

  # Test 4.5: Si hay géneros similares, fuzzy debería elegir el más cercano
  # Esto dependerá de qué géneros realmente existan en peru_mammals

  # Test 4.6: Especies del mismo género con nombres similares
  akodon_species <- subset(peru_mammals, genus == "Akodon")

  if (nrow(akodon_species) >= 2) {
    # Tomar dos especies y crear typos
    sp1 <- akodon_species$species[1]
    sp1_typo <- paste0(substr(sp1, 1, nchar(sp1)-1), "x")

    result_sp_typo <- validate_peru_mammals(paste("Akodon", sp1_typo), quiet = TRUE)
    expect_s3_class(result_sp_typo, "data.frame")
  }
})

test_that("Performance con muchos fuzzy matches", {
  # Test 5.2: Lista con muchos typos (fuzzy matching intensivo)
  # Usamos solo typos que sabemos que existen en la DB
  typos <- c(
    "Akdon torques",      # typo en genus (Akodon existe)
    "Akodon torqes",      # typo en species (torques existe)
    "Akdon torqes",       # typo en ambos
    "Panthera onca",      # correcto para control
    "Pantera onca"        # typo en genus (Panthera existe)
  )

  start_time <- Sys.time()

  # Suprimir warnings de ambiguous matches (son informativos, no errores)
  suppressWarnings({
    result_typos <- validate_peru_mammals(rep(typos, 10), quiet = TRUE)
  })

  end_time <- Sys.time()

  # Performance: menos de 20 segundos
  execution_time <- as.numeric(difftime(end_time, start_time, units = "secs"))
  expect_true(execution_time < 20)

  # Verificaciones de resultado
  expect_equal(nrow(result_typos), 50)

  # Todos deben matchear porque todos los nombres base existen en Peru
  expect_true(all(result_typos$matched))

  # Debe haber fuzzy matches en genus
  expect_true(any(result_typos$genus_dist > 0, na.rm = TRUE))

  # Debe haber fuzzy matches en species
  expect_true(any(result_typos$species_dist > 0, na.rm = TRUE))

  # Verificar metadata de ambiguous matches
  expect_true(!is.null(attr(result_typos, "ambiguous_genera")) ||
                !is.null(attr(result_typos, "ambiguous_species")))
})


# =============================================================================
# TEST SUITE 5: Tests de Performance
# =============================================================================

test_that("Performance con datasets grandes (solo exact matches)", {
  # Solo nombres correctos para medir performance pura
  large_valid <- rep(c("Akodon torques",
                       "Panthera onca",      # Sin typo
                       "Thomasomys kalinowskii",
                       "Puma concolor"),
                     length.out = 200)

  start_time <- Sys.time()
  result_large <- validate_peru_mammals(large_valid, quiet = TRUE)
  end_time <- Sys.time()

  # Verificaciones
  expect_equal(nrow(result_large), 200)
  expect_equal(sum(result_large$matched), 200)  # Todos deben matchear

  # TODOS deben ser exact matches
  all_exact <- all(result_large$genus_dist == 0 &
                     result_large$species_dist == 0)
  expect_true(all_exact)

  # Performance debe ser razonable
  execution_time <- as.numeric(difftime(end_time, start_time, units = "secs"))
  expect_true(execution_time < 30)
})


test_that("Fuzzy matching con ambiguous matches", {
  # Específicamente probar casos ambiguos
  ambiguous_names <- c(
    "Pantera onca",     # Typo → Panthera
    "Akdon torques"     # Typo → Akodon
  )

  # Capturar TODOS los warnings
  warnings_caught <- character()

  result <- withCallingHandlers(
    validate_peru_mammals(rep(ambiguous_names, 25), quiet = TRUE),
    warning = function(w) {
      warnings_caught <<- c(warnings_caught, conditionMessage(w))
      invokeRestart("muffleWarning")
    }
  )

  # Verificar que se generaron warnings de ambiguous matches
  expect_true(any(grepl("multiple fuzzy matches", warnings_caught)))
  expect_true(length(warnings_caught) >= 2)  # Debería haber al menos 2

  # Todos deben matchear a pesar de ser fuzzy
  expect_equal(sum(result$matched), 50)

  # Verificar que hay ambiguous matches guardados
  ambig <- get_ambiguous_matches(result, type = "all")
  expect_true(!is.null(ambig))
  expect_true(nrow(ambig) > 0)

  # Verificar que hay al menos 4 casos ambiguos
  expect_true(nrow(ambig) >= 4)
})


# =============================================================================
# TEST SUITE 6: Integración con Ecorregiones
# =============================================================================

test_that("Integración entre especies y ecorregiones", {
  data("peru_mammals", package = "perumammals")
  data("peru_mammals_ecoregions", package = "perumammals")

  # Test 6.1: Matching de especies y lookup de ecorregiones
  test_species <- head(peru_mammals$scientific_name, 5)
  match_result <- validate_peru_mammals(test_species, quiet = TRUE)

  # Para cada especie matched, debe haber info de ecorregiones disponible
  for (i in 1:nrow(match_result)) {
    if (match_result$matched[i]) {
      species_name <- match_result$Matched.Name[i]
      eco_info <- subset(peru_mammals_ecoregions,
                         scientific_name == species_name)

      # Puede o no tener ecorregiones, pero la búsqueda debe funcionar
      expect_s3_class(eco_info, "data.frame")
    }
  }
})

test_that("Workflow completo: match + lookup de metadatos", {
  data("peru_mammals", package = "perumammals")

  # Test 6.2: Flujo completo de usuario típico
  user_input <- c("Akodon torques", "Invalid name", "Mus musculus")

  # Paso 1: Matching
  match_result <- validate_peru_mammals(user_input, quiet = TRUE)
  expect_equal(nrow(match_result), 3)

  # Paso 2: Filtrar solo los matched
  valid_matches <- subset(match_result, matched == TRUE)
  #valid_matches
  expect_true(nrow(valid_matches) == 1)

  # Paso 3: Lookup de información adicional
  for (i in 1:nrow(valid_matches)) {
    species_name <- valid_matches$Matched.Name[i]
    species_info <- subset(peru_mammals, scientific_name == species_name)

    expect_true(nrow(species_info) == 1)
    expect_true("family" %in% names(species_info))
    expect_true("endemic" %in% names(species_info))
  }
})


# =============================================================================
# TEST SUITE 7: Consistencia de Resultados
# =============================================================================

test_that("Resultados son determinísticos", {
  # Test 7.1: Múltiples ejecuciones dan mismo resultado
  species_list <- c("Akodon torques", "Mus musculus", "Genus unknown")

  results <- list()
  for (i in 1:5) {
    results[[i]] <- validate_peru_mammals(species_list, quiet = TRUE)
  }

  # Todos los resultados deben ser idénticos
  for (i in 2:5) {
    expect_equal(results[[1]]$Matched.Name, results[[i]]$Matched.Name)
    expect_equal(results[[1]]$matched, results[[i]]$matched)
    expect_equal(results[[1]]$Matched.Rank, results[[i]]$Matched.Rank)
  }
})

test_that("Independencia entre llamadas", {
  # Test 7.2: Una llamada no afecta a la siguiente
  result1 <- validate_peru_mammals("Akodon torques", quiet = TRUE)
  result2 <- validate_peru_mammals("Mus musculus", quiet = TRUE)
  result3 <- validate_peru_mammals("Akodon torques", quiet = TRUE)

  # Primera y tercera llamada deben dar exactamente el mismo resultado
  expect_equal(result1$Matched.Name, result3$Matched.Name)
  expect_equal(result1$matched, result3$matched)
})


# =============================================================================
# TEST SUITE 8: Compatibilidad con dplyr/tidyverse
# =============================================================================

test_that("Resultados son compatibles con tidyverse", {
#  skip_if_not_installed("dplyr")

  library(dplyr)

  # Test 8.1: Resultado se puede usar con %>%
  result <- validate_peru_mammals(c("Akodon torques", "Mus musculus"), quiet = TRUE) %>%
    filter(matched == TRUE) %>%
    select(Matched.Name, Matched.Genus, Matched.Species)
#result
  expect_s3_class(result, "data.frame")
  expect_true(nrow(result) == 1)

  # Test 8.2: Se puede hacer join con otros datasets
  data("peru_mammals", package = "perumammals")

  match_result <- validate_peru_mammals(c("Akodon torques", "Mus musculus"), quiet = TRUE)

  joined <- match_result %>%
    filter(matched == TRUE) %>%
    left_join(peru_mammals,
              by = c("Matched.Name" = "scientific_name"))

  expect_s3_class(joined, "data.frame")
#  joined
  expect_true("family.x" %in% names(joined))
})


# =============================================================================
# TEST SUITE 9: Validación de Mensajes de Error
# =============================================================================

test_that("Mensajes de error son informativos", {
  # Test 9.1: Error con input NULL
  expect_error(
    validate_peru_mammals(NULL),
    "character vector"
  )

  # Test 9.2: Error con input numérico
  expect_error(
    validate_peru_mammals(123),
    "character vector"
  )

  # Test 9.3: Error con quiet inválido
#  expect_error(
 #   validate_peru_mammals("Akodon torques", quiet = TRUE),
  #  "logical"
  #)
})


# =============================================================================
# TEST SUITE 10: Tests de Regresión
# =============================================================================

test_that("Casos conocidos mantienen comportamiento esperado", {
  # Test 10.1: Especies comunes que deben hacer match
  common_species <- c(
    "Mus musculus",
    "Rattus rattus"
  )

  result <- validate_peru_mammals(common_species, quiet = TRUE)
  #result
  expect_false(all(result$matched))

  # Test 10.2: Género que debe existir
  result_genus <- validate_peru_mammals("Akodon", quiet = TRUE)
 # result_genus
  expect_true(!result_genus$matched)
  expect_equal(result_genus$Matched.Rank, NA_real_)

  # Test 10.3: Nombre que no debe existir
  result_invalid <- validate_peru_mammals("Fakeus nonexistus", quiet = TRUE)
  expect_false(result_invalid$matched)
})



# =============================================================================
# TEST SUITE 12: Validación Cruzada de Datos
# =============================================================================

test_that("Datos son internamente consistentes", {
  data("peru_mammals", package = "perumammals")
  data("peru_mammals_ecoregions", package = "perumammals")

  # Test 12.1: Todas las especies en ecoregions están en el dataset principal
  eco_species <- unique(peru_mammals_ecoregions$scientific_name)
  main_species <- peru_mammals$scientific_name

  missing_species <- setdiff(eco_species, main_species)
  expect_equal(length(missing_species), 0,
               info = paste("Missing species:", paste(missing_species, collapse = ", ")))

  # Test 12.2: pm_ids son únicos y consistentes
  eco_ids <- unique(peru_mammals_ecoregions$pm_id)
  main_ids <- peru_mammals$pm_id

  missing_ids <- setdiff(eco_ids, main_ids)
  expect_equal(length(missing_ids), 0)
})

Try the perumammals package in your browser

Any scripts or data that you put into this service are public.

perumammals documentation built on Jan. 6, 2026, 5:06 p.m.