Nothing
# Tests for reptiledb_012025 dataset
library(testthat)
library(reptiledb.data)
test_that("reptiledb_012025 dataset exists and loads correctly", {
expect_true(exists("reptiledb_012025"))
expect_s3_class(reptiledb_012025, "data.frame")
expect_s3_class(reptiledb_012025, "tbl_df")
})
test_that("reptiledb_012025 has correct structure", {
# Test number of columns
expect_equal(ncol(reptiledb_012025), 13)
# Test number of rows (approximately)
expect_gt(nrow(reptiledb_012025), 14000)
expect_lt(nrow(reptiledb_012025), 20000) # reasonable upper bound
# Test expected column names
expected_cols <- c("order", "family", "genus", "epithet", "species",
"species_author", "species_name_year", "subspecies_name",
"subspecie_author_info", "subspecies_name_author",
"subspecies_year", "change", "rdb_sp_id")
expect_equal(colnames(reptiledb_012025), expected_cols)
})
test_that("reptiledb_012025 has correct column types", {
# Factor columns
expect_s3_class(reptiledb_012025$order, "factor")
expect_s3_class(reptiledb_012025$family, "factor")
expect_s3_class(reptiledb_012025$genus, "factor")
expect_s3_class(reptiledb_012025$epithet, "factor")
expect_s3_class(reptiledb_012025$species_author, "factor")
expect_s3_class(reptiledb_012025$change, "factor")
# Character columns
expect_type(reptiledb_012025$species, "character")
expect_type(reptiledb_012025$species_name_year, "character")
expect_type(reptiledb_012025$subspecies_name, "character")
expect_type(reptiledb_012025$subspecie_author_info, "character")
expect_type(reptiledb_012025$subspecies_name_author, "character")
expect_type(reptiledb_012025$subspecies_year, "character")
# Numeric columns
expect_type(reptiledb_012025$rdb_sp_id, "double")
})
test_that("reptiledb_012025 key columns have no missing values", {
# Essential taxonomic columns should not be completely empty
expect_true(all(!is.na(reptiledb_012025$order)))
expect_true(all(!is.na(reptiledb_012025$family)))
expect_true(all(!is.na(reptiledb_012025$genus)))
expect_true(all(!is.na(reptiledb_012025$epithet)))
expect_true(all(!is.na(reptiledb_012025$species)))
expect_true(all(!is.na(reptiledb_012025$rdb_sp_id)))
})
test_that("reptiledb_012025 species column format is correct", {
# Species should be in format "Genus epithet"
species_pattern <- "^[A-Z][a-z]+ [a-z]+$"
species_valid <- grepl(species_pattern, reptiledb_012025$species)
# Allow for some exceptions but expect most to follow pattern
valid_percentage <- sum(species_valid) / length(species_valid)
expect_gt(valid_percentage, 0.95) # At least 95% should be valid
})
test_that("reptiledb_012025 has expected reptile orders", {
# Check for major reptile orders
expected_orders <- c("Sauria", "Serpentes", "Testudines", "Crocodilia")
actual_orders <- levels(reptiledb_012025$order)
# At least some of the major orders should be present
major_orders_present <- sum(expected_orders %in% actual_orders)
expect_gt(major_orders_present, 2) # At least 3 major orders
})
test_that("reptiledb_012025 year columns have reasonable values", {
# Remove NA values for testing
species_years <- reptiledb_012025$species_name_year[!is.na(reptiledb_012025$species_name_year)]
subspecies_years <- reptiledb_012025$subspecies_year[!is.na(reptiledb_012025$subspecies_year)]
if(length(species_years) > 0) {
# Extract numeric years (assuming format like "1901", "1832", etc.)
species_year_nums <- as.numeric(species_years)
species_year_nums <- species_year_nums[!is.na(species_year_nums)]
if(length(species_year_nums) > 0) {
expect_true(all(species_year_nums >= 1750)) # Linnaean nomenclature era
expect_true(all(species_year_nums <= 2025)) # Current year
}
}
if(length(subspecies_years) > 0) {
subspecies_year_nums <- as.numeric(subspecies_years)
subspecies_year_nums <- subspecies_year_nums[!is.na(subspecies_year_nums)]
if(length(subspecies_year_nums) > 0) {
expect_true(all(subspecies_year_nums >= 1750))
expect_true(all(subspecies_year_nums <= 2025))
}
}
})
test_that("reptiledb_012025 subspecies data is consistent", {
# When subspecies_name is present, other subspecies fields should also be present
has_subspecies <- !is.na(reptiledb_012025$subspecies_name)
if(sum(has_subspecies) > 0) {
subspecies_data <- reptiledb_012025[has_subspecies, ]
# If subspecies name exists, at least author info should exist
expect_true(sum(!is.na(subspecies_data$subspecie_author_info)) > 0 |
sum(!is.na(subspecies_data$subspecies_name_author)) > 0)
}
})
test_that("reptiledb_012025 family names follow conventions", {
# Reptile family names typically end in -idae
family_names <- levels(reptiledb_012025$family)
idae_families <- sum(grepl("idae$", family_names))
# Most families should follow this convention
expect_gt(idae_families / length(family_names), 0.7) # At least 70%
})
test_that("reptiledb_012025 data completeness", {
# Calculate completeness for each column
completeness <- sapply(reptiledb_012025, function(x) sum(!is.na(x)) / length(x))
# Core taxonomic fields should be highly complete
expect_gt(completeness[["order"]], 0.99)
expect_gt(completeness[["family"]], 0.99)
expect_gt(completeness[["genus"]], 0.99)
expect_gt(completeness[["epithet"]], 0.99)
expect_gt(completeness[["species"]], 0.99)
# Subspecies fields are expected to have many NAs (not all species have subspecies)
expect_lt(completeness[["subspecies_name"]], 0.5) # Less than 50% complete is normal
})
test_that("reptiledb_012025 has reasonable data distribution", {
# Test that we have multiple families represented
expect_gt(length(levels(reptiledb_012025$family)), 50)
# Test that we have multiple genera
expect_gt(length(levels(reptiledb_012025$genus)), 1000)
# Test that Scincidae family is present (as shown in example)
expect_true("Scincidae" %in% levels(reptiledb_012025$family))
# Test that Sauria order is present (as shown in example)
expect_true("Sauria" %in% levels(reptiledb_012025$order))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.