Nothing
# Snapshot tests for data integrity
test_that("data integrity - snapshots", {
  # Raise the tibble print minimum for the duration of this test only.
  # `add = TRUE` keeps any exit handler that is already registered in this
  # frame instead of silently replacing it.
  saved_options <- options(tibble.print_min = 20)
  on.exit(options(saved_options), add = TRUE)

  # Each snapshot records the first 20 and the last 20 rows of a dataset.
  # The expressions are left exactly as they were because expect_snapshot()
  # stores the code text alongside the printed output.
  # NOTE(review): if these datasets are tibbles, a 40-row slice exceeds the
  # default `tibble.print_max` of 20, so only `print_min` (20) rows would be
  # printed - confirm the trailing rows really appear in the stored snapshot.
  expect_snapshot(babynamesIL[c(1:20, (nrow(babynamesIL) - 19):nrow(babynamesIL)), ])
  expect_snapshot(babynamesIL_totals[c(1:20, (nrow(babynamesIL_totals) - 19):nrow(babynamesIL_totals)), ])
})
# Structural tests for main dataset
test_that("babynamesIL has correct structure", {
  # Column storage types, as reported by typeof().
  expect_type(babynamesIL$sector, "character")
  expect_type(babynamesIL$year, "double")
  expect_type(babynamesIL$sex, "character")
  expect_type(babynamesIL$name, "character")
  expect_type(babynamesIL$n, "integer")
  expect_type(babynamesIL$prop, "double")

  # Exactly six columns, and every required column name must be present.
  required_cols <- c("sector", "year", "sex", "name", "n", "prop")
  absent_cols <- setdiff(required_cols, names(babynamesIL))
  expect_equal(ncol(babynamesIL), 6)
  expect_true(length(absent_cols) == 0)
})
test_that("babynamesIL has correct sectors and sex values", {
  # expect_setequal() ignores order and duplicates, so the raw columns can be
  # compared directly against the allowed values.
  allowed_sectors <- c("Jewish", "Muslim", "Christian-Arab", "Druze")
  allowed_sexes <- c("F", "M")

  expect_setequal(babynamesIL$sector, allowed_sectors)
  expect_setequal(babynamesIL$sex, allowed_sexes)
})
test_that("babynamesIL has correct year range", {
  # range() returns c(min, max) in a single pass over the column.
  year_span <- range(babynamesIL$year)

  expect_equal(year_span[1], 1949)
  expect_equal(year_span[2], 2024)
})
test_that("babynamesIL proportions are valid", {
  # No proportion may fall outside the closed interval [0, 1].
  props <- babynamesIL$prop
  expect_true(!any(props < 0 | props > 1))

  # Within each sector/year/sex group the proportions should add up to
  # roughly 1; a deviation of 0.01 or more fails the test.
  grouped <- dplyr::group_by(babynamesIL, sector, year, sex)
  prop_sums <- dplyr::summarise(grouped, prop_sum = sum(prop), .groups = "drop")
  deviation <- abs(prop_sums$prop_sum - 1)
  expect_true(all(deviation < 0.01))
})
test_that("high-volume names do not have long recent gaps", {
  # Convert a sorted vector of distinct years into one row per pair of
  # consecutive years; `gap` is the number of years missing between the pair.
  consecutive_gaps <- function(yrs) {
    if (length(yrs) >= 2) {
      tibble::tibble(
        prev_year = yrs[-length(yrs)],
        next_year = yrs[-1],
        gap = diff(yrs) - 1
      )
    } else {
      # Fewer than two years: no pairs, so return an empty table with the
      # same columns.
      tibble::tibble(prev_year = numeric(), next_year = numeric(), gap = numeric())
    }
  }

  # One row per sector/sex/name with the years it appears in and its peak
  # yearly count.
  by_name <- dplyr::group_by(babynamesIL, sector, sex, name)
  per_name <- dplyr::summarise(
    by_name,
    years = list(sort(unique(year))),
    max_n = max(n),
    .groups = "drop"
  )

  # Expand every name into its consecutive-year gaps.
  per_name <- dplyr::mutate(per_name, gap_info = purrr::map(years, consecutive_gaps))
  per_name <- dplyr::select(per_name, -years)
  all_gaps <- tidyr::unnest(per_name, gap_info)

  # Offending rows: names whose peak count reached 1000, with a gap of three
  # or more missing years that starts in 2005 or later.
  gap_check <- dplyr::filter(all_gaps, max_n >= 1000, prev_year >= 2005, gap >= 3)

  # List the offending names in the failure message, if there are any.
  offenders <- unique(gap_check$name)
  failure_note <- ""
  if (length(offenders) > 0) {
    failure_note <- paste("Names with gaps:", paste(offenders, collapse = ", "))
  }

  expect_equal(nrow(gap_check), 0, info = failure_note)
})
# Structural tests for totals dataset
test_that("babynamesIL_totals has correct structure", {
  # Four columns, and every required column name must be present.
  required_cols <- c("sector", "sex", "name", "total")
  absent_cols <- setdiff(required_cols, names(babynamesIL_totals))
  expect_equal(ncol(babynamesIL_totals), 4)
  expect_true(length(absent_cols) == 0)

  # expect_setequal() ignores order and duplicates, so the raw columns can be
  # compared directly against the allowed values.
  allowed_sectors <- c("Jewish", "Muslim", "Christian-Arab", "Druze")
  allowed_sexes <- c("F", "M")
  expect_setequal(babynamesIL_totals$sector, allowed_sectors)
  expect_setequal(babynamesIL_totals$sex, allowed_sexes)
})
test_that("totals and yearly data are consistent", {
  # Columns that identify a name in both datasets.
  key_cols <- c("sector", "sex", "name")

  # Each distinct name in the yearly data needs a matching row in the totals:
  # the inner join must return as many rows as there are distinct names.
  yearly_names <- dplyr::distinct(babynamesIL, sector, sex, name)
  matched <- dplyr::inner_join(yearly_names, babynamesIL_totals, by = key_cols)
  expect_equal(nrow(matched), nrow(yearly_names))

  # The recorded total may never be smaller than the sum of the yearly
  # counts for the same name.
  by_name <- dplyr::group_by(babynamesIL, sector, sex, name)
  yearly_sums <- dplyr::summarise(by_name, yearly_sum = sum(n), .groups = "drop")
  yearly_sums <- dplyr::inner_join(yearly_sums, babynamesIL_totals, by = key_cols)
  expect_true(all(yearly_sums$total >= yearly_sums$yearly_sum))

  # The totals table is allowed to hold extra names that never appear in the
  # yearly data, so it must have at least as many rows.
  expect_gte(nrow(babynamesIL_totals), nrow(yearly_names))
})
# Tests for archive datasets
test_that("babynamesIL_1948 exists and has correct structure", {
  expect_true(exists("babynamesIL_1948"))

  # Six columns, and every required column name must be present.
  required_cols <- c("sector", "year", "sex", "name", "n", "prop")
  absent_cols <- setdiff(required_cols, names(babynamesIL_1948))
  expect_equal(ncol(babynamesIL_1948), 6)
  expect_true(length(absent_cols) == 0)

  # The only year present must be 1948.
  distinct_years <- unique(babynamesIL_1948$year)
  expect_equal(distinct_years, 1948)

  # This archive is expected to contain the sector label "Christian".
  expect_true("Christian" %in% babynamesIL_1948$sector)
})
test_that("babynamesIL_other exists and has correct structure", {
  expect_true(exists("babynamesIL_other"))

  # Six columns, and every required column name must be present.
  required_cols <- c("sector", "year", "sex", "name", "n", "prop")
  absent_cols <- setdiff(required_cols, names(babynamesIL_other))
  expect_equal(ncol(babynamesIL_other), 6)
  expect_true(length(absent_cols) == 0)

  # The only sector present must be "Other".
  distinct_sectors <- unique(babynamesIL_other$sector)
  expect_equal(distinct_sectors, "Other")

  # Years must span exactly 1985 through 2021.
  year_span <- range(babynamesIL_other$year)
  expect_equal(year_span[1], 1985)
  expect_equal(year_span[2], 2021)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.