Nothing
test_that("semantic_search generates correct n-grams for single word query", {
  skip_if_not_installed("dplyr")

  # Three-row stand-in for a census vector list. Two of the `details`
  # strings contain the word "population"; the third does not.
  fixture_details <- c(
    "Total population count",
    "Male population aged 15",
    "Female income median"
  )
  census_fixture <- data.frame(
    vector = c("v_TEST_1", "v_TEST_2", "v_TEST_3"),
    details = fixture_details,
    label = c("Population", "Male", "Female"),
    stringsAsFactors = FALSE
  )

  # Run a one-word query through the internal (non-exported) helper.
  hits <- cancensus:::semantic_search("population", census_fixture)

  # The result must exist, have at least one row, and at least one of its
  # `details` values must mention the query term (case-insensitively).
  expect_false(is.null(hits))
  expect_true(nrow(hits) > 0)
  mentions_population <- grepl("population", hits$details, ignore.case = TRUE)
  expect_true(any(mentions_population))
})
test_that("semantic_search generates correct n-grams for multi-word query", {
  skip_if_not_installed("dplyr")

  # Four-row fixture: three population-related entries and one about income.
  fixture_details <- c(
    "Total population count by age",
    "Male population aged 15 to 24",
    "Female population aged 25 to 34",
    "Income median for households"
  )
  census_fixture <- data.frame(
    vector = c("v_TEST_1", "v_TEST_2", "v_TEST_3", "v_TEST_4"),
    details = fixture_details,
    label = c("Pop1", "Pop2", "Pop3", "Income"),
    stringsAsFactors = FALSE
  )

  # Two-word query.
  hits <- cancensus:::semantic_search("male population", census_fixture)

  expect_false(is.null(hits))

  # The content check only runs when rows came back; an empty result passes
  # this test without the check below being evaluated.
  # NOTE(review): the pattern "male" also matches inside "Female", so this
  # check cannot tell the "Male" row apart from the "Female" row.
  has_rows <- nrow(hits) > 0
  if (has_rows) {
    mentions_male <- grepl("male", hits$details, ignore.case = TRUE)
    expect_true(any(mentions_male))
  }
})
test_that("semantic_search handles empty vectors", {
  skip_if_not_installed("dplyr")

  # Zero-row fixture with the same three character columns as the other
  # tests in this file.
  no_values <- character(0)
  empty_fixture <- data.frame(
    vector = no_values,
    details = no_values,
    label = no_values,
    stringsAsFactors = FALSE
  )

  # Edge case that is unlikely in real usage: this test pins the current
  # expectation that searching an empty list signals an error (any error;
  # no message or class is checked).
  expect_error(
    hits <- cancensus:::semantic_search("population", empty_fixture)
  )
})
test_that("semantic_search handles vectors with punctuation", {
  skip_if_not_installed("dplyr")

  # `details` strings deliberately include a colon, comma, parentheses and
  # a dash around the searchable words.
  fixture_details <- c(
    "Population: total, all ages (2021)",
    "Income - median household income"
  )
  census_fixture <- data.frame(
    vector = c("v_TEST_1", "v_TEST_2"),
    details = fixture_details,
    label = c("Pop", "Income"),
    stringsAsFactors = FALSE
  )

  # The query words are separated by ": " in the first fixture row.
  hits <- cancensus:::semantic_search("population total", census_fixture)

  expect_false(is.null(hits))

  # The content check only runs when rows came back; an empty result passes
  # this test without the check below being evaluated.
  has_rows <- nrow(hits) > 0
  if (has_rows) {
    mentions_population <- grepl(
      "population",
      hits$details,
      ignore.case = TRUE
    )
    expect_true(any(mentions_population))
  }
})
test_that("semantic_search handles case insensitivity", {
  skip_if_not_installed("dplyr")

  # The same phrase twice, once in upper case and once in lower case.
  fixture_details <- c(
    "POPULATION TOTAL",
    "population total"
  )
  census_fixture <- data.frame(
    vector = c("v_TEST_1", "v_TEST_2"),
    details = fixture_details,
    label = c("Pop1", "Pop2"),
    stringsAsFactors = FALSE
  )

  # Run every casing of the query first, then check the results, so all
  # three searches execute before any expectation is evaluated.
  query_casings <- c("POPULATION", "population", "Population")
  search_results <- lapply(
    query_casings,
    function(query) cancensus:::semantic_search(query, census_fixture)
  )

  # Each casing must yield a non-NULL result. Only non-NULL-ness is
  # checked here; the contents of the results are not compared.
  for (hits in search_results) {
    expect_false(is.null(hits))
  }
})
test_that("semantic_search with no close matches warns user", {
  skip_if_not_installed("dplyr")

  # Two ordinary entries, neither of which resembles the nonsense query.
  fixture_details <- c(
    "Population total count",
    "Income median value"
  )
  census_fixture <- data.frame(
    vector = c("v_TEST_1", "v_TEST_2"),
    details = fixture_details,
    label = c("Pop", "Income"),
    stringsAsFactors = FALSE
  )

  # A query with no plausible match must raise a warning whose message
  # matches the pattern below.
  expect_warning(
    hits <- cancensus:::semantic_search("zzzzxxxxxqqqqq", census_fixture),
    regexp = "No close matches found"
  )
})
test_that("semantic_search handles short sentences correctly", {
  skip_if_not_installed("dplyr")

  # `details` strings of very different lengths: two single-word entries
  # and one multi-word entry.
  one_word_total <- "Total"
  one_word_population <- "Population"
  long_sentence <- "Total population count by age groups"
  census_fixture <- data.frame(
    vector = c("v_TEST_1", "v_TEST_2", "v_TEST_3"),
    details = c(one_word_total, one_word_population, long_sentence),
    label = c("T", "P", "TPC"),
    stringsAsFactors = FALSE
  )

  # Search for a word that appears in both a short and a long entry.
  hits <- cancensus:::semantic_search("total", census_fixture)

  expect_false(is.null(hits))

  # The content check only runs when rows came back; an empty result passes
  # this test without the check below being evaluated.
  has_rows <- nrow(hits) > 0
  if (has_rows) {
    mentions_total <- grepl("total", hits$details, ignore.case = TRUE)
    expect_true(any(mentions_total))
  }
})
test_that("semantic_search n-gram optimization produces identical results", {
  skip_if_not_installed("dplyr")

  # 50-row fixture: ten distinct descriptions followed by forty copies of
  # the same filler description.
  distinct_details <- c(
    "Total population by age groups and gender",
    "Male population aged 0 to 14 years",
    "Female population aged 15 to 24 years",
    "Total households by family composition",
    "Median household income after tax",
    "Average household income before tax",
    "Total dwelling units by structure type",
    "Population density per square kilometer",
    "Labour force participation rate by age",
    "Unemployment rate for all ages"
  )
  filler_details <- rep("Other census variable details", 40)
  census_fixture <- data.frame(
    vector = paste0("v_TEST_", 1:50),
    details = c(distinct_details, filler_details),
    label = paste0("Label_", 1:50),
    stringsAsFactors = FALSE
  )

  # Every query below is executed; only those in `asserted_queries` carry
  # an expectation ("aged 15" is run but its result is not checked).
  all_queries <- c("population", "household income", "aged 15", "total")
  asserted_queries <- c("population", "household income", "total")

  for (query in all_queries) {
    hits <- cancensus:::semantic_search(query, census_fixture)

    if (!(query %in% asserted_queries)) {
      next
    }

    # NOTE(review): inherits(NULL, "data.frame") is FALSE, so this
    # condition passes exactly when the result is non-NULL. Nothing here
    # compares results against a reference, despite the test name.
    expect_true(!is.null(hits) || inherits(hits, "data.frame"))
  }
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.