Nothing
context("validate_data() function")
library(testthat)
library(logitr)
# Helper function to create test data based on yogurt dataset
create_test_data <- function() {
data(yogurt, package = "logitr")
return(yogurt)
}
test_that("validate_data works with clean data", {
data <- create_test_data()
# Should pass validation
result <- validate_data(
data,
outcome = "choice",
obsID = "obsID",
pars = c("price", "feat", "brand")
)
expect_true(result$valid)
expect_length(result$errors, 0)
expect_s3_class(result, "logitr_validation")
expect_true(result$summary$total_observations > 0)
expect_true(result$summary$total_alternatives > 0)
})
test_that("validate_data catches missing required columns", {
data <- create_test_data()
# Remove outcome column
data_no_outcome <- data[, !names(data) %in% "choice"]
result <- validate_data(
data_no_outcome,
outcome = "choice",
obsID = "obsID"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl("Missing required columns.*choice", result$errors)))
# Remove obsID column
data_no_obsid <- data[, !names(data) %in% "obsID"]
result <- validate_data(
data_no_obsid,
outcome = "choice",
obsID = "obsID"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl("Missing required columns.*obsID", result$errors)))
# Missing parameter column
result <- validate_data(
data,
outcome = "choice",
obsID = "obsID",
pars = "nonexistent_column"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl(
"Missing required columns.*nonexistent_column",
result$errors
)))
})
test_that("validate_data catches outcome variable issues", {
data <- create_test_data()
# Test invalid outcome values (not 0/1)
data_bad_outcome <- data
data_bad_outcome$choice[1:5] <- c(2, 3, -1, 0.5, 99)
result <- validate_data(
data_bad_outcome,
outcome = "choice",
obsID = "obsID"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl("invalid values.*2.*3.*-1.*0.5.*99", result$errors)))
# Test character outcome variable
data_char_outcome <- data
data_char_outcome$choice <- as.character(data_char_outcome$choice)
result <- validate_data(
data_char_outcome,
outcome = "choice",
obsID = "obsID"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl("must be numeric.*or logical", result$errors)))
# Test missing outcome values
data_na_outcome <- data
data_na_outcome$choice[c(1, 10, 50)] <- NA
result <- validate_data(
data_na_outcome,
outcome = "choice",
obsID = "obsID"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl("missing values in rows.*1.*10.*50", result$errors)))
})
test_that("validate_data catches multiple choices per observation", {
data <- create_test_data()
# Create multiple choices for obsID 1 (rows 1-4 in original data)
data_multiple_choices <- data
data_multiple_choices$choice[1:2] <- 1 # Both alternatives 1 and 2 chosen
result <- validate_data(
data_multiple_choices,
outcome = "choice",
obsID = "obsID"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl("Multiple choices.*found in obsID", result$errors)))
# Check that diagnostics are stored
expect_true(!is.null(result$diagnostics$multiple_choice_details))
})
test_that("validate_data catches observations with no choices", {
data <- create_test_data()
# Remove choice from obsID 1 (make all choices 0)
data_no_choices <- data
obsid_1_rows <- which(data_no_choices$obsID == 1)
data_no_choices$choice[obsid_1_rows] <- 0
result <- validate_data(
data_no_choices,
outcome = "choice",
obsID = "obsID"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl("No choices.*found in obsID", result$errors)))
# Check that diagnostics are stored
expect_true(!is.null(result$diagnostics$no_choice_details))
})
test_that("validate_data catches non-contiguous obsID blocks", {
data <- create_test_data()
# Create non-contiguous obsID pattern
data_noncontiguous <- data
# Take first 12 rows (3 observations of 4 alternatives each)
data_noncontiguous <- data_noncontiguous[1:12, ]
# Change obsID pattern to create non-contiguous blocks
# Original: 1,1,1,1, 2,2,2,2, 3,3,3,3
# Modified: 1,1, 2,2, 1,1, 2,2, 3,3,3,3 (obsID 1 and 2 appear twice)
data_noncontiguous$obsID <- c(1, 1, 2, 2, 1, 1, 2, 2, 3, 3, 3, 3)
result <- validate_data(
data_noncontiguous,
outcome = "choice",
obsID = "obsID"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl("non-contiguous blocks", result$errors)))
# Check that diagnostics are stored
expect_true(!is.null(result$diagnostics$noncontiguous_details))
})
test_that("validate_data handles missing obsID values", {
data <- create_test_data()
# Add missing obsID values
data_na_obsid <- data
data_na_obsid$obsID[c(5, 15, 25)] <- NA
result <- validate_data(
data_na_obsid,
outcome = "choice",
obsID = "obsID"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl(
"ObsID has missing values in rows.*5.*15.*25",
result$errors
)))
})
test_that("validate_data handles parameter column issues", {
data <- create_test_data()
# Test missing values in parameter columns
data_na_pars <- data
data_na_pars$price[c(1, 10, 50)] <- NA
data_na_pars$feat[c(2, 20)] <- NA
result <- validate_data(
data_na_pars,
outcome = "choice",
obsID = "obsID",
pars = c("price", "feat", "brand")
)
expect_true(result$valid) # Should still be valid, but with warnings
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl(
"' price '.*missing values.*1.*10.*50",
result$warnings
)))
expect_true(any(grepl("' feat '.*missing values.*2.*20", result$warnings)))
# Check that parameter info is stored
expect_true(!is.null(result$summary$parameter_info))
expect_true("price" %in% names(result$summary$parameter_info))
expect_true(result$summary$parameter_info$price$na_count == 3)
})
test_that("validate_data validates scalePar correctly", {
data <- create_test_data()
# Test non-numeric scalePar
data_char_scale <- data
data_char_scale$price <- as.character(data_char_scale$price)
result <- validate_data(
data_char_scale,
outcome = "choice",
obsID = "obsID",
scalePar = "price"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl("Scale parameter.*must be numeric", result$errors)))
# Test missing values in scalePar
data_na_scale <- data
data_na_scale$price[c(1, 10)] <- NA
result <- validate_data(
data_na_scale,
outcome = "choice",
obsID = "obsID",
scalePar = "price"
)
expect_true(result$valid) # Valid but with warnings
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl(
"Scale parameter.*missing values.*1.*10",
result$warnings
)))
# Check that scalePar info is stored
expect_true(!is.null(result$summary$scalePar_info))
expect_equal(result$summary$scalePar_info$na_count, 2)
})
test_that("validate_data validates panel structure", {
data <- create_test_data()
# Test missing panelID values
data_na_panel <- data
data_na_panel$id[c(1, 50, 100)] <- NA
result <- validate_data(
data_na_panel,
outcome = "choice",
obsID = "obsID",
panelID = "id"
)
expect_true(result$valid) # Valid but with warnings
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl(
"Panel ID.*missing values.*1.*50.*100",
result$warnings
)))
# Test panel summary statistics are computed
result <- validate_data(
data,
outcome = "choice",
obsID = "obsID",
panelID = "id"
)
expect_true(!is.null(result$summary$individuals))
expect_true(!is.null(result$summary$obs_per_individual))
expect_s3_class(result, "logitr_validation")
})
test_that("validate_data handles empty or invalid data frames", {
# Test empty data frame
empty_data <- data.frame()
result <- validate_data(
empty_data,
outcome = "choice",
obsID = "obsID"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl("empty", result$errors)))
# Test non-data.frame input
result <- validate_data(
list(a = 1),
outcome = "choice",
obsID = "obsID"
)
expect_false(result$valid)
expect_s3_class(result, "logitr_validation")
expect_true(any(grepl("must be a data.frame", result$errors)))
})
test_that("validate_data handles logical outcome variables", {
data <- create_test_data()
# Convert to logical
data_logical <- data
data_logical$choice <- as.logical(data_logical$choice)
result <- validate_data(
data_logical,
outcome = "choice",
obsID = "obsID"
)
expect_true(result$valid)
expect_s3_class(result, "logitr_validation")
expect_length(result$errors, 0)
})
test_that("validate_data provides correct summary statistics", {
data <- create_test_data()
result <- validate_data(
data,
outcome = "choice",
obsID = "obsID",
pars = c("price", "feat", "brand")
)
# Check summary statistics make sense
expect_s3_class(result, "logitr_validation")
expect_true(result$summary$total_observations > 0)
expect_true(
result$summary$total_alternatives > result$summary$total_observations
)
expect_true(result$summary$valid_choices == result$summary$total_observations)
expect_true(!is.null(result$summary$alternatives_per_obs))
expect_true(!is.null(result$summary$parameter_info))
})
test_that("validate_data handles factor variables correctly", {
data <- create_test_data()
# Ensure brand is factor
data$brand <- as.factor(data$brand)
result <- validate_data(
data,
outcome = "choice",
obsID = "obsID",
pars = c("price", "feat", "brand")
)
expect_true(result$valid)
expect_s3_class(result, "logitr_validation")
expect_length(result$errors, 0)
# Check that factor info is properly stored
expect_equal(result$summary$parameter_info$brand$type, "factor")
expect_true(result$summary$parameter_info$brand$n_levels > 1)
})
test_that("validate_data handles mixed data type scenarios", {
data <- create_test_data()
# Mix of issues: some missing values, but otherwise valid structure
data_mixed <- data
data_mixed$price[c(1, 100)] <- NA
data_mixed$brand[50] <- NA
result <- validate_data(
data_mixed,
outcome = "choice",
obsID = "obsID",
pars = c("price", "feat", "brand")
)
expect_true(result$valid) # Should be valid despite warnings
expect_s3_class(result, "logitr_validation")
expect_true(length(result$warnings) > 0) # Should have warnings about missing values
expect_length(result$errors, 0) # No errors
})
test_that("validate_data handles edge cases with obsID gaps", {
data <- create_test_data()[1:20, ] # Take first 20 rows (5 observations)
# Create gaps in obsID sequence (1,2,5,7,10) - this should be OK
data_gaps <- data
unique_obs <- unique(data_gaps$obsID)
new_ids <- c(1, 2, 12, 7, 10)
for (i in seq_along(unique_obs)) {
data_gaps$obsID[data_gaps$obsID == unique_obs[i]] <- new_ids[i]
}
result <- validate_data(
data_gaps,
outcome = "choice",
obsID = "obsID"
)
expect_true(result$valid) # Gaps in sequence should be OK
expect_s3_class(result, "logitr_validation")
expect_length(result$errors, 0)
})
test_that("validate_data returns appropriate structures", {
data <- create_test_data()
# Test that we can capture the returned object
result <- validate_data(
data,
outcome = "choice",
obsID = "obsID"
)
expect_true(is.list(result))
expect_s3_class(result, "logitr_validation")
expect_true(all(
c("valid", "errors", "warnings", "summary", "diagnostics", "data_info") %in%
names(result)
))
})
test_that("print method works correctly", {
data <- create_test_data()
# Test print method with valid data
result <- validate_data(
data,
outcome = "choice",
obsID = "obsID",
pars = c("price", "feat", "brand")
)
expect_output(print(result), "LOGITR DATA VALIDATION")
expect_output(print(result), "Data Overview")
expect_output(print(result), "VALIDATION RESULTS")
expect_output(print(result), "Data validation PASSED")
# Test print method with errors
data_bad <- data
data_bad$choice[1:2] <- 1 # Multiple choices
result_bad <- validate_data(data_bad, outcome = "choice", obsID = "obsID")
expect_output(print(result_bad), "ERRORS found")
expect_output(print(result_bad), "Multiple choices")
expect_output(print(result_bad), "Detailed locations")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.