tests/testthat/test-missing_value.R

# Tests for estimate_missing_values() exported function
# This tests the user-facing wrapper, not the internal implementation

# ==============================================================================
# TEST: Basic functionality - RCBD with all methods
# ==============================================================================

test_that("estimate_missing_values works with RCBD and all methods", {
  data(seldata)

  # Create test data with missing values
  test_data <- seldata[, 3:6]
  test_data[c(1, 10, 25), 1] <- NA
  test_data[c(5, 15), 2] <- NA

  # Test all methods
  methods <- c("REML", "Yates", "Healy", "Regression", "Mean", "Bartlett")

  for (method in methods) {
    result <- estimate_missing_values(
      data = test_data,
      genotypes = seldata$treat,
      replications = seldata$rep,
      design = "RCBD",
      method = method
    )

    expect_false(anyNA(result), info = paste("Method:", method))
    expect_equal(dim(result), dim(test_data), info = paste("Method:", method))
    expect_true(all(is.finite(result)), info = paste("Method:", method))

    # Check that non-missing values are preserved
    non_missing_idx <- which(is.finite(test_data[, 3]))
    expect_equal(result[non_missing_idx, 3], test_data[non_missing_idx, 3],
      info = paste("Method:", method)
    )
  }
})

test_that("estimate_missing_values works with LSD and all methods", {
  # Create Latin Square Design test data
  set.seed(789)
  n <- 5 # 5x5 Latin square
  lsd_data <- matrix(rnorm(n * n * 3, mean = 20, sd = 5), nrow = n * n, ncol = 3)
  colnames(lsd_data) <- c("trait1", "trait2", "trait3")

  gen_vec <- rep(1:n, times = n)
  row_vec <- rep(1:n, each = n)
  col_vec <- rep(1:n, length.out = n * n)

  # Introduce missing values
  lsd_data[c(2, 7, 15), 1] <- NA
  lsd_data[c(10, 20), 2] <- NA

  methods <- c("REML", "Yates", "Healy", "Regression", "Mean", "Bartlett")

  for (method in methods) {
    result <- estimate_missing_values(
      data = lsd_data,
      genotypes = gen_vec,
      replications = row_vec,
      columns = col_vec,
      design = "LSD",
      method = method
    )

    expect_false(anyNA(result), info = paste("LSD Method:", method))
    expect_equal(dim(result), dim(lsd_data), info = paste("LSD Method:", method))
    expect_true(all(is.finite(result)), info = paste("LSD Method:", method))
  }
})

test_that("estimate_missing_values works with SPD design", {
  # Create Split Plot Design test data
  set.seed(456)
  n_blocks <- 3
  n_main <- 4
  n_sub <- 5
  n_obs <- n_blocks * n_main * n_sub

  spd_data <- matrix(rnorm(n_obs * 3, mean = 15, sd = 4), nrow = n_obs, ncol = 3)
  colnames(spd_data) <- c("yield", "height", "protein")

  block_vec <- rep(1:n_blocks, each = n_main * n_sub)
  main_vec <- rep(rep(1:n_main, each = n_sub), times = n_blocks)
  sub_vec <- rep(1:n_sub, times = n_blocks * n_main)

  # Introduce missing values
  spd_data[c(5, 15, 25, 35), 1] <- NA
  spd_data[c(10, 30), 2] <- NA

  # SPD only supports Mean method
  result <- estimate_missing_values(
    data = spd_data,
    genotypes = sub_vec,
    replications = block_vec,
    main_plots = main_vec,
    design = "SPD",
    method = "Mean"
  )

  expect_false(anyNA(result))
  expect_equal(dim(result), dim(spd_data))
  expect_true(all(is.finite(result)))
})

# ==============================================================================
# TEST: Input type handling
# ==============================================================================

test_that("estimate_missing_values handles data.frame input", {
  data(seldata)

  # Use data frame instead of matrix
  test_df <- seldata[, 3:5]
  test_df[c(1, 10), 1] <- NA

  result <- estimate_missing_values(
    data = test_df,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "Yates"
  )

  expect_false(anyNA(result))
  expect_true(is.matrix(result))
  expect_equal(dim(result), dim(test_df))
})

test_that("estimate_missing_values handles character genotype labels", {
  data(seldata)

  test_data <- seldata[, 3:5]
  test_data[c(1, 10), 1] <- NA

  # Convert numeric genotypes to character
  gen_char <- paste0("G", seldata$treat)
  rep_char <- paste0("R", seldata$rep)

  result <- estimate_missing_values(
    data = test_data,
    genotypes = gen_char,
    replications = rep_char,
    design = "RCBD",
    method = "Mean"
  )

  expect_false(anyNA(result))
  expect_equal(dim(result), dim(test_data))
})

test_that("estimate_missing_values handles factor genotype labels", {
  data(seldata)

  test_data <- seldata[, 3:5]
  test_data[c(1, 10), 1] <- NA

  # Convert to factors
  gen_factor <- factor(seldata$treat)
  rep_factor <- factor(seldata$rep)

  result <- estimate_missing_values(
    data = test_data,
    genotypes = gen_factor,
    replications = rep_factor,
    design = "RCBD",
    method = "Mean"
  )

  expect_false(anyNA(result))
  expect_equal(dim(result), dim(test_data))
})

# ==============================================================================
# TEST: Column name preservation
# ==============================================================================

test_that("estimate_missing_values preserves column names", {
  data(seldata)

  test_data <- seldata[, 3:5]
  original_names <- colnames(test_data)
  test_data[c(1, 10), 1] <- NA

  result <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "Mean"
  )

  expect_equal(colnames(result), original_names)
})

test_that("estimate_missing_values preserves custom column names", {
  data(seldata)

  test_data <- seldata[, 3:5]
  colnames(test_data) <- c("Yield", "Height", "Protein")
  test_data[c(1, 10), 1] <- NA

  result <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "Mean"
  )

  expect_equal(colnames(result), c("Yield", "Height", "Protein"))
})

test_that("estimate_missing_values handles data without column names", {
  data(seldata)

  test_data <- as.matrix(seldata[, 3:5])
  colnames(test_data) <- NULL # Remove column names
  test_data[c(1, 10), 1] <- NA

  result <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "Mean"
  )

  expect_false(anyNA(result))
  expect_equal(dim(result), dim(test_data))
  expect_null(colnames(result))
})

# ==============================================================================
# TEST: Design-specific validation
# ==============================================================================

test_that("estimate_missing_values requires columns for LSD", {
  skip_on_cran() # error handling test or warning test
  set.seed(123)
  lsd_data <- matrix(rnorm(25 * 3), nrow = 25, ncol = 3)
  lsd_data[1, 1] <- NA

  gen_vec <- rep(1:5, times = 5)
  row_vec <- rep(1:5, each = 5)

  expect_error(
    estimate_missing_values(
      data = lsd_data,
      genotypes = gen_vec,
      replications = row_vec,
      design = "LSD",
      method = "Mean"
    ),
    "col_idx"
  )
})

test_that("estimate_missing_values requires main_plots for SPD", {
  skip_on_cran() # error handling test or warning test
  set.seed(123)
  spd_data <- matrix(rnorm(60 * 3), nrow = 60, ncol = 3)
  spd_data[1, 1] <- NA

  block_vec <- rep(1:3, each = 20)
  sub_vec <- rep(1:5, times = 12)

  expect_error(
    estimate_missing_values(
      data = spd_data,
      genotypes = sub_vec,
      replications = block_vec,
      design = "SPD",
      method = "Mean"
    ),
    "main_idx"
  )
})

test_that("estimate_missing_values validates design argument", {
  skip_on_cran() # error handling test or warning test
  data(seldata)

  test_data <- seldata[, 3:5]
  test_data[1, 1] <- NA

  expect_error(
    estimate_missing_values(
      data = test_data,
      genotypes = seldata$treat,
      replications = seldata$rep,
      design = "InvalidDesign",
      method = "Mean"
    ),
    "should be one of"
  )
})

test_that("estimate_missing_values validates method argument", {
  skip_on_cran() # error handling test or warning test
  data(seldata)

  test_data <- seldata[, 3:5]
  test_data[1, 1] <- NA

  expect_error(
    estimate_missing_values(
      data = test_data,
      genotypes = seldata$treat,
      replications = seldata$rep,
      design = "RCBD",
      method = "InvalidMethod"
    ),
    "should be one of"
  )
})

# ==============================================================================
# TEST: Method fallback for SPD
# ==============================================================================

test_that("estimate_missing_values falls back to Mean for SPD with unsupported methods", {
  set.seed(456)
  n_blocks <- 3
  n_main <- 4
  n_sub <- 5
  n_obs <- n_blocks * n_main * n_sub

  spd_data <- matrix(rnorm(n_obs * 3), nrow = n_obs, ncol = 3)
  spd_data[c(5, 15), 1] <- NA

  block_vec <- rep(1:n_blocks, each = n_main * n_sub)
  main_vec <- rep(rep(1:n_main, each = n_sub), times = n_blocks)
  sub_vec <- rep(1:n_sub, times = n_blocks * n_main)

  # Test that unsupported methods issue warning and fall back to Mean
  unsupported_methods <- c("REML", "Yates", "Healy", "Regression", "Bartlett")

  for (method in unsupported_methods) {
    expect_warning(
      result <- estimate_missing_values(
        data = spd_data,
        genotypes = sub_vec,
        replications = block_vec,
        main_plots = main_vec,
        design = "SPD",
        method = method
      ),
      "Mean.*SPD"
    )

    expect_false(anyNA(result), info = paste("SPD fallback for:", method))
  }
})

# ==============================================================================
# TEST: Tolerance parameter
# ==============================================================================

test_that("estimate_missing_values respects tolerance parameter", {
  data(seldata)

  test_data <- seldata[, 3:5]
  test_data[c(1, 10, 25), 1] <- NA
  test_data[c(5, 15), 2] <- NA

  # Test with different tolerance values
  result_strict <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "Yates",
    tolerance = 1e-8
  )

  result_loose <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "Yates",
    tolerance = 1e-4
  )

  expect_false(anyNA(result_strict))
  expect_false(anyNA(result_loose))

  # Both should produce valid results
  expect_true(all(is.finite(result_strict)))
  expect_true(all(is.finite(result_loose)))
})

# ==============================================================================
# TEST: Edge cases
# ==============================================================================

test_that("estimate_missing_values handles data with no missing values", {
  data(seldata)

  # No missing values
  test_data <- seldata[, 3:5]

  result <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "Mean"
  )

  expect_equal(result, as.matrix(test_data))
})

test_that("estimate_missing_values handles data with many missing values", {
  data(seldata)

  test_data <- seldata[, 3:5]
  # Introduce many missing values (about 30%)
  set.seed(999)
  missing_idx <- sample(seq_len(nrow(test_data)), size = floor(0.3 * nrow(test_data)))
  test_data[missing_idx, 1] <- NA

  result <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "REML"
  )

  expect_false(anyNA(result))
  expect_equal(dim(result), dim(test_data))
})

test_that("estimate_missing_values handles single trait", {
  data(seldata)

  test_data <- seldata[, 3, drop = FALSE]
  test_data[c(1, 10, 25), 1] <- NA

  result <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "Mean"
  )

  expect_false(anyNA(result))
  expect_equal(dim(result), dim(test_data))
})

test_that("estimate_missing_values handles missing values across all traits", {
  data(seldata)

  test_data <- seldata[, 3:5]
  test_data[1, ] <- NA # All traits missing for one observation
  test_data[10, 1] <- NA
  test_data[15, 2] <- NA

  result <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "Mean"
  )

  expect_false(anyNA(result))
  expect_equal(dim(result), dim(test_data))
})

# ==============================================================================
# TEST: Consistency with internal function
# ==============================================================================

test_that("estimate_missing_values produces same results as internal function", {
  data(seldata)

  test_data <- seldata[, 3:5]
  test_data[c(1, 10, 25), 1] <- NA
  test_data[c(5, 15), 2] <- NA

  # Call exported wrapper
  result_wrapper <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "Yates"
  )

  # Call internal function directly
  gen_idx <- as.integer(factor(seldata$treat))
  rep_idx <- as.integer(factor(seldata$rep))

  result_internal <- selection.index:::missing_value_estimation(
    data_mat = as.matrix(test_data),
    gen_idx = gen_idx,
    rep_idx = rep_idx,
    design_type = "RCBD",
    method = "Yates"
  )

  expect_equal(result_wrapper, result_internal)
})

# ==============================================================================
# TEST: Default arguments
# ==============================================================================

test_that("estimate_missing_values uses default design and method", {
  data(seldata)

  test_data <- seldata[, 3:5]
  test_data[c(1, 10), 1] <- NA

  # Test with defaults (RCBD, REML)
  result <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep
  )

  expect_false(anyNA(result))
  expect_equal(dim(result), dim(test_data))
})

# ==============================================================================
# TEST: Integration with real data patterns
# ==============================================================================

test_that("estimate_missing_values works with realistic missing patterns", {
  data(seldata)

  test_data <- seldata[, 3:7]

  # Simulate realistic missing patterns:
  # - Random missing values
  # - Entire observation missing
  # - Systematic missing (e.g., measurement failure in one block)

  set.seed(2020)
  # Random missing
  test_data[sample(seq_len(nrow(test_data)), 5), sample(seq_len(ncol(test_data)), 1)] <- NA

  # Entire observation
  test_data[10, ] <- NA

  # Systematic (all observations from block 1, trait 2)
  block1_idx <- which(seldata$rep == 1)
  test_data[block1_idx[1:3], 2] <- NA

  result <- estimate_missing_values(
    data = test_data,
    genotypes = seldata$treat,
    replications = seldata$rep,
    design = "RCBD",
    method = "REML"
  )

  expect_false(anyNA(result))
  expect_equal(dim(result), dim(test_data))
  expect_true(all(is.finite(result)))
})

# ==============================================================================
# TEST: All method combinations for RCBD
# ==============================================================================

test_that("estimate_missing_values works with all RCBD method combinations", {
  data(seldata)

  test_data <- as.matrix(seldata[, 3:5])
  test_data[c(1, 5, 10, 15, 20), 1] <- NA
  test_data[c(2, 8, 14), 2] <- NA

  methods <- c("REML", "Yates", "Healy", "Regression", "Mean", "Bartlett")

  results <- list()

  for (method in methods) {
    result <- estimate_missing_values(
      data = test_data,
      genotypes = seldata$treat,
      replications = seldata$rep,
      design = "RCBD",
      method = method
    )

    results[[method]] <- result

    # All methods should produce complete results
    expect_false(anyNA(result), info = paste("Method:", method))
    expect_equal(dim(result), dim(test_data), info = paste("Method:", method))

    # Check that non-missing values are identical to original
    non_missing_mask <- is.finite(test_data)
    expect_equal(result[non_missing_mask], test_data[non_missing_mask],
      info = paste("Method:", method)
    )
  }

  # All methods should produce finite results
  for (method in methods) {
    expect_true(all(is.finite(results[[method]])),
      info = paste("Method:", method, "- all finite")
    )
  }
})

# ==============================================================================
# TEST: All method combinations for LSD
# ==============================================================================

test_that("estimate_missing_values works with all LSD method combinations", {
  # Create 6x6 Latin Square Design
  set.seed(555)
  n <- 6
  lsd_data <- matrix(rnorm(n * n * 4, mean = 25, sd = 6), nrow = n * n, ncol = 4)
  colnames(lsd_data) <- paste0("T", 1:4)

  gen_vec <- rep(1:n, times = n)
  row_vec <- rep(1:n, each = n)
  col_vec <- rep(1:n, length.out = n * n)

  # Introduce missing values
  lsd_data[c(2, 10, 20, 30), 1] <- NA
  lsd_data[c(5, 15, 25), 2] <- NA
  lsd_data[c(8, 18), 3] <- NA

  methods <- c("REML", "Yates", "Healy", "Regression", "Mean", "Bartlett")

  for (method in methods) {
    result <- estimate_missing_values(
      data = lsd_data,
      genotypes = gen_vec,
      replications = row_vec,
      columns = col_vec,
      design = "LSD",
      method = method
    )

    expect_false(anyNA(result), info = paste("LSD Method:", method))
    expect_equal(dim(result), dim(lsd_data), info = paste("LSD Method:", method))

    # Check preservation of non-missing values
    non_missing_mask <- is.finite(lsd_data)
    expect_equal(result[non_missing_mask], lsd_data[non_missing_mask],
      info = paste("LSD Method:", method)
    )
  }
})

Try the selection.index package in your browser

Any scripts or data that you put into this service are public.

selection.index documentation built on March 9, 2026, 1:06 a.m.