tests/testthat/test_robust_glm_results.R

context("Robust GLM integration")

simulate_spike_dataset <- function(n_time = 40, n_vox = 3, spike = FALSE, seed = 1) {
  set.seed(seed)
  onsets <- c(5, 15, 25, 35)
  event_tab <- data.frame(onset = onsets, cond = factor("A"), run = 1)
  base <- matrix_dataset(matrix(rnorm(n_time * n_vox, sd = 0.1), n_time, n_vox),
                         TR = 1, run_length = n_time, event_table = event_tab)
  model <- create_fmri_model(onset ~ hrf(cond), block = ~ run,
                             dataset = base, durations = 0)
  X <- design_matrix(model)
  
  # Convert tibble/data.frame to matrix for matrix operations
  X_matrix <- as.matrix(X)
  
  ev_cols <- unlist(attr(model$event_model$design_matrix, "col_indices"))
  beta <- rep(0, ncol(X_matrix))
  beta[ev_cols] <- 1
  
  # Create signal and replicate across voxels
  beta_matrix <- matrix(beta, ncol = 1)
  signal <- X_matrix %*% beta_matrix  # This is 40x1
  
  # Replicate signal across all voxels and add noise
  Y <- matrix(rep(signal, n_vox), nrow = n_time, ncol = n_vox) + 
       matrix(rnorm(n_time * n_vox, sd = 0.1), n_time, n_vox)
  
  if (spike) {
    Y[10, ] <- Y[10, ] + 10
  }
  dset <- matrix_dataset(Y, TR = 1, run_length = n_time, event_table = event_tab)
  list(dset = dset, beta_true = 1, ev_cols = ev_cols)
}

# No outlier: robust vs OLS should match

test_that("robust=TRUE matches OLS on clean data", {
  sim <- simulate_spike_dataset(spike = FALSE)
  mod_ols <- fmri_lm(onset ~ hrf(cond), block = ~ run, dataset = sim$dset,
                     durations = 0, use_fast_path = TRUE)
  mod_rb <- fmri_lm(onset ~ hrf(cond), block = ~ run, dataset = sim$dset,
                    durations = 0, use_fast_path = TRUE,
                    robust = "huber")
  
  # Get coefficients - handle both matrix and tibble return types
  coef_ols <- coef(mod_ols)
  coef_rb <- coef(mod_rb)
  
  if (is.matrix(coef_ols)) {
    b_ols <- as.numeric(coef_ols[, 1])
    b_rb <- as.numeric(coef_rb[, 1])
  } else {
    # Handle tibble format - extract first column which contains the estimates
    b_ols <- as.numeric(coef_ols[[1]])
    b_rb <- as.numeric(coef_rb[[1]])
  }
  
  expect_equal(b_rb, b_ols, tolerance = 0.05)
  
  # Compare standard errors if available
  se_ols_result <- tryCatch(standard_error(mod_ols), error = function(e) NULL)
  se_rb_result <- tryCatch(standard_error(mod_rb), error = function(e) NULL)
  
  if (!is.null(se_ols_result) && !is.null(se_rb_result)) {
    if (is.matrix(se_ols_result)) {
      se_ols <- as.numeric(se_ols_result[, 1])
      se_rb <- as.numeric(se_rb_result[, 1])
    } else {
      # Handle tibble format - extract first column
      se_ols <- as.numeric(se_ols_result[[1]])
      se_rb <- as.numeric(se_rb_result[[1]])
    }
    expect_equal(se_rb, se_ols, tolerance = 0.1)
  }
})

# Outlier: robust should be closer to clean fit

test_that("robust fitting downweights spikes", {
  sim_clean <- simulate_spike_dataset(spike = FALSE, seed = 2)
  sim_spike <- simulate_spike_dataset(spike = TRUE, seed = 2)
  mod_true <- fmri_lm(onset ~ hrf(cond), block = ~ run, dataset = sim_clean$dset,
                      durations = 0, use_fast_path = TRUE)
  mod_ols <- fmri_lm(onset ~ hrf(cond), block = ~ run, dataset = sim_spike$dset,
                     durations = 0, use_fast_path = TRUE)
  mod_rb <- fmri_lm(onset ~ hrf(cond), block = ~ run, dataset = sim_spike$dset,
                    durations = 0, use_fast_path = TRUE,
                    robust = "huber")
  
  # Get coefficients 
  coef_true <- coef(mod_true)
  coef_ols <- coef(mod_ols)
  coef_rb <- coef(mod_rb)
  
  if (is.matrix(coef_true)) {
    b_true <- as.numeric(coef_true[, 1])
    b_ols <- as.numeric(coef_ols[, 1])
    b_rb <- as.numeric(coef_rb[, 1])
  } else {
    # Handle tibble format - extract first column
    b_true <- as.numeric(coef_true[[1]])
    b_ols <- as.numeric(coef_ols[[1]])
    b_rb <- as.numeric(coef_rb[[1]])
  }
  
  err_ols <- abs(b_ols - b_true)
  err_rb <- abs(b_rb - b_true)
  
  # Robust should have smaller error on average
  expect_lt(mean(err_rb), mean(err_ols))
  
  # Compare standard errors if available
  se_ols_result <- tryCatch(standard_error(mod_ols), error = function(e) NULL)
  se_rb_result <- tryCatch(standard_error(mod_rb), error = function(e) NULL)
  
  if (!is.null(se_ols_result) && !is.null(se_rb_result)) {
    if (is.matrix(se_ols_result)) {
      se_ols <- as.numeric(se_ols_result[, 1])
      se_rb <- as.numeric(se_rb_result[, 1])
    } else {
      # Handle tibble format - extract first column
      se_ols <- as.numeric(se_ols_result[[1]])
      se_rb <- as.numeric(se_rb_result[[1]])
    }
    # Robust should have smaller standard errors on average (only if both have values)
    if (length(se_ols) > 0 && length(se_rb) > 0 && all(!is.na(se_ols)) && all(!is.na(se_rb))) {
      # Just check that both methods produce reasonable standard errors
      # Robust doesn't always have smaller SE than OLS - depends on the robust estimator
      expect_true(all(se_ols > 0))
      expect_true(all(se_rb > 0))
      # Check they are reasonably similar (within an order of magnitude)
      expect_true(max(se_rb) / min(se_ols) < 10)
    }
  }
})

# Compare against MASS::rlm for single voxel

test_that("fast_rlm_run approximates MASS::rlm", {
  set.seed(10)
  n <- 30
  X <- cbind(1, rnorm(n))
  beta <- c(0.5, 2)
  y <- as.numeric(X %*% beta + rnorm(n, sd = 0.1))
  y[5] <- y[5] + 5
  proj <- fmrireg:::.fast_preproject(X)
  fit_fast <- fmrireg:::fast_rlm_run(X, matrix(y, ncol = 1), proj,
                                     psi = "huber", max_it = 2)
  fit_mass <- MASS::rlm(X, y, psi = MASS::psi.huber, k = 1.345)
  expect_equal(as.numeric(fit_fast$betas), as.numeric(fit_mass$coef), tolerance = 0.05)
})

test_that("fast_rlm_run errors with NA input", {
  # Create data without NAs first
  X <- cbind(1, rnorm(5))
  y <- rnorm(5)
  
  # Test with NA in X
  X_na <- X
  X_na[2, 1] <- NA
  expect_error({
    proj <- fmrireg:::.fast_preproject(X_na)
  }, "NA|singular|positive")
  
  # Test with NA in y (use valid X)
  proj <- fmrireg:::.fast_preproject(X)
  y_na <- y
  y_na[3] <- NA
  expect_error(
    fmrireg:::fast_rlm_run(X, matrix(y_na, ncol = 1), proj),
    "NA|missing"
  )
})
bbuchsbaum/fmrireg documentation built on June 10, 2025, 8:18 p.m.