# Test statistical inference components
library(fmrireg)
library(testthat)
test_that("standard errors are computed correctly for OLS", {
# Known regression problem
set.seed(123)
n <- 100
X <- cbind(1, rnorm(n), rnorm(n))
beta <- c(2, 1, -0.5)
sigma <- 1.5
Y <- X %*% beta + rnorm(n, sd = sigma)
# Create proper GLM context with projection
proj <- .fast_preproject(X)
ctx <- glm_context(
X = X,
Y = matrix(Y, ncol = 1),
proj = proj
)
result <- solve_glm_core(ctx)
# Manual calculation of standard errors
residuals <- Y - X %*% result$betas
s2 <- sum(residuals^2) / (n - ncol(X))
XtX_inv <- solve(t(X) %*% X)
se_expected <- sqrt(diag(XtX_inv) * s2)
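  # Hedged cross-checks against base R (assuming solve_glm_core performs
  # plain OLS here, as this test's setup implies): coefficients should match
  # lm(), and result$sigma2 should equal the unbiased residual variance s2,
  # which would make se_expected the model-based standard errors.
  fit_lm <- lm(as.vector(Y) ~ X - 1)
  expect_equal(as.vector(result$betas), unname(coef(fit_lm)), tolerance = 1e-6)
  expect_equal(as.numeric(result$sigma2), s2, tolerance = 1e-6)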
  # Compare coefficient estimates to the simulated truth
  expect_equal(as.vector(result$betas), beta, tolerance = 0.25)
  # solve_glm_core does not return standard errors directly, so check the
  # variance estimate instead
  expect_true(all(result$sigma2 > 0))
  expect_equal(length(result$sigma2), 1)
})
test_that("robust standard errors account for heteroscedasticity", {
# Heteroscedastic data
n <- 200
X <- cbind(1, rnorm(n))
beta <- c(1, 2)
# Variance increases with X
errors <- rnorm(n) * (1 + abs(X[, 2]))
Y <- X %*% beta + errors
# OLS fit
proj_ols <- .fast_preproject(X)
ctx_ols <- glm_context(
X = X,
Y = matrix(Y, ncol = 1),
proj = proj_ols
)
result_ols <- solve_glm_core(ctx_ols)
  # Robust fit using robust_iterative_fitter
  cfg_robust <- list(
    type = "huber",
    k_huber = 1.345,
    c_tukey = 4.685,
    max_iter = 3,
    scale_scope = "run"
  )
  robust_result <- robust_iterative_fitter(
    initial_glm_ctx = ctx_ols,
    cfg_robust_options = cfg_robust,
    X_orig_for_resid = X
  )
  # Robust estimates should be different from OLS
  expect_true(any(abs(robust_result$betas_robust - result_ols$betas) > 0.01))
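  # Hedged illustration (base R only): a manual HC0 sandwich variance as a
  # reference point; this is a sketch, not the package's own robust SE
  # implementation.
  res_ols <- as.vector(Y - X %*% result_ols$betas)
  bread <- solve(t(X) %*% X)
  meat <- t(X) %*% (X * res_ols^2) # sum_i e_i^2 * x_i x_i'
  se_hc0 <- sqrt(diag(bread %*% meat %*% bread))
  # Sandwich SEs should be positive and finite; with variance growing in
  # |x|, the HC0 slope SE typically exceeds the naive OLS one
  expect_true(all(is.finite(se_hc0)) && all(se_hc0 > 0))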
})
test_that("effective degrees of freedom computed for AR models", {
# AR(1) model
n <- 100
X <- cbind(1, rnorm(n))
ar_coef <- 0.6
# Generate AR(1) errors
e <- arima.sim(list(ar = ar_coef), n = n)
Y <- X %*% c(1, 2) + as.vector(e)
# Initial OLS fit
proj <- .fast_preproject(X)
ctx <- glm_context(
X = X,
Y = matrix(Y, ncol = 1),
proj = proj
)
initial_result <- solve_glm_core(ctx)
  residuals <- Y - X %*% initial_result$betas
  # Estimate AR(1) parameter from the OLS residuals
  phi_est <- estimate_ar_parameters(residuals, 1)
  # Apply AR whitening
  whitened <- ar_whiten_transform(X, matrix(Y, ncol = 1), phi_est, exact_first = FALSE)
  # Fit on whitened data
  proj_w <- .fast_preproject(whitened$X)
  ctx_w <- glm_context(
    X = whitened$X,
    Y = whitened$Y,
    proj = proj_w,
    phi_hat = phi_est
  )
  result_ar <- solve_glm_core(ctx_w)
  # AR-corrected estimates should differ from OLS after whitening
  expect_true(any(abs(result_ar$betas - initial_result$betas) > 0.01))
  # The AR parameter should be estimated and clearly positive, as expected
  # for positively autocorrelated errors
  expect_true(!is.null(phi_est))
  expect_true(phi_est > 0.1)
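  # Hedged sanity check: with n = 100 the AR(1) estimate from the OLS
  # residuals should land reasonably close to the simulated value
  # (loose tolerance to absorb small-sample bias)
  expect_equal(as.numeric(phi_est), ar_coef, tolerance = 0.25)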
})
test_that("F-statistics computed correctly for contrasts", {
# Multi-factor design
n <- 120
factor1 <- rep(c("A", "B"), each = 60)
factor2 <- rep(c("X", "Y", "Z"), times = 40)
X <- model.matrix(~ factor1 * factor2)
beta <- c(10, 2, -1, 1, 0.5, -0.5)
Y <- X %*% beta + rnorm(n, sd = 2)
# Fit model
proj <- .fast_preproject(X)
ctx <- glm_context(
X = X,
Y = matrix(Y, ncol = 1),
proj = proj
)
result <- solve_glm_core(ctx)
  # Test the factor1B main effect with a single-df contrast
  contrast_vec <- c(0, 1, 0, 0, 0, 0)
  # Manual F-test calculation
  est <- as.vector(result$betas)
  contrast_est <- sum(contrast_vec * est)
  # Variance of the contrast estimate
  XtX_inv <- proj$XtXinv
  contrast_var <- as.numeric(t(contrast_vec) %*% XtX_inv %*% contrast_vec) * result$sigma2
  # F-statistic for a single-df contrast (equals the squared t statistic)
  f_stat <- (contrast_est^2) / contrast_var
  # F-statistic should be positive, well-defined, and finite
  expect_true(f_stat > 0)
  expect_true(!is.na(f_stat))
  expect_true(is.finite(f_stat))
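  # Hedged cross-check against base R (assuming solve_glm_core is plain
  # OLS): for a single-df contrast selecting one coefficient, F should
  # equal the squared t statistic reported by lm()
  fit_lm <- lm(as.vector(Y) ~ X - 1)
  t_lm <- summary(fit_lm)$coefficients[2, "t value"]
  expect_equal(as.numeric(f_stat), t_lm^2, tolerance = 1e-6)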
})
test_that("multiple comparison corrections work", {
# Multiple voxels with varying effect sizes
n <- 50
n_voxels <- 100
X <- cbind(1, rnorm(n))
# Most voxels null, some with true effects
Y <- matrix(rnorm(n * n_voxels), n, n_voxels)
signal_voxels <- 1:10
Y[, signal_voxels] <- Y[, signal_voxels] + X[, 2] * 3
  # Fit all voxels; X is identical across voxels, so the projection can be
  # computed once and reused
  proj <- .fast_preproject(X)
  p_values <- numeric(n_voxels)
  for (v in 1:n_voxels) {
    ctx <- glm_context(
      X = X,
      Y = Y[, v, drop = FALSE],
      proj = proj
    )
    result <- solve_glm_core(ctx)
    # t-test on coefficient 2 (the slope)
    se_est <- sqrt(proj$XtXinv[2, 2] * result$sigma2)
    t_stat <- result$betas[2, 1] / se_est
    p_values[v] <- 2 * pt(-abs(t_stat), df = n - ncol(X))
  }
  # Apply corrections
  p_bonf <- p.adjust(p_values, method = "bonferroni")
  p_fdr <- p.adjust(p_values, method = "fdr")
  p_holm <- p.adjust(p_values, method = "holm")
  # Bonferroni is the most conservative of the three
  expect_true(all(p_bonf >= p_values))
  expect_true(mean(p_bonf) > mean(p_fdr))
  # FDR should reject at least as many hypotheses as Bonferroni
  expect_true(sum(p_fdr < 0.05) >= sum(p_bonf < 0.05))
  # Signal voxels should mostly survive FDR correction
  expect_true(mean(p_fdr[signal_voxels] < 0.05) > 0.5)
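  # Hedged extra checks: Holm adjusted p-values never exceed Bonferroni's,
  # and true-null voxels should essentially never survive Bonferroni at
  # alpha = 0.05 (small slack for randomness)
  expect_true(all(p_holm <= p_bonf))
  expect_true(sum(p_bonf[-signal_voxels] < 0.05) <= 2)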
})
test_that("sandwich variance estimator for robust inference", {
# Heteroscedastic clustered data
n_clusters <- 20
n_per_cluster <- 10
n <- n_clusters * n_per_cluster
cluster_id <- rep(1:n_clusters, each = n_per_cluster)
X <- cbind(1, rnorm(n))
# Clustered errors
Y <- numeric(n)
for (i in 1:n_clusters) {
idx <- cluster_id == i
cluster_effect <- rnorm(1, sd = 2)
Y[idx] <- X[idx, ] %*% c(1, 2) + cluster_effect + rnorm(sum(idx))
}
  # Plain OLS fit (no cluster correction); a cluster-robust sandwich sketch
  # follows below
  proj <- .fast_preproject(X)
  ctx <- glm_context(
    X = X,
    Y = matrix(Y, ncol = 1),
    proj = proj
  )
  result <- solve_glm_core(ctx)
  residuals <- Y - X %*% result$betas
  # Basic sanity checks on the fit
  expect_true(all(result$sigma2 > 0))
  expect_equal(dim(result$betas), c(2, 1))
  # Residual variances computed per cluster should vary across clusters
  cluster_resid_var <- numeric(n_clusters)
  for (i in 1:n_clusters) {
    idx <- cluster_id == i
    cluster_resid_var[i] <- var(residuals[idx])
  }
  expect_true(sd(cluster_resid_var) > 0)
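  # Hedged illustration (base R only): a manual CR0 cluster-robust sandwich
  # variance as a reference; this is a sketch, not the package's own
  # sandwich estimator.
  bread <- solve(t(X) %*% X)
  meat <- matrix(0, ncol(X), ncol(X))
  for (i in 1:n_clusters) {
    idx <- cluster_id == i
    score_g <- t(X[idx, , drop = FALSE]) %*% residuals[idx]
    meat <- meat + score_g %*% t(score_g)
  }
  se_cr0 <- sqrt(diag(bread %*% meat %*% bread))
  se_naive <- sqrt(diag(bread) * as.numeric(result$sigma2))
  # With a strong shared cluster effect the cluster-robust intercept SE
  # should exceed the naive OLS SE
  expect_true(all(is.finite(se_cr0)))
  expect_true(se_cr0[1] > se_naive[1])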
})
test_that("confidence intervals have correct coverage", {
# Simulation to check coverage
n_sims <- 50 # Reduced for testing speed
coverage <- numeric(n_sims)
true_beta <- c(1, 2)
for (sim in 1:n_sims) {
n <- 50
X <- cbind(1, rnorm(n))
Y <- X %*% true_beta + rnorm(n)
proj <- .fast_preproject(X)
ctx <- glm_context(
X = X,
Y = matrix(Y, ncol = 1),
proj = proj
)
result <- solve_glm_core(ctx)
# 95% CI for beta[2]
se_est <- sqrt(proj$XtXinv[2, 2] * result$sigma2)
ci_lower <- result$betas[2, 1] - 1.96 * se_est
ci_upper <- result$betas[2, 1] + 1.96 * se_est
coverage[sim] <- (true_beta[2] >= ci_lower) & (true_beta[2] <= ci_upper)
}
  # Coverage should be close to 95%; with n_sims = 50 the Monte Carlo SD of
  # the coverage estimate is sqrt(0.95 * 0.05 / 50) ~= 0.031, so the 0.15
  # tolerance is deliberately generous
  expect_equal(mean(coverage), 0.95, tolerance = 0.15)
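  # The same check expressed in Monte Carlo units (a hedged restatement
  # using the binomial SD of the coverage indicator): the deviation from
  # nominal coverage should stay within a few MC standard errors
  mc_se <- sqrt(0.95 * 0.05 / n_sims)
  expect_true(abs(mean(coverage) - 0.95) < 5 * mc_se)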
})
test_that("permutation testing for robust p-values", {
# Small sample where permutation is appropriate
n <- 20
X <- cbind(1, sample(c(0, 1), n, replace = TRUE)) # Binary predictor
Y <- 3 + 2 * X[, 2] + rnorm(n)
# Original fit
proj <- .fast_preproject(X)
ctx <- glm_context(
X = X,
Y = matrix(Y, ncol = 1),
proj = proj
)
# Original statistic
result <- solve_glm_core(ctx)
se_orig <- sqrt(proj$XtXinv[2, 2] * result$sigma2)
orig_t <- result$betas[2, 1] / se_orig
  # Permutation distribution
  n_perms <- 99 # Reduced for testing speed
  perm_t <- numeric(n_perms)
  for (i in 1:n_perms) {
    Y_perm <- Y[sample(n)]
    ctx_perm <- glm_context(
      X = X,
      Y = matrix(Y_perm, ncol = 1),
      proj = proj # Reuse the projection since X does not change
    )
    result_perm <- solve_glm_core(ctx_perm)
    se_perm <- sqrt(proj$XtXinv[2, 2] * result_perm$sigma2)
    perm_t[i] <- result_perm$betas[2, 1] / se_perm
  }
  # Permutation p-value; the observed statistic counts as one permutation,
  # so the p-value can never be exactly zero
  p_perm <- (1 + sum(abs(perm_t) >= abs(orig_t))) / (n_perms + 1)
  # Should detect the true effect (loose bound given the small sample)
  expect_true(p_perm < 0.5)
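  # Sanity check on the estimator itself: with 99 permutations plus the
  # observed statistic, the smallest attainable p-value is 1 / 100
  expect_true(p_perm >= 1 / (n_perms + 1))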
})