Nothing
set.seed(1)
glm_gaussian_data <- mock_glm_gaussian_data()
cat_init_glm_gaussian <- mock_cat_glm_gaussian_initialization(glm_gaussian_data)
glm_binomial_data <- mock_glm_binomial_data()
cat_init_glm_binomial <- mock_cat_glm_binomial_initialization(glm_binomial_data)
cox_data <- mock_cox_data()
cat_init_cox <- mock_cat_cox_initialization(cox_data)
lmm_data <- mock_lmm_data()
cat_init_lmm <- mock_cat_lmm_initialization(lmm_data)
# ----- GLMs -----
test_that("get_glm_sample_data works correctly", {
# Test data generation for binomial family
binomial_data <- get_glm_sample_data(
family_string = "binomial",
n = 20,
mean = 0.3
)
expect_equal(length(binomial_data), 20)
expect_true(all(binomial_data %in% c(0, 1)))
# Test data generation for Gaussian family
gaussian_data <- get_glm_sample_data(
family_string = "gaussian",
n = 20,
mean = 5,
sd = 2
)
expect_equal(length(gaussian_data), 20)
expect_true(all(is.numeric(gaussian_data)))
# Check for error with an invalid family
expect_null(get_glm_sample_data(
family_string = "invalid_family",
n = 20,
mean = 5,
sd = 2
))
})
test_that("get_hmc_mcmc_result works correctly", {
# Define a simple quadratic potential function and its gradient
neg_log_den_func <- function(pos) {
0.5 * sum(pos^2)
}
neg_log_den_grad_func <- function(pos) {
pos
}
# Set initial position, iteration count, scale, and steps
coefs_0 <- c(1, 1)
iter <- 10
hmc_scale <- 0.01
hmc_steps <- 5
# Run the function
result <- get_hmc_mcmc_result(
neg_log_den_func,
neg_log_den_grad_func,
coefs_0,
iter = iter,
hmc_scale = hmc_scale,
hmc_steps = hmc_steps
)
# Check the output type and length
expect_type(result, "double")
expect_equal(length(result), length(coefs_0))
result_2 <- get_hmc_mcmc_result(
neg_log_den_func,
neg_log_den_grad_func,
coefs_0,
iter = iter,
hmc_scale = hmc_scale,
hmc_steps = hmc_steps
)
# The results should not match exactly
expect_false(all(result == result_2))
})
test_that("hmc_neal_2010 works correctly", {
# Define a simple quadratic potential function and its gradient
neg_log_den_func <- function(pos) {
0.5 * sum(pos^2)
}
neg_log_den_grad_func <- function(pos) {
pos
}
# Set parameters for the HMC function
leapfrog_stepsize <- 0.1
leapfrog_step <- 10
current_pos <- c(1, 1)
# Call the HMC function
result <- hmc_neal_2010(
neg_log_den_func, neg_log_den_grad_func,
leapfrog_stepsize, leapfrog_step, current_pos
)
# Test that the output is a list
expect_type(result, "list")
# Test that the list has named elements position, potential_energy, and accepted
expect_named(result, c("position", "potential_energy", "accepted"))
# Test that position is a numeric vector of the same length as current_pos
expect_type(result$position, "double")
expect_equal(length(result$position), length(current_pos))
# Test that potential_energy is a numeric scalar
expect_type(result$potential_energy, "double")
expect_length(result$potential_energy, 1)
# Test that accepted is a logical value
expect_type(result$accepted, "logical")
expect_length(result$accepted, 1)
# Run multiple iterations to see if the function accepts/rejects states correctly
accept_count <- 0
total_runs <- 100
for (i in seq_len(total_runs)) {
result <- hmc_neal_2010(neg_log_den_func, neg_log_den_grad_func,
leapfrog_stepsize = 0.1, leapfrog_step = 10,
current_pos = c(1, 1)
)
if (result$accepted) accept_count <- accept_count + 1
}
# The function should accept some of the proposals
expect_gt(accept_count, 0)
# The function should also reject some proposals in the set
expect_lt(accept_count, total_runs)
# Test for a small step size where energy should be conserved well
leapfrog_stepsize <- 0.01
leapfrog_step <- 20
current_pos <- c(1, 1)
result_small_stepsize <- hmc_neal_2010(
neg_log_den_func, neg_log_den_grad_func,
leapfrog_stepsize, leapfrog_step, current_pos
)
})
test_that("get_glm_log_density_grad works correctly", {
X <- matrix(rnorm(20), ncol = 5)
Y_binomial <- c(1, 0, 1, 0)
Y_gaussian <- rnorm(4)
coefs <- rnorm(6)
# Retrieve GLM models from glm_data
model_binomial <- stats::glm(Y_binomial ~ X, binomial)
model_gaussian <- stats::glm(Y_gaussian ~ X, gaussian)
# Compute gradients
grad_binomial <- get_glm_log_density_grad("binomial", X, Y_binomial, coefs)
grad_gaussian <- get_glm_log_density_grad("gaussian", X, Y_gaussian, coefs)
# Manually compute expected gradients for comparison
mean_binomial <- get_glm_mean("binomial", X, coefs)
grad_binomial_expected <- c(t(model_binomial$prior.weights * (Y_binomial - mean_binomial)) %*% cbind(1, X))
mean_gaussian <- get_glm_mean("gaussian", X, coefs)
grad_gaussian_expected <- c(t(model_gaussian$prior.weights * (Y_gaussian - mean_gaussian)) %*% cbind(1, X))
# Test that computed values match expected values
expect_equal(grad_binomial, grad_binomial_expected)
expect_equal(grad_gaussian, grad_gaussian_expected)
})
test_that("get_glm_diag_approx_cov works correctly", {
X <- matrix(rnorm(20), ncol = 5)
y <- c(1, 0, 1, 0)
# Fit a GLM model
model <- stats::glm(y ~ X, family = binomial)
# Compute the diagonal covariance matrix
diag_cov <- get_glm_diag_approx_cov(X, model)
# Manually compute the expected diagonal covariance for comparison
hessian <- (t(cbind(1, X))) %*%
diag(model$fitted.values * (1 - model$fitted.values) * model$prior.weights) %*%
cbind(1, X)
expected_diag_cov <- tryCatch(
diag(solve(hessian)),
error = function(.) {
# Regularization for dealing with computationally singular issue
diag(solve(hessian + diag(ncol(hessian)) * 1e-5))
}
)
# Test that the computed and expected values match
expect_equal(diag_cov, expected_diag_cov)
# Check for correct error handling with mismatched dimensions (if applicable)
expect_error(get_glm_diag_approx_cov(matrix(1:4, ncol = 2), model))
})
test_that("get_glm_custom_var works correctly", {
# Call the function and check the result
custom_var <- get_glm_custom_var(Y ~ ., cat_init_glm_gaussian, 1)
# Expected custom variance calculation manually
tau_model <- do.call(
stats::glm,
list(
formula = Y ~ .,
family = cat_init_glm_gaussian$family,
data = cat_init_glm_gaussian$data,
weights = c(
rep(1, cat_init_glm_gaussian$obs_size),
rep(1 / cat_init_glm_gaussian$syn_size, cat_init_glm_gaussian$syn_size)
)
)
)
expected_custom_var <- sum(stats::residuals(tau_model)^2) / stats::df.residual(tau_model) +
sum(stats::residuals(tau_model)[1:cat_init_glm_gaussian$obs_size]^2) / cat_init_glm_gaussian$obs_size
expect_equal(custom_var, expected_custom_var)
})
test_that("get_glm_lambda works correctly", {
X <- matrix(rnorm(20), ncol = 5)
coefs <- rnorm(6)
# Expected outputs based on the given inputs
expected_square_error <- 2 * get_glm_mean("gaussian", X, coefs)
expected_classification_error <- 2 * get_glm_mean("binomial", X, coefs)
expected_logistic_deviance <- get_linear_predictor(X, coefs)
# Test for "square_error"
expect_equal(get_glm_lambda("mean_square_error", X, coefs), expected_square_error)
# Test for "classification_error"
expect_equal(get_glm_lambda("mean_classification_error", X, coefs), expected_classification_error)
# Test for "logistic_deviance"
expect_equal(get_glm_lambda("logistic_deviance", X, coefs), expected_logistic_deviance)
# Test that an error is thrown for invalid discrepancy method
expect_error(get_glm_lambda("invalid_method", X, coefs))
})
test_that("get_glm_log_density works correctly", {
X <- matrix(rnorm(20), ncol = 5)
coefs <- rnorm(6)
Y_binomial <- c(0, 1, 0, 1)
Y_gaussian <- rnorm(4)
weights <- 0.5
# Test for binomial family
log_density_binomial <- get_glm_log_density("binomial", X, Y_binomial, coefs, weights)
lp_binomial <- get_linear_predictor(X, coefs)
expected_log_density_binomial <- weights * sum((Y_binomial * lp_binomial - log(1 + exp(lp_binomial))))
expect_equal(log_density_binomial, expected_log_density_binomial, tolerance = 1e-6)
# Test for gaussian family
log_density_gaussian <- get_glm_log_density("gaussian", X, Y_gaussian, coefs, weights)
lp_gaussian <- get_linear_predictor(X, coefs)
expected_log_density_gaussian <- weights * -0.5 * sum((log(2 * pi * max(mean((Y_gaussian - lp_gaussian)^2), 1e-5)) + 1))
expect_equal(log_density_gaussian, expected_log_density_gaussian, tolerance = 1e-6)
# Test for invalid family
expect_null(get_glm_log_density("invalid_family", X, Y_binomial, coefs))
})
test_that("get_glm_mean works correctly", {
X <- matrix(rnorm(20), ncol = 5)
coefs <- rnorm(6)
# Test for binomial family
mean_binomial <- get_glm_mean("binomial", X, coefs)
expected_binomial <- 1 / (1 + exp(-get_linear_predictor(X, coefs)))
expect_equal(mean_binomial, expected_binomial, tolerance = 1e-6)
# Test for gaussian family
mean_gaussian <- get_glm_mean("gaussian", X, coefs)
expected_gaussian <- get_linear_predictor(X, coefs)
expect_equal(mean_gaussian, expected_gaussian, tolerance = 1e-6)
# Test for invalid family
expect_null(get_glm_mean("invalid_family", X, coefs))
})
test_that("get_glm_family_string works correctly", {
# Test retrieving the family name
gaussian_family_name <- get_glm_family_string("gaussian")
expect_equal(gaussian_family_name, "gaussian")
# Test retrieving the family name with the link function
poisson_family_name_with_link <- get_glm_family_string("poisson", with_link = TRUE)
expect_equal(poisson_family_name_with_link, "poisson [log]")
# Test handling invalid family input
expect_error(get_glm_family_string("invalid_family"), "'family' must be a valid GLM family.")
expect_error(get_glm_family_string(function() "invalid"), "'family' must be a valid GLM family.")
})
test_that("validate_glm_input works correctly", {
# Mock the menu function to simulate user input
mock_menu <- function(...) {
return(2) # Simulating user input "NO" to terminate
}
# Assign the mock_menu to replace the original menu function
assign("menu", mock_menu, envir = globalenv())
# Test for valid inputs
expect_silent(
validate_glm_input(
formula = Y ~ X1 + X2,
cat_init = cat_init_glm_binomial,
tau = 0.1,
tau_seq = seq(0.1, 1, by = 0.1),
tau_0 = 0.1,
parametric_bootstrap_iteration_times = 100,
cross_validation_fold_num = 5,
risk_estimate_method = "parametric_bootstrap",
discrepancy_method = "logistic_deviance",
binomial_tau_lower = 0.01,
tau_alpha = 0.5,
tau_gamma = 0.5,
gibbs_iter = 1000,
gibbs_warmup = 500,
coefs_iter = 5,
gaussian_variance_alpha = NULL,
gaussian_variance_beta = NULL
)
)
expect_silent(validate_glm_input(
formula = Y ~ X1 + X2,
cat_init = cat_init_glm_gaussian,
tau = 0.1,
tau_seq = seq(0.1, 1, by = 0.1),
tau_0 = 0.1,
parametric_bootstrap_iteration_times = 100,
cross_validation_fold_num = 5,
risk_estimate_method = "parametric_bootstrap",
discrepancy_method = "mean_square_error",
binomial_tau_lower = 0.01,
tau_alpha = 0.5,
tau_gamma = 0.5
))
# Test for invalid formula with wrong response variance
expect_error(validate_glm_input(
formula = Y_wrong ~ .,
cat_init = cat_init_glm_gaussian
))
# Test for invalid cat_init object
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_cox
))
# Test for invalid tau (negative value)
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
tau = -0.1
))
# Test for invalid tau (vectors)
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
tau = c(1, 2)
))
# Test for invalid tau_seq (negative value)
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
tau_seq = c(-1, 0, 1)
))
# Test for invalid tau_seq (single value)
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
tau_seq = 1
))
# Test for invalid tau_0 (zero value)
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
tau_0 = 0
))
# Test for invalid tau_0 (vector)
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
tau_0 = c(1, 2)
))
# Test for invalid parametric_bootstrap_iteration_times (zero value)
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
parametric_bootstrap_iteration_times = 0
))
# Test for invalid cross_validation_fold_num (zero value)
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
cross_validation_fold_num = 0
))
# Test for invalid cross_validation_fold_num (greater than number of rows)
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
risk_estimate_method = "cross_validation",
discrepancy_method = "mean_square_error",
cross_validation_fold_num = 100
))
# Test for family == "gaussian" & risk_estimate_method == "steinian_estimate"
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
risk_estimate_method = "steinian_estimate",
discrepancy_method = "mean_square_error"
))
# Test for family == "binomial" & risk_estimate_method == "mallowian_estimate"
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial,
risk_estimate_method = "mallowian_estimate",
discrepancy_method = "logistic_deviance"
))
# Test for family == "gaussian" & discrepancy_method != "mean_square_error"
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
risk_estimate_method = "mallowian_estimate",
discrepancy_method = "logistic_deviance"
))
cat_init_glm_gaussian_large_size <- cat_init_glm_gaussian
cat_init_glm_gaussian_large_size$obs_size <- 2000
# Test for large data size when using parametric_bootstrap
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian_large_size,
risk_estimate_method = "parametric_bootstrap",
discrepancy_method = "mean_square_error"
))
cat_init_glm_binomial_large_size <- cat_init_glm_binomial
cat_init_glm_binomial_large_size$obs_size <- 2000
# Test for large data size when using steinian_estimate
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial_large_size,
risk_estimate_method = "steinian_estimate",
discrepancy_method = "logistic_deviance"
))
# Warn user when family == "binomial" & risk_estimate_method == "steinian_estimate" & discrepancy_method != "logistic_deviance"
expect_warning(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial,
risk_estimate_method = "steinian_estimate",
discrepancy_method = "mean_classification_error"
))
# Test for family == "binomial" & discrepancy_method == "mean_square_error"
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial,
risk_estimate_method = "steinian_estimate",
discrepancy_method = "mean_square_error"
))
# Test for binomial using gibbs function
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
gibbs_iter = 1000,
gibbs_warmup = 200,
coefs_iter = 5
))
# Test for gibbs_warmup > gibbs_iter
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial,
gibbs_iter = 100,
gibbs_warmup = 200,
coefs_iter = 5
))
# Test for zero gibbs_iter
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial,
gibbs_iter = 0,
gibbs_warmup = 200,
coefs_iter = 5
))
# Test for zero gibbs_warmup
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial,
gibbs_iter = 100,
gibbs_warmup = 0,
coefs_iter = 5
))
# Test for zero coefs_iter
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial,
gibbs_iter = 1000,
gibbs_warmup = 200,
coefs_iter = 0
))
# Test for large coefs_iter
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial,
gibbs_iter = 1000,
gibbs_warmup = 200,
coefs_iter = 50
))
# Test for zero gaussian_variance_alpha
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
gaussian_variance_alpha = 0
))
# Test for zero gaussian_variance_beta
expect_error(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
gaussian_variance_beta = 0
))
# Test for ignore gaussian_variance_alpha when gaussian_known_variance = TRUE
cat_init_glm_gaussian_known <- mock_cat_glm_gaussian_initialization(glm_gaussian_data, gaussian_known_variance = TRUE)
expect_warning(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian_known,
gaussian_variance_alpha = 1
))
# Test for ignore gaussian_variance_alpha when family is binomial
expect_warning(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial,
gaussian_variance_alpha = 1
))
# Test for ignore gaussian_variance_beta when gaussian_known_variance = TRUE
expect_warning(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian_known,
gaussian_variance_beta = 1
))
# Test for ignore gaussian_variance_beta when family is binomial
expect_warning(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial,
gaussian_variance_beta = 1
))
# Test for binomial features when family is gaussian
expect_warning(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
binomial_joint_theta = TRUE
))
# Test for binomial features when family is gaussian
expect_warning(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_gaussian,
binomial_joint_alpha = TRUE
))
# Test for binomial features when theta is not activated with alpha
expect_warning(validate_glm_input(
formula = Y ~ .,
cat_init = cat_init_glm_binomial,
binomial_joint_theta = FALSE,
binomial_joint_alpha = TRUE
))
})
test_that("validate_glm_initialization_input checks parameters correctly", {
# Test for valid inputs
expect_silent(validate_glm_initialization_input(
formula = Y ~ 1,
family = gaussian,
data = glm_gaussian_data,
syn_size = NULL,
custom_variance = NULL,
gaussian_known_variance = FALSE,
x_degree = NULL
))
# Test for complex formula
expect_error(validate_glm_initialization_input(
formula = Y ~ X1 + X2 + X3,
family = gaussian,
data = glm_gaussian_data,
syn_size = NULL,
custom_variance = NULL,
gaussian_known_variance = FALSE,
x_degree = NULL
))
# Test for wrong data format
expect_error(validate_glm_initialization_input(
formula = Y ~ 1,
family = gaussian,
data = as.matrix(glm_gaussian_data),
syn_size = NULL,
custom_variance = NULL,
gaussian_known_variance = FALSE,
x_degree = NULL
))
# Test for empty data
expect_error(validate_glm_initialization_input(
formula = Y ~ 1,
family = gaussian,
data = data.frame(),
syn_size = NULL,
custom_variance = NULL,
gaussian_known_variance = FALSE,
x_degree = NULL
))
# Test for lmm formula
expect_error(validate_glm_initialization_input(
formula = Y ~ 1 + (1 | X1),
family = gaussian,
data = glm_gaussian_data,
syn_size = NULL,
custom_variance = NULL,
gaussian_known_variance = FALSE,
x_degree = NULL
))
# Test for cox formula
expect_error(validate_glm_initialization_input(
formula = survival::Surv(Y, X1) ~ 1,
family = gaussian,
data = glm_gaussian_data,
syn_size = NULL,
custom_variance = NULL,
gaussian_known_variance = FALSE,
x_degree = NULL
))
# Test for zero synthetic data size
expect_error(validate_glm_initialization_input(
formula = Y ~ 1,
family = gaussian,
data = glm_gaussian_data,
syn_size = 0,
custom_variance = NULL,
gaussian_known_variance = FALSE,
x_degree = NULL
))
# Test for positive synthetic data size
expect_silent(validate_glm_initialization_input(
formula = Y ~ 1,
family = gaussian,
data = glm_gaussian_data,
syn_size = 100,
custom_variance = NULL,
gaussian_known_variance = FALSE,
x_degree = NULL
))
# Test for custom_variance equal to zero
expect_error(validate_glm_initialization_input(
formula = Y ~ 1,
family = gaussian,
data = glm_gaussian_data,
syn_size = NULL,
custom_variance = 0,
gaussian_known_variance = TRUE,
x_degree = NULL
))
# Test for positive custom_variance
expect_silent(validate_glm_initialization_input(
formula = Y ~ 1,
family = gaussian,
data = glm_gaussian_data,
syn_size = NULL,
custom_variance = 1,
gaussian_known_variance = TRUE,
x_degree = NULL
))
# Test for positive custom_variance but FALSE gaussian_known_variance
expect_warning(validate_glm_initialization_input(
formula = Y ~ 1,
family = gaussian,
data = glm_gaussian_data,
syn_size = NULL,
custom_variance = 1,
gaussian_known_variance = FALSE,
x_degree = NULL
))
# Test for positive custom_variance, TRUE gaussian_known_variance but binomial distribution
expect_warning(validate_glm_initialization_input(
formula = Y ~ 1,
family = binomial,
data = glm_binomial_data,
syn_size = NULL,
custom_variance = 1,
gaussian_known_variance = TRUE,
x_degree = NULL
))
# Test for x_degree being zero
expect_error(validate_glm_initialization_input(
formula = Y ~ 1,
family = gaussian,
data = glm_gaussian_data,
syn_size = NULL,
custom_variance = 1,
gaussian_known_variance = TRUE,
x_degree = rep(0, 3)
))
# Test for x_degree being positive
expect_silent(validate_glm_initialization_input(
formula = Y ~ 1,
family = gaussian,
data = glm_gaussian_data,
syn_size = NULL,
custom_variance = 1,
gaussian_known_variance = TRUE,
x_degree = rep(1, 3)
))
# Test for x_degree being a single value
expect_error(validate_glm_initialization_input(
formula = Y ~ 1,
family = gaussian,
data = glm_gaussian_data,
syn_size = NULL,
custom_variance = 1,
gaussian_known_variance = TRUE,
x_degree = 1
))
})
# ----- COX -----
test_that("get_cox_risk_and_failure_sets works correctly", {
time_vector <- cox_data$time
status_vector <- cox_data$status
indicator_vector <- sort(unique(c(time_vector, time_vector + rexp(length(time_vector)))))
# Calculate risk and failure sets
sets <- get_cox_risk_and_failure_sets(time_vector, status_vector, indicator_vector)
# Test that the result is a list with two elements
expect_true(is.list(sets))
expect_equal(length(sets), 2)
# Test that the risk_set and failure_set are matrices
expect_true(is.matrix(sets$risk_set))
expect_true(is.matrix(sets$failure_set))
# Test that the dimensions of risk_set and failure_set match the input
expect_equal(dim(sets$risk_set), c(length(time_vector), length(indicator_vector)))
expect_equal(dim(sets$failure_set), c(length(time_vector), length(indicator_vector)))
# Test that failure_set has only one 1 per row where status = 1
expect_true(all(rowSums(sets$failure_set[status_vector == 1, ]) == 1))
# Test that failure_set is all zeros where status = 0
expect_true(all(sets$failure_set[status_vector == 0, ] == 0))
# Test that risk_set is all zeros or ones
expect_true(all(sets$risk_set %in% c(0, 1)))
})
test_that("get_cox_kappa works correctly", {
kappa <- get_cox_kappa(
X = matrix(rnorm(100), ncol = 5),
time = rexp(20),
status = rbinom(20, 1, 0.5),
hazard_constant = 0.02
)
# Test that kappa is a numeric value
expect_true(is.numeric(kappa))
# Test that kappa is a scalar
expect_equal(length(kappa), 1)
})
test_that("get_cox_partial_likelihood works correctly", {
partial_likelihood <- get_cox_partial_likelihood(
X = matrix(rnorm(100), ncol = 5),
time = rexp(20),
status = rbinom(20, 1, 0.5),
coefs = rnorm(5),
entry_points = rep(0, 20)
)
# Test that the partial likelihood is a numeric scalar
expect_true(is.numeric(partial_likelihood))
expect_length(partial_likelihood, 1)
})
test_that("get_cox_qr_solve works correctly", {
# Define valid input
hessian_matrix <- matrix(c(2, 1, 1, 2), ncol = 2)
gradient_vector <- c(5, 6)
# Test that the computed solution matches the expected solution
expect_equal(
get_cox_qr_solve(
hessian_matrix = hessian_matrix,
gradient_vector = gradient_vector
),
solve(hessian_matrix, gradient_vector)
)
# Test with matrix that might produce NA values
expect_equal(
get_cox_qr_solve(
hessian_matrix = matrix(c(1, 1, 1, 1), ncol = 2),
gradient_vector = c(1, 1)
),
c(0.0001, 0.0001)
)
# Test with another case that produces errors
expect_equal(
get_cox_qr_solve(
hessian_matrix = matrix(c(0, 0, 0, 0), ncol = 2),
gradient_vector = c(1, 1)
),
c(0.0001, 0.0001)
)
})
test_that("get_cox_gradient works correctly", {
X <- matrix(rnorm(15), ncol = 3)
time <- rexp(5, rate = 0.1)
status <- rbinom(5, 1, 0.5)
coefs <- rnorm(3)
entry_points <- rep(0, 5)
gradient <- get_cox_gradient(
X = X,
time = time,
status = status,
coefs = coefs,
entry_points = entry_points
)
expect_true(is.numeric(gradient))
expect_equal(length(gradient), ncol(X))
# Test with no events
expect_equal(get_cox_gradient(
X = X,
time = time,
status = rep(0, length(status)),
coefs = coefs,
entry_points = entry_points
), c(0, 0, 0))
})
test_that("get_cox_syn_gradient works correctly", {
X <- matrix(rnorm(15), ncol = 3)
time <- rexp(5, rate = 0.1)
coefs <- rnorm(3)
gradient <- get_cox_syn_gradient(
X = X,
time = time,
coefs = coefs,
hazard_constant = 0.5
)
# Test that the gradient is a numeric vector
expect_true(is.numeric(gradient))
# Test that the length of the gradient matches the number of columns in X
expect_equal(length(gradient), ncol(X))
})
test_that("get_cox_risk_set_idx works correctly", {
time_of_interest <- 5
entry_vector <- c(0, 0, 0, 0, 0)
time_vector <- c(3, 5, 8, 9, 10)
status_vector <- c(1, 1, 0, 1, 0)
risk_set_idx <- get_cox_risk_set_idx(
time_of_interest = time_of_interest,
entry_vector = entry_vector,
time_vector = time_vector,
status_vector = status_vector
)
# Test that the output is a vector
expect_true(is.vector(risk_set_idx))
# Test that all indices are within the range of the time_vector length
expect_true(all(risk_set_idx >= 1 & risk_set_idx <= length(time_vector)))
# Test that all subjects in the risk set entered before or at the time of interest
expect_true(all(time_of_interest >= entry_vector[risk_set_idx]))
# Test with a time_of_interest that is not in time_vector
risk_set_idx_new <- get_cox_risk_set_idx(
time_of_interest = max(time_vector) + 1,
entry_vector = entry_vector,
time_vector = time_vector,
status_vector = status_vector
)
expect_true(length(risk_set_idx_new) == 0)
})
test_that("get_cox_hessian works correctly", {
X <- matrix(rnorm(15), ncol = 3)
time <- rexp(5, rate = 0.1)
status <- rbinom(5, 1, 0.5)
coefs <- rnorm(3)
entry_points <- rep(0, 5)
hessian_matrix <- get_cox_hessian(
X = X,
time = time,
status = status,
coefs = coefs,
entry_points = entry_points
)
# Test if the computed Hessian is a matrix
expect_true(is.matrix(hessian_matrix))
# Test the dimensions of the Hessian matrix
expect_equal(dim(hessian_matrix), c(ncol(X), ncol(X)))
# Test that the Hessian is symmetric (should be if correctly calculated)
expect_true(all.equal(hessian_matrix, t(hessian_matrix), tolerance = 1e-8))
# Test with no events
hessian_matrix_no_event <- get_cox_hessian(
X = X,
time = time,
status = c(0, 0, 0, 0, 0),
coefs = coefs,
entry_points = entry_points
)
expect_equal(hessian_matrix_no_event, matrix(0, ncol = ncol(X), nrow = ncol(X)))
})
test_that("get_cox_syn_hessian works correctly", {
X <- matrix(rnorm(15), ncol = 3)
time <- rexp(5, rate = 0.1)
coefs <- rnorm(3)
hazard_constant <- 0.7
hessian_matrix <- get_cox_syn_hessian(
X = X,
time = time,
coefs = coefs,
hazard_constant = hazard_constant
)
# Test if the computed Hessian is a matrix
expect_true(is.matrix(hessian_matrix))
# Test the dimensions of the Hessian matrix
expect_equal(dim(hessian_matrix), c(ncol(X), ncol(X)))
# Test that the Hessian is symmetric (should be if correctly calculated)
expect_true(all.equal(hessian_matrix, t(hessian_matrix), tolerance = 1e-8))
# Test with no variation in time (all times the same)
hessian_matrix_no_var <- get_cox_syn_hessian(
X = X,
time = rep(1, nrow(X)),
coefs = coefs,
hazard_constant = hazard_constant
)
# Test that the Hessian is still a valid numeric matrix
expect_true(is.matrix(hessian_matrix_no_var))
expect_true(is.numeric(hessian_matrix_no_var))
expect_equal(dim(hessian_matrix_no_var), c(ncol(X), ncol(X)))
})
test_that("validate_cox_input checks parameters correctly", {
# Test for valid inputs
cat_init <- get_adjusted_cat_init(
cat_init = cat_init_cox,
formula_rhs = ~ X1 + X2
)
expect_silent(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
tau = 1,
tau_seq = c(1, 2),
init_coefficients = rep(0, ncol(cat_init$adj_x)),
tol = 1e-3,
max_iter = 30,
cross_validation_fold_num = 2,
hazard_beta = 2,
tau_alpha = 2,
tau_gamma = 1
))
# Test for cat_init not generated from `cat_cox_initialization`
cat_init_invalid <- cat_init_lmm
cat_init_invalid$function_name <- "wrong_initialization"
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init_invalid
))
# Test for fomrula contain wrong time/status column name
expect_error(validate_cox_input(
formula = survival::Surv(time_wrong, status_wrong) ~ X1 + X2,
cat_init = cat_init
))
# Test for tau being negative
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
tau = -1
))
# Test for tau being a vector
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
tau = c(1, 2)
))
# Test for tau_seq being negative
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
tau_seq = c(-1, 1)
))
# Test for tau_seq being a single value
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
tau_seq = 1
))
# Test for init_coefficients being different length
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
init_coefficients = rep(0, ncol(cat_init$adj_x) + 1)
))
# Test for tol being 0
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
tol = 0
))
# Test for max_iter being 0
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
max_iter = 0
))
# Test for cross_validation_fold_num being 0
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
cross_validation_fold_num = 0
))
# Test for cross_validation_fold_num being large
expect_warning(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
cross_validation_fold_num = 3
))
# Test for cross_validation_fold_num being too large
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
cross_validation_fold_num = 100
))
# Test for hazard_beta being 0
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
hazard_beta = 0
))
# Test for tau_alpha being 0
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
tau_alpha = 0
))
# Test for tau_gamma being 0
expect_error(validate_cox_input(
formula = ~ X1 + X2,
cat_init = cat_init,
tau_gamma = 0
))
})
test_that("validate_cox_initialization_input checks parameters correctly", {
# Test for valid inputs
expect_silent(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = cox_data,
syn_size = NULL,
hazard_constant = NULL,
entry_points = NULL,
x_degree = NULL
))
# Test for wrong data format
expect_error(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = as.matrix(cox_data),
syn_size = NULL,
hazard_constant = NULL,
entry_points = NULL,
x_degree = NULL
))
# Test for complex formula
expect_error(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ X1 + X2 + X3,
data = cox_data,
syn_size = NULL,
hazard_constant = NULL,
entry_points = NULL,
x_degree = NULL
))
# Test for empty data
expect_error(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = data.frame(),
syn_size = NULL,
hazard_constant = NULL,
entry_points = NULL,
x_degree = NULL
))
# Test for lmm formula
expect_error(validate_cox_initialization_input(
formula = time ~ 1 + (1 | status),
data = cox_data,
syn_size = NULL,
hazard_constant = NULL,
entry_points = NULL,
x_degree = NULL
))
# Test for glm formula
expect_error(validate_cox_initialization_input(
formula = time ~ 1,
data = cox_data,
syn_size = NULL,
hazard_constant = NULL,
entry_points = NULL,
x_degree = NULL
))
# Test for zero synthetic data size
expect_error(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = cox_data,
syn_size = 0,
hazard_constant = NULL,
entry_points = NULL,
x_degree = NULL
))
# Test for positive synthetic data size
expect_silent(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = cox_data,
syn_size = 100,
hazard_constant = NULL,
entry_points = NULL,
x_degree = NULL
))
# Test for hazard constant equal to zero
expect_error(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = cox_data,
syn_size = NULL,
hazard_constant = 0,
entry_points = NULL,
x_degree = NULL
))
# Test for positive hazard constant
expect_silent(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = cox_data,
syn_size = NULL,
hazard_constant = 5,
entry_points = NULL,
x_degree = NULL
))
# Test for not aligned entry point
expect_error(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = cox_data,
syn_size = NULL,
hazard_constant = NULL,
entry_points = c(0, 0, 0, 0, 0),
x_degree = NULL
))
# Test for right length of entry point
expect_silent(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = cox_data,
syn_size = NULL,
hazard_constant = NULL,
entry_points = rep(0, nrow(cox_data)),
x_degree = NULL
))
# Test for negative entry point
expect_error(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = cox_data,
syn_size = NULL,
hazard_constant = NULL,
entry_points = rep(-1, nrow(cox_data)),
x_degree = NULL
))
# Test for x_degree being zero
expect_error(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = cox_data,
syn_size = NULL,
hazard_constant = NULL,
entry_points = NULL,
x_degree = rep(0, 3)
))
# Test for x_degree being positive
expect_silent(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = cox_data,
syn_size = NULL,
hazard_constant = NULL,
entry_points = NULL,
x_degree = rep(1, 3)
))
# Test for x_degree being a single value
expect_error(validate_cox_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = cox_data,
syn_size = NULL,
hazard_constant = NULL,
entry_points = NULL,
x_degree = 1
))
})
# ----- LMM -----
test_that("update_lmm_variance produces expected results for edge cases", {
# Edge case with very small variances
result_small_variance <- update_lmm_variance(
residual_variance = 1e-10,
random_effect_variance = 1e-10,
obs_z_eigenvalues = c(1, 2, 3),
syn_z_eigenvalues = c(1, 2, 3),
obs_adjusted_residuals = c(1, 2, 3),
syn_adjusted_residuals = c(1, 2, 3),
tau = 1
)
expect_true(is.finite(result_small_variance))
# Check monotonicity with varying tau
result_low_tau <- update_lmm_variance(1, 1, c(1, 1.5, 2), c(1, 1.5, 2), c(1, 2, 3), c(1, 2, 3), 1)
result_high_tau <- update_lmm_variance(1, 1, c(1, 1.5, 2), c(1, 1.5, 2), c(1, 2, 3), c(1, 2, 3), 10)
expect_true(result_low_tau < result_high_tau) # Higher tau should increase likelihood
# Symmetry check with swapped eigenvalues with tau equal to length of the eigenvalues
result_obs_syn_swapped <- update_lmm_variance(0.5, 1.2, c(1, 2, 2.5), c(1, 2, 3), c(1, 2, 3), c(1, 2, 3), 3)
result_syn_obs_swapped <- update_lmm_variance(0.5, 1.2, c(1, 2, 3), c(1, 2, 2.5), c(1, 2, 3), c(1, 2, 3), 3)
expect_equal(result_obs_syn_swapped, result_syn_obs_swapped)
})
test_that("validate_lmm_input checks parameters correctly", {
# Test for valid inputs
expect_silent(validate_lmm_input(
cat_init = cat_init_lmm,
tau = 1,
residual_variance_0 = 1,
random_effect_variance_0 = 1,
coefs_0 = rep(1, ncol(cat_init_lmm$obs_x)),
optimize_domain = c(0, 10),
max_iter = 100,
tol = 0.001,
tau_seq = c(0, 1),
cross_validation_fold_num = 5
))
# Test for cat_init not generated from `cat_lmm_initialization`
cat_init_invalid <- cat_init_lmm
cat_init_invalid$function_name <- "wrong_initialization"
expect_error(validate_lmm_input(
cat_init = cat_init_invalid
))
# Test for tau being negative
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
tau = -1
))
# Test for tau being a vector
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
tau = c(1, 2)
))
# Test for residual_variance_0 being zero
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
residual_variance_0 = 0
))
# Test for random_effect_variance_0 being zero
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
random_effect_variance_0 = 0
))
# Test for coefs_0 being too short for observation x
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
coefs_0 = c(1)
))
# Test for coefs_0 being too long for observation x
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
coefs_0 = rep(1, 5)
))
# Test for optimize_domain being negative
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
optimize_domain = c(-1, 0)
))
# Test for optimize_domain being a single value
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
optimize_domain = 1
))
# Test for optimize_domain being too long
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
optimize_domain = c(1, 2, 3)
))
# Test for max_iter being too zero
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
max_iter = 0
))
# Test for max_iter being a vector
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
max_iter = c(1, 2)
))
# Test for tol being zero
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
tol = 0
))
# Test for tol being a vector
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
tol = c(1, 2)
))
# Test for tau_seq being negative
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
tau_seq = c(-1, 1)
))
# Test for tau_seq being a single value
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
tau_seq = 1
))
# Test for cross_validation_fold_num being less than 2
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
cross_validation_fold_num = 1
))
# Test for cross_validation_fold_num being larger than observation data size
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
cross_validation_fold_num = 100
))
# Test for cross_validation_fold_num being a vector
expect_error(validate_lmm_input(
cat_init = cat_init_lmm,
cross_validation_fold_num = c(1, 2)
))
})
test_that("validate_lmm_initialization_input checks parameters correctly", {
# Test for valid inputs
expect_silent(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = "group",
syn_size = NULL
))
# Test for wrong data format
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = as.matrix(lmm_data),
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = "group",
syn_size = NULL
))
# Test for empty data
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = data.frame(),
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = "group",
syn_size = NULL
))
# Test for lmm formula
expect_silent(validate_lmm_initialization_input(
formula = Y ~ X1 + (1 | group),
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = "group",
syn_size = NULL
))
# Test for surv formula
expect_error(validate_lmm_initialization_input(
formula = survival::Surv(time, status) ~ 1,
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = "group",
syn_size = NULL
))
# Test for lack of x_cols
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = "group",
syn_size = NULL
))
# Test for lack of z_cols
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2"),
group_col = "group",
syn_size = NULL
))
# Test for wrong x_cols
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2", "X3", "X4"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = "group",
syn_size = NULL
))
# Test for wrong z_cols
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3", "Z4"),
group_col = "group",
syn_size = NULL
))
# Test for y_col being vector
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2"),
y_col = c("Y", "X3"),
z_cols = c("Z1", "Z2", "Z3"),
group_col = "group",
syn_size = NULL
))
# Test for y_col being wrong
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y1",
z_cols = c("Z1", "Z2", "Z3"),
group_col = "group",
syn_size = NULL
))
# Test for group_col being a vector
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2"),
group_col = c("group", "Z3"),
syn_size = NULL
))
# Test for group_col being wrong
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = c("group1"),
syn_size = NULL
))
# Test for syn_size is 0
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = c("group"),
syn_size = 0
))
# Test for syn_size is positive and large enough
expect_silent(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = c("group"),
syn_size = 100
))
# Test for syn_size is zero
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = c("group"),
syn_size = 0
))
# Test for syn_size is negative
expect_error(validate_lmm_initialization_input(
formula = Y ~ X1,
data = lmm_data,
x_cols = c("X1", "X2", "X3"),
y_col = "Y",
z_cols = c("Z1", "Z2", "Z3"),
group_col = c("group"),
syn_size = -1
))
})
# ----- General -----
test_that("validate_positive function works as expected", {
# Test for a positive scalar input, should not raise an error
expect_silent(validate_positive("param1", 5))
# Test for a zero scalar input, incl_0 = TRUE, should not raise an error
expect_silent(validate_positive("param1", 0, incl_0 = TRUE))
# Test for a positive vector input, is_vector = TRUE, should not raise an error
expect_silent(validate_positive("param1", c(2, 3, 4), is_vector = TRUE))
# Test for a vector with zero values, incl_0 = TRUE, should not raise an error
expect_silent(validate_positive("param1", c(0, 2, 3), incl_0 = TRUE, is_vector = TRUE))
# Test for a negative scalar input, expect an error
expect_error(
validate_positive("param1", -5),
"The provided `param1` should be a positive number."
)
# Test for a zero scalar input without incl_0 = TRUE, expect an error
expect_error(
validate_positive("param1", 0),
"The provided `param1` should be a positive number."
)
# Test for a negative value in vector input, is_vector = TRUE, expect an error
expect_error(
validate_positive("param1", c(-1, 2, 3), is_vector = TRUE),
"The provided `param1` should be a vector of positive numbers."
)
# Test for a non-vector input when is_vector = TRUE, expect an error
expect_error(
validate_positive("param1", 5, is_vector = TRUE),
"The provided `param1` should be a vector of positive numbers."
)
# Test for a single element when is_vector = TRUE and incl_0 = TRUE, expect an error if length is 1
expect_error(
validate_positive("param1", 0, incl_0 = TRUE, is_vector = TRUE),
"The provided `param1` should be a vector of non-negative numbers."
)
})
test_that("get_stan_model loads the correct Stan model file", {
# Test for default glm type with gaussian family with no additional options
stan_model <- get_stan_model(type = "glm", glm_family_string = "gaussian")
expect_s4_class(stan_model, "stanmodel") # Check that it returns a Stan model
expect_equal(attr(stan_model@model_code, "model_name2"), "glm_gaussian") # Check filename pattern
# Test for Cox model type
stan_model <- get_stan_model(type = "cox")
expect_s4_class(stan_model, "stanmodel")
expect_equal(attr(stan_model@model_code, "model_name2"), "cox")
# Test fwith `joint_tau = TRUE` or Cox model type
stan_model <- get_stan_model(type = "cox", joint_tau = TRUE)
expect_s4_class(stan_model, "stanmodel")
expect_equal(attr(stan_model@model_code, "model_name2"), "cox_joint")
# Test with `glm_family_string` = "binomial"
stan_model <- get_stan_model(type = "glm", glm_family_string = "binomial")
expect_s4_class(stan_model, "stanmodel")
expect_equal(attr(stan_model@model_code, "model_name2"), "glm_binomial")
# Test with `joint_tau = TRUE` (and binomial family to test combined settings)
stan_model <- get_stan_model(type = "glm", glm_family_string = "binomial", joint_tau = TRUE)
expect_s4_class(stan_model, "stanmodel")
expect_equal(attr(stan_model@model_code, "model_name2"), "glm_binomial_joint")
# Test for `glm_gaussian_known_variance = TRUE` with gaussian family
stan_model <- get_stan_model(type = "glm", glm_family_string = "gaussian", glm_gaussian_known_variance = TRUE)
expect_s4_class(stan_model, "stanmodel")
expect_equal(attr(stan_model@model_code, "model_name2"), "glm_gaussian_known_variance")
# Test for `glm_gaussian_known_variance = TRUE` and `joint_tau = TRUE` with gaussian family
stan_model <- get_stan_model(
type = "glm", glm_family_string = "gaussian", joint_tau = TRUE,
glm_gaussian_known_variance = TRUE
)
expect_s4_class(stan_model, "stanmodel")
expect_equal(attr(stan_model@model_code, "model_name2"), "glm_gaussian_joint_known_variance")
# Test for joint_tau = TRUE` with gaussian family
stan_model <- get_stan_model(
type = "glm", glm_family_string = "gaussian", joint_tau = TRUE
)
expect_s4_class(stan_model, "stanmodel")
expect_equal(attr(stan_model@model_code, "model_name2"), "glm_gaussian_joint")
# Test with multiple options set: `glm_binomial_joint_theta` and `glm_binomial_joint_alpha`
stan_model <- get_stan_model(
type = "glm",
glm_family_string = "binomial",
glm_binomial_joint_theta = TRUE,
glm_binomial_joint_alpha = TRUE
)
expect_s4_class(stan_model, "stanmodel")
expect_equal(attr(stan_model@model_code, "model_name2"), "glm_binomial_joint_theta_alpha")
# Test with `glm_binomial_joint_theta`
stan_model <- get_stan_model(
type = "glm",
glm_family_string = "binomial",
glm_binomial_joint_theta = TRUE
)
expect_s4_class(stan_model, "stanmodel")
expect_equal(attr(stan_model@model_code, "model_name2"), "glm_binomial_joint_theta")
# Test with `glm_binomial_joint_alpha`
stan_model <- get_stan_model(
type = "glm",
glm_family_string = "binomial",
glm_binomial_joint_alpha = TRUE
)
expect_s4_class(stan_model, "stanmodel")
expect_equal(attr(stan_model@model_code, "model_name2"), "glm_binomial_joint")
# Edge case: invalid `glm_family_string`
expect_error(get_stan_model(type = "glm", glm_family_string = "invalid_family"))
})
test_that("get_linear_predictor works correctly", {
# Test with no intercept
X <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
coefs <- c(0.5, 1.5)
expect_equal(
get_linear_predictor(X, coefs),
X %*% coefs
)
# Test with intercept
coefs_with_intercept <- c(1, 0.5, 1.5)
expect_equal(
get_linear_predictor(X, coefs_with_intercept),
coefs_with_intercept[1] + X %*% coefs_with_intercept[-1]
)
# Test with dimension mismatch
expect_error(get_linear_predictor(X, c(0.5)))
# Test with minimal input
X_small <- matrix(1, ncol = 1)
coefs_small <- c(1)
expect_equal(
get_linear_predictor(X_small, coefs_small),
X_small %*% coefs_small
)
# Test with intercept and minimal input
coefs_small_with_intercept <- c(1, 2)
expect_equal(
get_linear_predictor(X_small, coefs_small_with_intercept),
coefs_small_with_intercept[1] + X_small %*% coefs_small_with_intercept[2]
)
})
test_that("get_discrepancy computes in consistence", {
X <- matrix(rnorm(15), ncol = 3)
coefs <- rnorm(4)
binomial_Y <- rbinom(5, 1, 0.5)
gaussian_Y <- rnorm(5)
lp <- coefs[1] + c(X %*% coefs[-1])
binomial_est_Y <- 1 / (1 + exp(-lp))
gaussian_est_Y <- lp
# Test for logarithmic error
expect_equal(
get_discrepancy(
discrepancy_method = "mean_logarithmic_error",
family_string = "binomial",
X = X,
Y = binomial_Y,
coefs = coefs
),
get_discrepancy(
discrepancy_method = "mean_logarithmic_error",
family_string = "binomial",
X = X,
Y = binomial_Y,
est_Y = binomial_est_Y
)
)
# Test for mean square error
expect_equal(
get_discrepancy(
discrepancy_method = "mean_square_error",
family_string = "gaussian",
X = X,
Y = gaussian_Y,
coefs = coefs
),
get_discrepancy(
discrepancy_method = "mean_square_error",
family_string = "gaussian",
X = X,
Y = gaussian_Y,
est_Y = gaussian_est_Y
)
)
# Test for mean classification error
expect_equal(
get_discrepancy(
discrepancy_method = "mean_classification_error",
family_string = "binomial",
X = X,
Y = binomial_Y,
coefs = coefs
),
get_discrepancy(
discrepancy_method = "mean_classification_error",
family_string = "binomial",
X = X,
Y = binomial_Y,
est_Y = binomial_est_Y
)
)
# Test for logistic deviance with no coefs input
expect_error(
get_discrepancy(
discrepancy_method = "logistic_deviance",
family_string = "binomial",
X = X,
Y = binomial_Y,
est_Y = binomial_est_Y
)
)
# Test for logistic deviance with coefs input
expect_silent(
get_discrepancy(
discrepancy_method = "logistic_deviance",
family_string = "binomial",
X = X,
Y = binomial_Y,
coefs = coefs
)
)
# Test for logistic deviance with different coefs input
expect_false(
get_discrepancy(
discrepancy_method = "logistic_deviance",
family_string = "binomial",
X = X,
Y = binomial_Y,
coefs = coefs
) == get_discrepancy(
discrepancy_method = "logistic_deviance",
family_string = "binomial",
X = X,
Y = binomial_Y,
coefs = rnorm(4)
)
)
})
test_that("get_adjusted_cat_init adjusts data correctly", {
formula_rhs_0 <- ~1
formula_rhs_1 <- ~X1
formula_rhs_2 <- ~ X1 + X2
formula_rhs_3 <- ~.
# Adjust the data
adjusted_cat_init_0 <- get_adjusted_cat_init(cat_init_glm_gaussian, formula_rhs_0)
adjusted_cat_init_1 <- get_adjusted_cat_init(cat_init_glm_gaussian, formula_rhs_1)
adjusted_cat_init_2 <- get_adjusted_cat_init(cat_init_glm_binomial, formula_rhs_2)
adjusted_cat_init_3 <- get_adjusted_cat_init(cat_init_cox, formula_rhs_3)
# Check that the adjusted data frame has the expected structure
expect_s3_class(adjusted_cat_init_0$adj_x, "data.frame")
expect_s3_class(adjusted_cat_init_1$adj_x, "data.frame")
expect_s3_class(adjusted_cat_init_2$adj_x, "data.frame")
expect_s3_class(adjusted_cat_init_3$adj_x, "data.frame")
# Validate sizes and contents of adjusted observed and synthetic parts
expect_equal(nrow(adjusted_cat_init_0$adj_obs_x), adjusted_cat_init_0$obs_size)
expect_equal(nrow(adjusted_cat_init_0$adj_syn_x), adjusted_cat_init_0$size - adjusted_cat_init_0$obs_size)
expect_equal(nrow(adjusted_cat_init_1$adj_obs_x), adjusted_cat_init_1$obs_size)
expect_equal(nrow(adjusted_cat_init_1$adj_syn_x), adjusted_cat_init_1$size - adjusted_cat_init_1$obs_size)
expect_equal(nrow(adjusted_cat_init_2$adj_obs_x), adjusted_cat_init_2$obs_size)
expect_equal(nrow(adjusted_cat_init_2$adj_syn_x), adjusted_cat_init_2$size - adjusted_cat_init_2$obs_size)
expect_equal(nrow(adjusted_cat_init_3$adj_obs_x), adjusted_cat_init_3$obs_size)
expect_equal(nrow(adjusted_cat_init_3$adj_syn_x), adjusted_cat_init_3$size - adjusted_cat_init_3$obs_size)
# Verify that the adjusted data contains the expected columns based on the formulas
expect_equal(ncol(adjusted_cat_init_0$adj_x), 0)
expect_true(all(c("X1") %in% colnames(adjusted_cat_init_1$adj_x)))
expect_true(all(c("X1", "X2") %in% colnames(adjusted_cat_init_2$adj_x)))
expect_true(all(c("X1", "X2", "X3") %in% colnames(adjusted_cat_init_3$adj_x)))
expect_equal(ncol(adjusted_cat_init_0$adj_obs_x), 0)
expect_true(all(c("X1") %in% colnames(adjusted_cat_init_1$adj_obs_x)))
expect_true(all(c("X1", "X2") %in% colnames(adjusted_cat_init_2$adj_obs_x)))
expect_true(all(c("X1", "X2", "X3") %in% colnames(adjusted_cat_init_3$adj_obs_x)))
expect_equal(ncol(adjusted_cat_init_0$adj_syn_x), 0)
expect_true(all(c("X1") %in% colnames(adjusted_cat_init_1$adj_syn_x)))
expect_true(all(c("X1", "X2") %in% colnames(adjusted_cat_init_2$adj_syn_x)))
expect_true(all(c("X1", "X2", "X3") %in% colnames(adjusted_cat_init_3$adj_syn_x)))
})
test_that("get_resampled_df works correctly for various cases", {
mock_data <- data.frame(
X1 = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1000), # Continuous variable (Test for Deskewing)
X2 = c(0, 1, 1, 1, 1, 1, 1, 1, 1, 1), # Categorical variable (Test for Flattening)
X3 = c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4), # Another continuous variable (Test for Smoothing)
X4 = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5) # Another continuous variable (Test for no action)
)
data_degree <- c(1, 1, 3, 1)
# Test basic resampling functionality
resample_size <- 50
result <- get_resampled_df(
data = mock_data,
resample_size = resample_size,
data_degree = data_degree
)
expect_false(setequal(unique(result$resampled_df[, 1]), unique(mock_data[, 1])))
expect_true(setequal(unique(result$resampled_df[, 2]), unique(mock_data[, 2])))
expect_false(setequal(unique(result$resampled_df[, 3]), unique(mock_data[, 3])))
expect_true(setequal(unique(result$resampled_df[, 4]), unique(mock_data[, 4])))
# Validate that each column are identified with correct type
expect_equal(result$resampled_df_log$Type[1], "Continuous")
expect_equal(result$resampled_df_log$Type[2], "Category")
expect_equal(result$resampled_df_log$Type[3], "Continuous")
expect_equal(result$resampled_df_log$Type[4], "Continuous")
# Validate that each column is processed according to its type
expect_true(grepl("Deskewing", result$resampled_df_log$Process[1]))
expect_false(grepl("Flattening", result$resampled_df_log$Process[1]))
expect_false(grepl("Smoothing", result$resampled_df_log$Process[1]))
expect_false(grepl("Deskewing", result$resampled_df_log$Process[2]))
expect_true(grepl("Flattening", result$resampled_df_log$Process[2]))
expect_false(grepl("Smoothing", result$resampled_df_log$Process[2]))
expect_false(grepl("Deskewing", result$resampled_df_log$Process[3]))
expect_false(grepl("Flattening", result$resampled_df_log$Process[3]))
expect_true(grepl("Smoothing", result$resampled_df_log$Process[3]))
expect_false(grepl("Deskewing", result$resampled_df_log$Process[4]))
expect_false(grepl("Flattening", result$resampled_df_log$Process[4]))
expect_false(grepl("Smoothing", result$resampled_df_log$Process[4]))
# Check that the resampled data frame has the correct dimensions
expect_equal(nrow(result$resampled_df), resample_size)
expect_equal(ncol(result$resampled_df), ncol(mock_data))
# Check that the resampled log is created correctly
expect_true(!is.null(result$resampled_df_log))
expect_equal(nrow(result$resampled_df_log), ncol(mock_data))
# Validate that resampling processes are logged correctly
expect_true(all(c("Covariate", "Type", "Process") %in% colnames(result$resampled_df_log)))
# Test with resample_only = TRUE
result_resample_only <- get_resampled_df(
data = mock_data,
resample_size = resample_size,
resample_only = TRUE
)
expect_true(setequal(unique(result_resample_only$resampled_df[, 1]), unique(mock_data[, 1])))
expect_true(setequal(unique(result_resample_only$resampled_df[, 2]), unique(mock_data[, 2])))
expect_true(setequal(unique(result_resample_only$resampled_df[, 3]), unique(mock_data[, 3])))
expect_true(setequal(unique(result_resample_only$resampled_df[, 4]), unique(mock_data[, 4])))
expect_null(result_resample_only$resampled_df_log)
# Test without data_degree parameter
result_without_degree <- get_resampled_df(
data = mock_data,
resample_size = resample_size,
data_degree = NULL
)
# Check if processes are logged with degree
expect_false(is.null(result_without_degree$resampled_df_log))
# Validate that each column is processed according to its type
expect_true(grepl("Deskewing", result_without_degree$resampled_df_log$Process[1]))
expect_false(grepl("Flattening", result_without_degree$resampled_df_log$Process[1]))
expect_false(grepl("Smoothing", result_without_degree$resampled_df_log$Process[1]))
expect_false(grepl("Deskewing", result_without_degree$resampled_df_log$Process[2]))
expect_true(grepl("Flattening", result_without_degree$resampled_df_log$Process[2]))
expect_false(grepl("Smoothing", result_without_degree$resampled_df_log$Process[2]))
expect_false(grepl("Deskewing", result_without_degree$resampled_df_log$Process[3]))
expect_false(grepl("Flattening", result_without_degree$resampled_df_log$Process[3]))
expect_false(grepl("Smoothing", result_without_degree$resampled_df_log$Process[3]))
expect_false(grepl("Deskewing", result_without_degree$resampled_df_log$Process[4]))
expect_false(grepl("Flattening", result_without_degree$resampled_df_log$Process[4]))
expect_false(grepl("Smoothing", result_without_degree$resampled_df_log$Process[4]))
})
test_that("get_formula_string works correctly", {
# Test with a simple formula
formula <- y ~ x1 + x2
expected_string <- "y ~ x1 + x2"
expect_equal(get_formula_string(formula), expected_string)
# Test with an interaction term
formula_interaction <- y ~ x1 * x2
expected_string_interaction <- "y ~ x1 * x2"
expect_equal(get_formula_string(formula_interaction), expected_string_interaction)
# Test with a more complex formula
formula_complex <- y ~ x1 + x2 + I(x1^2) + log(x2)
expected_string_complex <- "y ~ x1 + x2 + I(x1^2) + log(x2)"
expect_equal(get_formula_string(formula_complex), expected_string_complex)
})
test_that("get_formula_lhs works correctly", {
# Test with a simple formula
formula <- y ~ x1 + x2
expected_lhs <- "y"
expect_equal(get_formula_lhs(formula), expected_lhs)
# Test with an interaction term
formula_interaction <- y ~ x1 * x2
expected_lhs_interaction <- "y"
expect_equal(get_formula_lhs(formula_interaction), expected_lhs_interaction)
# Test with a more complex formula
formula_complex <- y ~ x1 + x2 + I(x1^2) + log(x2)
expected_lhs_complex <- "y"
expect_equal(get_formula_lhs(formula_complex), expected_lhs_complex)
})
test_that("get_formula_rhs works correctly", {
# Test with a simple formula
formula <- y ~ x1 + x2
expected_rhs <- "x1 + x2"
expect_equal(get_formula_rhs(formula), expected_rhs)
# Test with tilde included
expected_rhs_with_tilde <- "~ x1 + x2"
expect_equal(get_formula_rhs(formula, with_tilde = TRUE), expected_rhs_with_tilde)
# Test with interaction term
formula_interaction <- y ~ x1 * x2
expected_rhs_interaction <- "x1 * x2"
expect_equal(get_formula_rhs(formula_interaction), expected_rhs_interaction)
# Test with a more complex formula
formula_complex <- y ~ x1 + x2 + I(x1^2) + log(x2)
expected_rhs_complex <- "x1 + x2 + I(x1^2) + log(x2)"
expect_equal(get_formula_rhs(formula_complex), expected_rhs_complex)
# Test with tilde for complex formula
expected_rhs_complex_with_tilde <- "~ x1 + x2 + I(x1^2) + log(x2)"
expect_equal(get_formula_rhs(formula_complex, with_tilde = TRUE), expected_rhs_complex_with_tilde)
})
test_that("is.continuous works correctly", {
# Test with continuous numeric vector
continuous_vector <- c(1, 2, 3, 4, 5)
expect_true(is.continuous(continuous_vector))
# Test with categorical vector
categorical_vector <- c("A", "B", "A", "B", "C")
expect_false(is.continuous(categorical_vector))
# Test with binary numeric vector
binary_vector <- c(0, 1, 1, 0, 1)
expect_false(is.continuous(binary_vector))
# Test with a numeric vector with only two unique values
two_unique_vector <- c(1, 1, 2, 2, 1)
expect_false(is.continuous(two_unique_vector))
# Test with numeric vector with more than two unique values
mixed_numeric_vector <- c(1, 2, 3, 4, 5, 6)
expect_true(is.continuous(mixed_numeric_vector))
})
test_that("get_standardized_data works correctly", {
# Test with numeric and factor columns
data <- data.frame(
numeric_col = c(2, 3, NA, 4),
factor_col = factor(c("A", "B", "A", NA)),
stringsAsFactors = FALSE
)
# Test with numeric NA replacement
standardized_data <- get_standardized_data(data, na_replace = 0)
expect_equal(standardized_data$numeric_col, c(2, 3, 0, 4))
expect_equal(standardized_data$factor_col, factor(c("A", "B", "A", "A")))
# Test with mode replacement for factors
standardized_data <- get_standardized_data(data, na_replace = mean)
expect_equal(standardized_data$numeric_col, c(2, 3, 3, 4))
# Test with NA omission
data_with_na_omit <- get_standardized_data(data, na_replace = stats::na.omit)
expect_equal(nrow(data_with_na_omit), 2)
expect_equal(ncol(data_with_na_omit), 2)
# Test with no NA values
data_no_na <- data.frame(
numeric_col = c(1, 2, 4),
factor_col = factor(c("A", "B", "A")),
stringsAsFactors = FALSE
)
standardized_data_no_na <- get_standardized_data(data_no_na)
expect_equal(standardized_data_no_na, data_no_na)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.