Nothing
#' Test and compare the resulting `poLCA.posterior()` function
#'
#' Test and compare the resulting `poLCAParallel::poLCA.posterior()` with the
#' original `poLCA::poLCA.posterior()`. They should produce the same results
#'
#' Also tests if the resulting `poLCAParallel::poLCA.posterior()` are valid
#' probabilities and finite
#'
#' It is possible the original `poLCA::poLCA.posterior()` may produce NaN or
#' Inf. They are ignored in the comparison
#'
#' @param posterior_parallel Return value of `poLCAParallel::poLCA.posterior()`
#' @param posterior_polca Return value of `poLCA::poLCA.posterior()`
test_posterior <- function(posterior_parallel,
posterior_polca) {
expect_identical(all(is.finite(posterior_parallel)), TRUE)
test_cluster_probs(
posterior_parallel,
nrow(posterior_parallel), ncol(posterior_parallel)
)
# only do a comparison test if the original code produce finite results
if (all(is.finite(posterior_polca))) {
expect_equal(posterior_parallel, posterior_polca)
}
}
#' Test the function `poLCA.posterior()` for the non-regression problem
#'
#' Test the function `poLCA.posterior()` for the non-regression problem. The
#' model is fitted on data and then used to work out the posterior for the
#' training data, unseen no-missing test data and unseen with-missing test data.
#' The test compares the results with the original poLCA code
#'
#' @param n_data Number of data points
#' @param n_outcomes Vector of integers, number of outcomes for each category
#' @param n_cluster Number of clusters fitted
#' @param n_rep Number of different initial values to try
#' @param na_rm Logical, if to remove NA responses
#' @param n_thread Number of threads to use
#' @param maxiter Number of iterations used in the EM algorithm
#' @param tol Tolerance used in the EM algorithm
#' @param prob_na_train Probability of missing data in the training data
#' @param n_data_test Number of data points in the unseen test data
#' @param prob_na_test Probability of missing data in the unseen test data
test_non_regress_posterior <- function(n_data, n_outcomes, n_cluster, n_rep,
na_rm, n_thread, maxiter, tol,
prob_na_train, n_data_test,
prob_na_test) {
responses <- as.data.frame(
random_response(n_data, n_outcomes, prob_na_train, NaN)
)
formula <- get_non_regression_formula(responses)
lc <- poLCAParallel::poLCA(formula, responses, n_cluster,
maxiter = maxiter, tol = tol, na.rm = na_rm, nrep = n_rep,
verbose = FALSE, n.thread = n_thread
)
# using training data
test_posterior(
poLCAParallel::poLCA.posterior(lc, lc$y),
poLCA::poLCA.posterior(lc, lc$y)
)
# fully observed data
responses <- random_response(n_data_test, n_outcomes, 0, NaN)
test_posterior(
poLCAParallel::poLCA.posterior(lc, responses),
poLCA::poLCA.posterior(lc, responses)
)
# partially observed data
responses <- random_response(n_data_test, n_outcomes, prob_na_test, NaN)
test_posterior(
poLCAParallel::poLCA.posterior(lc, responses),
poLCA::poLCA.posterior(lc, responses)
)
}
#' Test the function `poLCA.posterior()` for the regression problem
#'
#' Test the function `poLCA.posterior()` for the non-regression problem. The
#' model is fitted on data and then used to work out the posterior for the
#' training data, unseen no-missing test data and unseen with-missing test data.
#' The test compares the results with the original poLCA code
#'
#' @param n_data Number of data points
#' @param n_feature Number of features
#' @param n_outcomes Vector of integers, number of outcomes for each category
#' @param n_cluster Number of clusters fitted
#' @param n_rep Number of different initial values to try
#' @param na_rm Logical, if to remove NA responses
#' @param n_thread Number of threads to use
#' @param maxiter Number of iterations used in the EM algorithm
#' @param tol Tolerance used in the EM algorithm
#' @param prob_na_train Probability of missing data in the training data
#' @param n_data_test Number of data points in the unseen test data
#' @param prob_na_test Probability of missing data in the unseen test data
test_regress_posterior <- function(n_data, n_feature, n_outcomes, n_cluster,
n_rep, na_rm, n_thread, maxiter, tol,
prob_na_train, n_data_test, prob_na_test) {
features <- random_features(n_data, n_feature)
responses <- random_response(n_data, n_outcomes, prob_na_train, NaN)
formula <- get_regression_formula(responses, features)
data <- cbind(responses, features)
lc <- poLCAParallel::poLCA(formula, data, n_cluster,
maxiter = maxiter, tol = tol, na.rm = na_rm, nrep = n_rep,
verbose = FALSE, n.thread = n_thread
)
# using training data
test_posterior(
poLCAParallel::poLCA.posterior(lc, lc$y),
poLCA::poLCA.posterior(lc, lc$y)
)
# fully observed data
responses <- random_response(n_data_test, n_outcomes, 0, NaN)
test_posterior(
poLCAParallel::poLCA.posterior(lc, responses),
poLCA::poLCA.posterior(lc, responses)
)
# partially observed data
responses <- random_response(n_data_test, n_outcomes, prob_na_test, NaN)
test_posterior(
poLCAParallel::poLCA.posterior(lc, responses),
poLCA::poLCA.posterior(lc, responses)
)
}
test_that("non-regression-full-data", {
# test using na_rm = TRUE and FALSE
set.seed(-1381922797)
seeds <- sample.int(.Machine$integer.max, N_REPEAT)
for (i in seq_len(N_REPEAT)) {
set.seed(seeds[i])
expect_no_error(test_non_regress_posterior(
100,
c(2, 3, 5, 2, 2),
3,
4,
TRUE,
N_THREAD,
DEFAULT_MAXITER,
DEFAULT_TOL,
0,
50,
0.01
))
}
set.seed(481136649)
seeds <- sample.int(.Machine$integer.max, N_REPEAT)
for (i in seq_len(N_REPEAT)) {
set.seed(seeds[i])
expect_no_error(test_non_regress_posterior(
100,
c(2, 3, 5, 2, 2),
3,
4,
FALSE,
N_THREAD,
DEFAULT_MAXITER,
DEFAULT_TOL,
0,
50,
0.01
))
}
})
test_that("non-regression-missing-data", {
# test using na_rm = TRUE and FALSE
set.seed(1210610989)
seeds <- sample.int(.Machine$integer.max, N_REPEAT)
for (i in seq_len(N_REPEAT)) {
set.seed(seeds[i])
expect_no_error(test_non_regress_posterior(
100,
c(2, 3, 5, 2, 2),
3,
4,
TRUE,
N_THREAD,
DEFAULT_MAXITER,
DEFAULT_TOL,
0.1,
50,
0.01
))
}
set.seed(1304862690)
seeds <- sample.int(.Machine$integer.max, N_REPEAT)
for (i in seq_len(N_REPEAT)) {
set.seed(seeds[i])
expect_no_error(test_non_regress_posterior(
100,
c(2, 3, 5, 2, 2),
3,
4,
FALSE,
N_THREAD,
DEFAULT_MAXITER,
DEFAULT_TOL,
0.1,
50,
0.01
))
}
})
test_that("regression-full-data", {
# test using na_rm = TRUE and FALSE
set.seed(-1529442620)
seeds <- sample.int(.Machine$integer.max, N_REPEAT)
for (i in seq_len(N_REPEAT)) {
set.seed(seeds[i])
expect_no_error(test_regress_posterior(
100,
4,
c(2, 3, 5, 2, 2),
3,
4,
TRUE,
N_THREAD,
DEFAULT_MAXITER,
DEFAULT_TOL,
0,
50,
0.01
))
}
set.seed(81779870)
seeds <- sample.int(.Machine$integer.max, N_REPEAT)
for (i in seq_len(N_REPEAT)) {
set.seed(seeds[i])
expect_no_error(test_regress_posterior(
100,
4,
c(2, 3, 5, 2, 2),
3,
4,
FALSE,
N_THREAD,
DEFAULT_MAXITER,
DEFAULT_TOL,
0,
50,
0.01
))
}
})
test_that("regression-missing-data", {
# test using na_rm = TRUE and FALSE
set.seed(-1396271961)
seeds <- sample.int(.Machine$integer.max, N_REPEAT)
for (i in seq_len(N_REPEAT)) {
set.seed(seeds[i])
expect_no_error(test_regress_posterior(
100,
4,
c(2, 3, 5, 2, 2),
3,
4,
TRUE,
N_THREAD,
DEFAULT_MAXITER,
DEFAULT_TOL,
0.1,
50,
0.01
))
}
set.seed(63195066)
seeds <- sample.int(.Machine$integer.max, N_REPEAT)
for (i in seq_len(N_REPEAT)) {
set.seed(seeds[i])
expect_no_error(test_regress_posterior(
100,
4,
c(2, 3, 5, 2, 2),
3,
4,
FALSE,
N_THREAD,
DEFAULT_MAXITER,
DEFAULT_TOL,
0.1,
50,
0.01
))
}
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.