context("shc constructor method")
test_that("shc accepts only matrix data input", {
## simple dataset
set.seed(100)
dm <- matrix(rnorm(200), ncol=2, nrow=100)
## check constructor works on original data
expect_is(shc(dm), "shc")
## check constructor fails with data.frame
err_df <- "x must be a matrix; use as.matrix if necessary"
expect_error(shc(data.frame(dm)), err_df)
})
test_that("shc accepts common clustering algorithm parameters", {
## simple dataset
set.seed(100)
dm <- matrix(rnorm(200), ncol=2, nrow=100)
## list of valid parameter settings to test
valid_params <- list(
list(metric="euclidean", linkage="ward.D2"),
list(metric="euclidean", linkage="ward"),
list(metric="euclidean", linkage="average"),
list(metric="euclidean", linkage="complete"),
list(metric="euclidean", linkage="single"),
list(metric="manhattan", linkage="average"),
list(metric="cor", linkage="average")
)
## check common hclust methods are accepted
for (p in valid_params) {
out <- shc(dm, n_sim=5, n_min=30, metric=p$metric, linkage=p$linkage)
if (p$metric == "cor") {
hc <- hclust(as.dist(1 - cor(t(dm))), method=p$linkage)
} else {
hc <- hclust(dist(dm, method=p$metric), method=p$linkage)
}
## check class and clustering results
expect_is(out, "shc")
expect_equal(out$hc_dat$height, hc$height)
}
## check minkowski distance separately
out_d <- shc(dm, n_sim=5, n_min=30, metric="minkowski", linkage="average")
out_3 <- shc(dm, n_sim=5, n_min=30, metric="minkowski", linkage="average", l=3)
hc_d <- hclust(dist(dm, method="minkowski"), method="average")
hc_3 <- hclust(dist(dm, method="minkowski", p=3), method="average")
expect_is(out_d, "shc")
expect_equal(out_d$hc_dat$height, hc_d$height)
expect_is(out_3, "shc")
expect_equal(out_3$hc_dat$height, hc_3$height)
})
test_that("shc accepts alpha values between 0 and 1", {
## simple dataset
set.seed(100)
dm <- matrix(rnorm(200), ncol=2, nrow=100)
## check still valid, but nothing significant
out <- shc(dm, alpha = 0)
expect_equal(sum(out$nd_type == "sig"), 0)
## check still valid, but only 1 test
out <- shc(dm, alpha = 1e-200)
expect_equal(sum(out$nd_type == "not_sig"), 1)
## check still valid, but nothing tested
out <- shc(dm, alpha = 1)
expect_equal(sum(out$nd_type == "not_sig"), 0)
## check returns errors
err_alpha <- "invalid choice for alpha; alpha must be 0 < alpha < 1"
expect_error(shc(dm, alpha = 1.2), err_alpha)
expect_error(shc(dm, alpha = -.2), err_alpha)
})
test_that("shc accepts n_sim values", {
## simple dataset
set.seed(100)
dm <- matrix(rnorm(200), ncol=2, nrow=100)
## some simulation sizes to test
n_sim_vec <- c(1, 5, 10, 100)
## check that n_sim value matches num CIs simulated
for (n_sim in n_sim_vec) {
out <- shc(dm, n_sim = n_sim)
expect_equal(dim(out$ci_sim)[2], n_sim)
}
})
test_that("shc accepts n_min values between 3 and n", {
## simple dataset
set.seed(100)
dm <- matrix(rnorm(200), ncol=2, nrow=100)
## check valid input values
expect_is(shc(dm, n_min = 100), "shc")
expect_is(shc(dm, n_min = 3), "shc")
## check invalid input values
expect_error(shc(dm, n_min = 101), "n_min must be <= n")
expect_error(shc(dm, n_min = 1), "n_min must be >= 3")
expect_error(shc(dm, n_min = 2), "n_min must be >= 3")
})
test_that("shc accepts icovest values in {1, 2, 3}", {
## simple dataset
set.seed(100)
dm <- matrix(rnorm(200), ncol=2, nrow=100)
## check valid input values
expect_is(shc(dm, n_sim=10, n_min=30, icovest = 1), "shc")
expect_is(shc(dm, n_sim=10, n_min=30, icovest = 2), "shc")
expect_is(shc(dm, n_sim=10, n_min=30, icovest = 3), "shc")
## check invalid input values
warn_icov <- "icovest should be 1, 2 or 3. Using default value: 1."
expect_warning(shc(dm, n_sim=10, n_min=30, icovest=4), warn_icov)
expect_warning(shc(dm, n_sim=10, n_min=30, icovest=2.5), warn_icov)
expect_warning(shc(dm, n_sim=10, n_min=30, icovest=-1), warn_icov)
})
test_that("shc accepts expected ci/null_alg parameter values", {
## simple dataset
set.seed(100)
dm <- matrix(rnorm(200), ncol=2, nrow=100)
## check that common pairs are accepted
expect_is(shc(dm, ci = "2CI", null_alg = "hclust"), "shc")
expect_is(shc(dm, ci = "2CI", null_alg = "2means"), "shc")
expect_is(shc(dm, ci = "linkage", null_alg = "hclust"), "shc")
## check that linkage + 2means isn't accepted
err_mismatch <- "ci = 'linkage', null_alg = '2means' cannot be specified"
expect_error(shc(dm, ci = "linkage", null_alg = "2means"), err_mismatch)
## check that multiple ci/null_alg values can be passed
mout <- shc(dm, ci = c("linkage", "2CI"), null_alg = c("hclust", "hclust"), n_sim = 10)
expect_equal(dim(mout$ci_sim), c(99, 10, 2))
expect_equal(dim(mout$ci_dat), c(99, 2))
## check that error occurs when ci/null_alg aren't same length
err_length <- "ci and null_alg must be of same length"
expect_error(shc(dm, ci = c("linkage", "2CI"), null_alg = "2means"), err_length)
})
test_that("shc accepts only valid ci_idx/ci_emp parameters", {
## simple dataset
set.seed(100)
dm <- matrix(rnorm(200), ncol=2, nrow=100)
ci_vec <- c("2CI", "2CI")
null_vec <- c("hclust", "2means")
## check default parameters don't cause error
obj <- shc(dm, n_sim=10, n_min=30, ci=ci_vec, null_alg=null_vec, ci_idx=1, ci_emp=FALSE)
expect_is(obj, "shc")
## check non-default parameters don't cause error
obj <- shc(dm, n_sim=10, n_min=30, ci=ci_vec, null_alg=null_vec, ci_idx=2, ci_emp=TRUE)
expect_is(obj, "shc")
## check invalid choice causes errors
err_long <- "invalid choice for ci_idx"
expect_error(shc(dm, ci = ci_vec, null_alg = null_vec, ci_idx = 4, ci_emp = TRUE), err_long)
})
test_that("shc accepts user specified metric functions w/ vecmet and matmet", {
## simple dataset
set.seed(100)
dm <- matrix(rnorm(100), ncol=2, nrow=50)
## valid vecmet function and matrix extension
vfun <- function(x, y) {1 - cor(x, y)}
vfun_m <- function(x) { as.dist(outer(split(x, row(x)), split(x, row(x)), Vectorize(vfun))) }
## valid matmet function
mfun <- function(x) { as.dist(1 - cor(t(x))) }
## check class and clustering results with vecmet
shc_vfun <- shc(dm, n_sim=10, n_min=20, vecmet=vfun, linkage="average")
hc_vfun <- hclust(vfun_m(dm), method="average")
expect_is(shc_vfun, "shc")
expect_equal(shc_vfun$hc_dat$height, hc_vfun$height)
## check class and clustering results with matmet
shc_mfun <- shc(dm, n_sim=10, n_min=20, matmet=mfun, linkage="average")
hc_mfun <- hclust(mfun(dm), method="average")
expect_is(shc_mfun, "shc")
expect_equal(shc_mfun$hc_dat$height, hc_mfun$height)
## check error if both vecmet and matmet are specified
expect_error(shc(dm, vecmet=vfun, matmet=mfun, linkage="average"),
"only one of vecmet and matmet can be specified")
## check errors if vecmet, matmet are not functions
expect_error(shc(dm, vecmet="some_text", linkage="average"),
paste("vecmet must be a function taking two vectors as input",
"and returning a real-valued dissimilarity"))
expect_error(shc(dm, matmet="some_text", linkage="average"),
paste("matmet must be a function taking a data matrix as input",
"and returning an object of class dist"))
## invalid function specifications
vfun_w <- function(x, y) { warning("wrong function") }
vfun_e <- function(x, y) { stop("wrong function") }
## check if warnings/errors are reurned if vecmet is a poorly specified
expect_error(shc(dm, vecmet=vfun_w, linkage="average"),
"warning for vecmet specification: ")
expect_error(shc(dm, vecmet=vfun_e, linkage="average"),
"error with vecmet specification: ")
})
test_that("shc accepts different bkgd noise calculations w/ bkgd_pca", {
## simple dataset
set.seed(100)
dm <- matrix(rnorm(100*100), ncol=100, nrow=100)
## run shc with/without PCA-based background noise estimation
out_t <- shc(dm, n_sim=10, n_min=30, bkgd_pca=TRUE)
out_f <- shc(dm, n_sim=10, n_min=30, bkgd_pca=FALSE)
## explicitly call background noise est subroutines
ne_t <- null_eigval(dm, nrow(dm), ncol(dm), icovest=1, bkgd_pca=TRUE)
ne_f <- null_eigval(dm, nrow(dm), ncol(dm), icovest=1, bkgd_pca=FALSE)
## check that PCA-based estimate is always less than standard approach
expect_lt(ne_t$backvar, ne_f$backvar)
## check that shc returns same result as subroutine
expect_equal(out_t$backvar[1], ne_t$backvar, tolerance=0.001)
expect_equal(out_f$backvar[1], ne_f$backvar, tolerance=0.001)
})
test_that("shc accepts rcpp specification to call rclusterpp", {
## simple dataset
set.seed(100)
dm <- matrix(rnorm(200), ncol=2, nrow=100)
## run shc without Rclusterpp
out_base <- shc(dm, n_sim=10, n_min=30, rcpp=FALSE)
## run shc with Rclusterpp
if (requireNamespace("Rclusterpp", quietly = TRUE)) {
out_cpp <- shc(dm, n_sim=10, n_min=30, linkage="ward", rcpp=TRUE)
## check that clustering returns same result
expect_equal(out_cpp$hc_dat$height, out_base$hc_dat$height)
## check that error returned with linakge = 'ward.D2', rcpp = TRUE
expect_error(shc(dm, n_sim=10, n_min=30, rcpp=TRUE),
"for linkage when rcpp = TRUE.")
} else {
expect_error(shc(dm, n_sim=10, n_min=30, linkage="ward", rcpp=TRUE),
"package is not available")
}
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.