Nothing
# tests/testthat/test-bicor.R
# helper - defined using R
bicor_manual_test <- function(x, y, c_const = 9) {
rx <- {
mx <- median(x); madx <- median(abs(x - mx))
u <- (x - mx) / (c_const * madx)
w <- ifelse(abs(u) < 1, (1 - u^2)^2, 0)
r <- (x - mx) * w
r / sqrt(sum(r^2))
}
ry <- {
my <- median(y); mady <- median(abs(y - my))
u <- (y - my) / (c_const * mady)
w <- ifelse(abs(u) < 1, (1 - u^2)^2, 0)
r <- (y - my) * w
r / sqrt(sum(r^2))
}
sum(rx * ry)
}
biweight_standardise_R <- function(x, c_const = 9, max_p_outliers = 1) {
stopifnot(is.numeric(x), length(x) >= 2L)
mx <- stats::median(x)
mad <- stats::median(abs(x - mx))
if (!(mad > 0)) return(rep(NaN, length(x)))
# Side-cap disabled here for exactness in most tests
stopifnot(max_p_outliers == 1)
u <- (x - mx) / (c_const * mad)
w <- ifelse(abs(u) < 1, (1 - u^2)^2, 0)
r <- (x - mx) * w
s2 <- sum(r^2)
if (!(s2 > 0)) return(rep(NaN, length(x)))
r / sqrt(s2)
}
bicor_manual_R <- function(x, y, c_const = 9, max_p_outliers = 1) {
zx <- biweight_standardise_R(x, c_const, max_p_outliers)
zy <- biweight_standardise_R(y, c_const, max_p_outliers)
if (any(!is.finite(zx)) || any(!is.finite(zy))) return(NaN)
sum(zx * zy)
}
# Weighted versions (for small n testcases)
w_quantile <- function(x, w, p) {
o <- order(x, method = "radix")
xs <- x[o]; ws <- w[o]
W <- sum(ws)
if (!(W > 0)) return(NaN)
if (p <= 0) return(xs[1L])
if (p >= 1) return(xs[length(xs)])
target <- p * W
csum <- 0
for (i in seq_along(xs)) {
csum <- csum + ws[i]
if (csum >= target) {
if (i == 1L) return(xs[1L])
cprev <- csum - ws[i]
frac <- (target - cprev) / ws[i]
return(xs[i-1L] + frac * (xs[i] - xs[i-1L]))
}
}
xs[length(xs)]
}
w_median <- function(x, w) w_quantile(x, w, 0.5)
w_mad <- function(x, w, med) w_median(abs(x - med), w)
biweight_standardise_w_R <- function(x, w, c_const = 9, max_p_outliers = 1) {
stopifnot(is.numeric(x), is.numeric(w), length(x) == length(w))
W <- sum(w); if (!(W > 0)) return(rep(NaN, length(x)))
med <- w_median(x, w)
mad <- w_mad(x, w, med)
if (!(mad > 0)) {
# weighted Pearson fallback (for tests that expect it)
mu <- sum(w * x) / W
r <- (x - mu) * w
s2 <- sum(r^2)
if (!(s2 > 0)) return(rep(NaN, length(x)))
return(r / sqrt(s2))
}
stopifnot(max_p_outliers == 1)
u <- (x - med) / (c_const * mad)
wt <- ifelse(abs(u) < 1, (1 - u^2)^2, 0) * w
r <- (x - med) * wt
s2 <- sum(r^2)
if (!(s2 > 0)) return(rep(NaN, length(x)))
r / sqrt(s2)
}
bicor_weighted_manual_R <- function(x, y, w, c_const = 9, max_p_outliers = 1) {
zx <- biweight_standardise_w_R(x, w, c_const, max_p_outliers)
zy <- biweight_standardise_w_R(y, w, c_const, max_p_outliers)
if (any(!is.finite(zx)) || any(!is.finite(zy))) return(NaN)
sum(zx * zy)
}
test_that("affine transformations yield ±1 exactly", {
x <- 1:5
y_pos <- 2*x + 7 # strictly increasing affine transform
y_neg <- -3*x + 1 # strictly decreasing affine transform
M <- cbind(x = x, y_pos = y_pos, y_neg = y_neg)
R <- bicor(
M, c_const = 9, max_p_outliers = 1,
pearson_fallback = "none", n_threads = 1L
)
expect_equal(R["x", "y_pos"], 1, tolerance = 1e-12)
expect_equal(R["x", "y_neg"], -1, tolerance = 1e-12)
expect_equal(as.numeric(diag(R)), rep(1, 3))
})
test_that("single gross same-direction leaves bicor = 1", {
x <- c(1:5, 1000)
y <- c(1:5, 1000)
R <- bicor(cbind(x, y), pearson_fallback = "none")
expect_gt(R["x", "y"], 0.999)
})
test_that("opposite-direction outlier: bicor matches manual definition", {
x <- c(1:5, 1000)
y <- c(1:5, -1000)
R <- bicor(cbind(x, y), pearson_fallback = "none")
expect_equal(R["x","y"], bicor_manual_test(x, y), tolerance = 1e-12)
expect_lt(R["x","y"], 0.9) # sanity: not near 1
})
test_that("fallback policy: 'none' and 'hybrid' produce NA for constant column; 'all' matches Pearson", {
A <- rep(1, 4) # constant -> MAD = 0 and variance = 0
B <- 1:4
C <- 2*B + 5
M <- cbind(A = A, B = B, C = C)
# 'none' => NA for all correlations involving A; B~C remains exactly 1
R_none <- bicor(M, pearson_fallback = "none")
expect_true(R_none["A","A"]==1)
expect_true(all(is.na(R_none["A", c("B","C")])))
expect_equal(R_none["B","C"], 1, tolerance = 1e-12)
# 'hybrid' tries Pearson for zero-MAD column; but A has zero variance,
# so it must still be NA. B~C remains exactly 1.
R_hybr <- bicor(M, pearson_fallback = "hybrid")
expect_true(R_hybr["A","A"]==1)
expect_true(all(is.na(R_hybr["A", c("B","C")])))
expect_equal(R_hybr["B","C"], 1, tolerance = 1e-12)
})
test_that("threading does not change results beyond tiny numerical noise", {
set.seed(42)
X <- matrix(rnorm(500 * 20), 500, 20)
R1 <- bicor(X, n_threads = 1L)
R2 <- bicor(X, n_threads = 2L)
expect_equal(R1, R2, tolerance = 1e-8)
})
test_that("data-frame path: drops non-numeric columns and preserves numeric names", {
df <- data.frame(
num1 = 1:5,
fac = factor(c("a","b","a","b","a")),
logi = c(TRUE, FALSE, TRUE, FALSE, TRUE),
num2 = 2*(1:5) + 7,
date = as.Date("2020-01-01") + 0:4
)
R <- bicor(df, pearson_fallback = "none")
expect_equal(colnames(R), c("num1", "num2"))
expect_equal(rownames(R), c("num1", "num2"))
expect_equal(R["num1","num2"], 1, tolerance = 1e-12)
})
test_that("validator rejects missing values", {
df_na <- data.frame(a = c(1, 2, NA, 4), b = c(2, 3, 4, 5))
expect_error(
bicor(df_na),
regexp = "Missing values are not allowed"
)
})
test_that("attributes and class are set correctly", {
set.seed(1)
X <- matrix(rnorm(200), 50, 4)
R <- bicor(X)
expect_s3_class(R, "bicor")
expect_equal(attr(R, "method"), "biweight_mid_correlation")
expect_true(grepl("biweight mid-correlation", attr(R, "description")))
expect_equal(attr(R, "package"), "matrixCorr")
expect_null(attr(R, "ci", exact = TRUE))
expect_null(attr(R, "inference", exact = TRUE))
})
test_that("bicor large-sample inference follows the established reference behaviour", {
set.seed(151)
X <- matrix(rnorm(90 * 4), nrow = 90, ncol = 4)
X[, 2] <- 0.7 * X[, 1] + rnorm(90, sd = 0.4)
X[, 4] <- -0.4 * X[, 3] + rnorm(90, sd = 0.6)
colnames(X) <- paste0("B", seq_len(ncol(X)))
ours <- bicor(X, ci = TRUE, pearson_fallback = "hybrid")
inf <- attr(ours, "inference", exact = TRUE)
ref_estimate <- structure(
c(
1, 0.87211433189796, -0.139821080789143, 0.0200531551858707,
0.87211433189796, 1, -0.100737855987848, 0.0600146995629313,
-0.139821080789143, -0.100737855987848, 1, -0.501164943217541,
0.0200531551858707, 0.0600146995629313, -0.501164943217541, 1
),
dim = c(4L, 4L)
)
ref_p_value <- structure(
c(
0, 4.66844299908808e-29, 0.188716903130168, 0.851189832850908,
4.66844299908808e-29, 0, 0.344796594702779, 0.574185746219993,
0.188716903130168, 0.344796594702779, 0, 4.87601871367649e-07,
0.851189832850908, 0.574185746219993, 4.87601871367649e-07, 0
),
dim = c(4L, 4L)
)
ref_statistic <- structure(
c(
Inf, 16.7200525192994, 1.32465032686288, 0.188153104953045,
16.7200525192994, Inf, 0.949836670568733, 0.564004406966281,
1.32465032686288, 0.949836670568733, 363463325.618328, 5.43287021000477,
0.188153104953045, 0.564004406966281, 5.43287021000477, 363463325.618328
),
dim = c(4L, 4L)
)
ref_Z <- structure(
c(
Inf, 12.5876081889807, -1.32028716414974, 0.188140491858905,
12.5876081889807, Inf, -0.948221133852547, 0.563665166175343,
-1.32028716414974, -0.948221133852547, 170.409068424375, -5.16753057745717,
0.188140491858905, 0.563665166175343, -5.16753057745717, 170.409068424375
),
dim = c(4L, 4L)
)
ref_n_obs <- structure(rep(90, 16), dim = c(4L, 4L))
idx <- upper.tri(ours, diag = FALSE)
expect_lt(max(abs(unclass(ours)[idx] - ref_estimate[idx]), na.rm = TRUE), 1e-8)
expect_lt(max(abs(inf$p_value[idx] - ref_p_value[idx]), na.rm = TRUE), 1e-8)
expect_lt(max(abs(inf$statistic[idx] - ref_statistic[idx]), na.rm = TRUE), 1e-8)
expect_lt(max(abs(inf$Z[idx] - ref_Z[idx]), na.rm = TRUE), 1e-8)
expect_equal(attr(ours, "diagnostics", exact = TRUE)$n_complete[idx], ref_n_obs[idx], tolerance = 0)
})
test_that("bicor returns confidence intervals only when requested", {
set.seed(152)
X <- matrix(rnorm(70 * 4), nrow = 70, ncol = 4)
X[, 2] <- 0.6 * X[, 1] + rnorm(70, sd = 0.5)
colnames(X) <- paste0("C", seq_len(ncol(X)))
fit0 <- bicor(X, ci = FALSE)
fit1 <- bicor(X, ci = TRUE)
expect_null(attr(fit0, "ci", exact = TRUE))
expect_null(attr(fit0, "inference", exact = TRUE))
ci <- attr(fit1, "ci", exact = TRUE)
inf <- attr(fit1, "inference", exact = TRUE)
expect_true(is.list(ci))
expect_true(is.list(inf))
expect_identical(ci$ci.method, "fisher_z_bicor")
expect_true(is.matrix(ci$lwr.ci))
expect_true(is.matrix(ci$upr.ci))
expect_true(is.matrix(inf$p_value))
})
test_that("bicor confidence intervals are ordered and bounded", {
set.seed(153)
X <- matrix(rnorm(80 * 5), nrow = 80, ncol = 5)
X[, 2] <- 0.8 * X[, 1] + rnorm(80, sd = 0.25)
colnames(X) <- paste0("D", seq_len(ncol(X)))
fit <- bicor(X, ci = TRUE)
ci <- attr(fit, "ci", exact = TRUE)
idx <- upper.tri(fit, diag = FALSE)
expect_true(all(ci$lwr.ci[idx] <= ci$upr.ci[idx], na.rm = TRUE))
expect_true(all(ci$lwr.ci[idx] >= -1, na.rm = TRUE))
expect_true(all(ci$upr.ci[idx] <= 1, na.rm = TRUE))
expect_true(all(fit[idx] >= ci$lwr.ci[idx] & fit[idx] <= ci$upr.ci[idx], na.rm = TRUE))
})
test_that("bicor core estimate is unchanged when ci is disabled", {
set.seed(154)
X <- matrix(rnorm(60 * 4), nrow = 60, ncol = 4)
X[, 3] <- -0.5 * X[, 2] + rnorm(60, sd = 0.4)
fit_base <- bicor(X)
fit_ci_false <- bicor(X, ci = FALSE)
base_mat <- unclass(fit_base)
false_mat <- unclass(fit_ci_false)
attributes(base_mat) <- attributes(base_mat)[c("dim", "dimnames")]
attributes(false_mat) <- attributes(false_mat)[c("dim", "dimnames")]
expect_equal(base_mat, false_mat, tolerance = 1e-12)
})
test_that("bicor summary switches to pairwise inference view when ci is requested", {
set.seed(155)
X <- matrix(rnorm(75 * 4), nrow = 75, ncol = 4)
X[, 2] <- 0.65 * X[, 1] + rnorm(75, sd = 0.4)
colnames(X) <- paste0("S", seq_len(ncol(X)))
fit <- bicor(X, ci = TRUE)
sm <- summary(fit)
txt <- capture.output(print(sm))
expect_s3_class(sm, "summary.bicor")
expect_s3_class(sm, "data.frame")
expect_true(all(c("item1", "item2", "estimate", "n_complete", "lwr", "upr", "statistic", "fisher_z", "p_value") %in% names(sm)))
expect_true(isTRUE(attr(sm, "has_ci")))
expect_true(isTRUE(attr(sm, "has_p")))
expect_true(any(grepl("^Biweight mid-correlation summary$", txt)))
expect_true(any(grepl("p_value", txt, fixed = TRUE)))
})
test_that("NA policy: error vs pairwise", {
set.seed(1)
x <- c(rnorm(4), NA, rnorm(3))
y <- c(rnorm(3), NA, rnorm(4))
M <- cbind(x = x, y = y)
# error mode must reject
expect_error(bicor(M, na_method = "error"),
"Missing values are not allowed")
# pairwise mode: compute on finite overlap
idx <- is.finite(x) & is.finite(y)
expect_true(sum(idx) >= 5) # ensure enough points for a stable check
Rpw <- bicor(M, na_method = "pairwise",
pearson_fallback = "none",
c_const = 9, max_p_outliers = 1)
expect_equal(Rpw["x","y"],
bicor_manual_R(x[idx], y[idx], c_const = 9),
tolerance = 1e-12)
expect_equal(as.numeric(diag(Rpw)), rep(1, 2))
})
test_that("weighted, NA-free: matches manual weighted bicor", {
x <- c(-2, -1, 0, 1, 2)
y <- c(-4, -2, 0, 2, 4) # perfectly monotone
w <- c(1, 1, 5, 1, 1) # emphasise the centre
Rw <- bicor(cbind(x = x, y = y),
w = w,
na_method = "error",
pearson_fallback = "none",
c_const = 9, max_p_outliers = 1)
expected <- bicor_weighted_manual_R(x, y, w, c_const = 9)
expect_equal(Rw["x","y"], expected, tolerance = 1e-12)
expect_equal(as.numeric(diag(Rw)), rep(1, 2))
})
test_that("weighted + pairwise: matches manual on overlapping rows (hybrid fallback)", {
x <- c(0, 0, 0, 1, 2, NA)
y <- c(0, 0, 0, 2, 4, 5)
w <- c(1, 1, 5, 1, 1, 3)
idx <- is.finite(x) & is.finite(y)
Rwp <- bicor(cbind(x = x, y = y),
w = w, na_method = "pairwise",
pearson_fallback = "hybrid", # ← change here
c_const = 9, max_p_outliers = 1)
expect_equal(Rwp["x","y"],
bicor_weighted_manual_R(x[idx], y[idx], w[idx], c_const = 9),
tolerance = 1e-12)
})
test_that("fallback = 'all' reproduces Pearson (including constant columns)", {
A <- rep(1, 5); B <- 1:5; C <- 2*B + 5
M <- cbind(A = A, B = B, C = C)
R_all <- bicor(M, pearson_fallback = "all", na_method = "error")
R_p <- suppressWarnings(stats::cor(M, method = "pearson"))
to_na <- function(m) { m[is.nan(m)] <- NA_real_; m }
# Diagonal: ignore names on the vector
expect_equal(unname(diag(R_all)), rep(1, 3))
# Off-diagonals: align NaN from bicor to NA as in base cor()
R_all_na <- to_na(R_all)
expect_equal(R_all_na[upper.tri(R_all_na)], R_p[upper.tri(R_p)])
expect_equal(R_all_na[lower.tri(R_all_na)], R_p[lower.tri(R_p)])
})
test_that("fallback = 'hybrid' when one column has MAD=0 but variance>0", {
# x has MAD = 0 (at least half zeros) but variance > 0
x <- c(0, 0, 0, 1, 2)
y <- c(0, 1, 2, 3, 4)
# Expected: Pearson standardisation for x, biweight for y
mu <- mean(x); zx <- (x - mu) / sqrt(sum((x - mu)^2))
zy <- biweight_standardise_R(y, c_const = 9)
expected <- sum(zx * zy)
Rh <- bicor(cbind(x = x, y = y),
pearson_fallback = "hybrid",
c_const = 9, max_p_outliers = 1)
expect_equal(Rh["x","y"], expected, tolerance = 1e-12)
})
test_that("mad_consistent=TRUE equals using c_const*1.4826", {
set.seed(123)
X <- matrix(rnorm(200), 50, 4)
R1 <- bicor(X, mad_consistent = TRUE, c_const = 9)
R2 <- bicor(X, mad_consistent = FALSE, c_const = 9 * 1.4826)
expect_equal(R1, R2, tolerance = 1e-12, ignore_attr = TRUE)
})
test_that("max_p_outliers side-cap improves robustness in one-sided outliers", {
# Perfect linear pattern + a few very large positive outliers on y
set.seed(7)
x <- seq(-2, 2, length.out = 40)
y <- 3*x + rnorm(length(x), sd = 0.02)
y[38:40] <- y[38:40] + 50 # large positive outliers (one-sided)
R0 <- bicor(cbind(x, y),
c_const = 9, max_p_outliers = 1,
pearson_fallback = "none")
R1 <- bicor(cbind(x, y),
c_const = 9, max_p_outliers = 0.10,
pearson_fallback = "none")
# With side-cap, more points remain with positive weight on the + side -> correlation should not decrease
expect_gte(R1["x","y"], R0["x","y"])
})
test_that("sparse_threshold alias returns unified sparse output contract", {
skip_if_not_installed("Matrix")
# Small matrix with modest correlations
set.seed(99)
X <- matrix(rnorm(200), 50, 4)
R <- bicor(X, sparse_threshold = 0.9) # most off-diagonals -> 0
expect_s4_class(R, "sparseMatrix")
expect_identical(attr(R, "output"), "sparse")
expect_equal(as.numeric(attr(R, "threshold")), 0.9)
expect_true(isTRUE(attr(R, "diag")))
meta <- attr(R, "matrixCorr_meta", exact = TRUE)
expect_true(is.list(meta))
expect_true("bicor" %in% meta$source_class)
expect_identical(meta$method, "biweight_mid_correlation")
# diagonal remains 1 for finite bicor diagonals
expect_equal(as.numeric(diag(as.matrix(R))), rep(1, ncol(X)))
# confirm many structural zeros (at least one)
nz <- Matrix::nnzero(R)
expect_lt(nz, ncol(X)^2) # sparsity achieved
})
test_that("sparse output drops non-finite dense entries and keeps finite thresholded values", {
skip_if_not_installed("Matrix")
M <- cbind(
const = rep(1, 6),
var = c(0, 1, 2, 3, 4, 5)
)
R_dense <- bicor(M, pearson_fallback = "none")
R <- bicor(M, pearson_fallback = "none", sparse_threshold = 0.4)
R_sparse_dense <- as.matrix(R)
expect_s4_class(R, "sparseMatrix")
finite_keep <- is.finite(R_dense) & abs(R_dense) >= 0.4
expect_equal(R_sparse_dense[finite_keep], R_dense[finite_keep], tolerance = 1e-12)
expect_true(all(R_sparse_dense[!is.finite(R_dense)] == 0))
})
test_that("plot methods do not error and return expected classes", {
skip_if_not_installed("ggplot2")
set.seed(10)
X <- matrix(rnorm(100), 20, 5)
R <- bicor(X)
# plot method: should return a ggplot object
p <- plot(R, title = "bicor heatmap")
expect_s3_class(p, "ggplot")
})
# --- cross-check a few small exact-value scenarios (no side-cap, no weights) ---
test_that("small exact scenarios: ±1 for monotone affine transforms", {
x <- 1:5
y_pos <- 2*x + 7
y_neg <- -3*x + 1
M <- cbind(x = x, y_pos = y_pos, y_neg = y_neg)
R <- bicor(M, c_const = 9, max_p_outliers = 1,
pearson_fallback = "none", n_threads = 1L)
expect_equal(R["x","y_pos"], 1, tolerance = 1e-12)
expect_equal(R["x","y_neg"], -1, tolerance = 1e-12)
expect_equal(as.numeric(diag(R)), rep(1, 3))
})
test_that("gross opposite-direction outlier matches manual bicor", {
x <- c(1:5, 1000)
y <- c(1:5, -1000)
R <- bicor(cbind(x, y), pearson_fallback = "none")
expect_equal(R["x","y"], bicor_manual_R(x, y), tolerance = 1e-12)
expect_lt(R["x","y"], 0.9)
})
test_that("threading is numerically stable", {
set.seed(42)
t <- seq(0, 8 * pi, length.out = 500)
X <- sapply(1:20, function(j) {
sin(t * (j + 1) / 7) + cos(t * (j + 3) / 11) + rnorm(length(t), sd = 0.2)
})
R1 <- bicor(X, n_threads = 1L)
R2 <- bicor(X, n_threads = 2L)
expect_equal(R1, R2, tolerance = 1e-8)
})
test_that("data.frame path: drops non-numerics and preserves names", {
df <- data.frame(
num1 = 1:5,
fac = factor(c("a","b","a","b","a")),
logi = c(TRUE, FALSE, TRUE, FALSE, TRUE),
num2 = 2*(1:5) + 7,
date = as.Date("2020-01-01") + 0:4
)
R <- bicor(df, pearson_fallback = "none")
expect_equal(colnames(R), c("num1", "num2"))
expect_equal(rownames(R), c("num1", "num2"))
expect_equal(R["num1","num2"], 1, tolerance = 1e-12)
})
test_that("validator rejects missing values in error mode", {
df_na <- data.frame(a = c(1, 2, NA, 4), b = c(2, 3, 4, 5))
expect_error(bicor(df_na),
regexp = "Missing values are not allowed")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.