Nothing
library(proxyC)
library(Matrix)
library(recipes)
library(ggplot2)
# ------------------------------------------------------------------------------
# simulate a small data set
make_data <- function(p, n, rate = .5) {
x <- matrix(sample(0:1, size = p * n, prob = rep(rate, 2), replace = TRUE), ncol = p)
colnames(x) <- paste0("x", 1:p)
x
}
set.seed(535)
tr_x <- make_data(20, 50)
un_x <- make_data(20, 10)
tr_x_sp <- Matrix(tr_x, sparse = TRUE)
un_x_sp <- Matrix(un_x, sparse = TRUE)
tr_scores <- simil(tr_x_sp, tr_x_sp, method = "jaccard")
un_scores <- simil(tr_x_sp, un_x_sp, method = "jaccard")
mean_tr <- apply(tr_scores, 1, mean)
mean_tab <- as.data.frame(table(mean_tr), stringsAsFactors = FALSE)
mean_tab$mean_tr <- as.numeric(mean_tab$mean_tr)
mean_tab$cumulative <- cumsum(mean_tab$Freq) / 50
# ------------------------------------------------------------------------------
test_that("matrix method - mean similarity", {
tmp <- apd_similarity(tr_x)
tmp_scores <- score(tmp, un_x)
expect_equal(tmp_scores$similarity, apply(un_scores, 2, mean))
expect_equal(tmp$options, list(method = "jaccard"))
expect_equal(tmp$ref_data, tr_x_sp)
expect_equal(mean_tab$mean_tr, tmp$ref_scores$sim)
expect_equal(mean_tab$Freq, tmp$ref_scores$n)
expect_equal(mean_tab$cumulative, tmp$ref_scores$cumulative)
})
test_that("data frame method - quantile similarity", {
tmp <- apd_similarity(tr_x, quantile = .1)
tmp_scores <- score(tmp, un_x)
expect_equal(tmp_scores$similarity, apply(un_scores, 2, quantile, probs = .1))
})
test_that("formula method - mean similarity", {
sim_form <- as.formula(" ~.")
tmp <- apd_similarity(sim_form, tr_x)
tmp_scores <- score(tmp, un_x)
expect_equal(tmp_scores$similarity, apply(un_scores, 2, mean))
expect_equal(tmp$options, list(method = "jaccard"))
expect_equal(tmp$ref_data, tr_x_sp)
expect_equal(mean_tab$mean_tr, tmp$ref_scores$sim)
expect_equal(mean_tab$Freq, tmp$ref_scores$n)
expect_equal(mean_tab$cumulative, tmp$ref_scores$cumulative)
})
# ------------------------------------------------------------------------------
test_that("data frame method - mean similarity", {
tmp <- apd_similarity(as.data.frame(tr_x))
tmp_scores <- score(tmp, as.data.frame(un_x))
expect_equal(tmp_scores$similarity, apply(un_scores, 2, mean))
expect_equal(tmp$options, list(method = "jaccard"))
expect_equal(tmp$ref_data, tr_x_sp)
expect_equal(mean_tab$mean_tr, tmp$ref_scores$sim)
expect_equal(mean_tab$Freq, tmp$ref_scores$n)
expect_equal(mean_tab$cumulative, tmp$ref_scores$cumulative)
})
test_that("matrix method - quantile similarity", {
tmp <- apd_similarity(as.data.frame(tr_x), quantile = .1)
tmp_scores <- score(tmp, as.data.frame(un_x))
expect_equal(tmp_scores$similarity, apply(un_scores, 2, quantile, probs = .1))
})
test_that("formula method - quantile similarity", {
sim_form <- as.formula(" ~.")
tmp <- apd_similarity(sim_form, tr_x, quantile = .1)
tmp_scores <- score(tmp, as.data.frame(un_x))
expect_equal(tmp_scores$similarity, apply(un_scores, 2, quantile, probs = .1))
})
# ------------------------------------------------------------------------------
test_that("recipe method - mean similarity", {
rec <-
recipe(~., data = as.data.frame(tr_x)) %>%
step_zv(all_predictors())
tmp <- apd_similarity(rec, as.data.frame(tr_x))
tmp_scores <- score(tmp, as.data.frame(un_x))
expect_equal(tmp_scores$similarity, apply(un_scores, 2, mean))
expect_equal(tmp$options, list(method = "jaccard"))
expect_equal(tmp$ref_data, tr_x_sp)
expect_equal(mean_tab$mean_tr, tmp$ref_scores$sim)
expect_equal(mean_tab$Freq, tmp$ref_scores$n)
expect_equal(mean_tab$cumulative, tmp$ref_scores$cumulative)
})
test_that("matrix method - quantile similarity", {
rec <-
recipe(~., data = as.data.frame(tr_x)) %>%
step_zv(all_predictors())
tmp <- apd_similarity(rec, as.data.frame(tr_x), quantile = .1)
tmp_scores <- score(tmp, as.data.frame(un_x))
expect_equal(tmp_scores$similarity, apply(un_scores, 2, quantile, probs = .1))
})
# ------------------------------------------------------------------------------
test_that("bad args", {
expect_snapshot(error = TRUE,
apd_similarity(tr_x, quantile = 2)
)
expect_snapshot(error = TRUE,
apd_similarity(tr_x_sp)
)
})
# ------------------------------------------------------------------------------
test_that("printed output", {
expect_snapshot(print(apd_similarity(tr_x)))
expect_snapshot(print(apd_similarity(tr_x)))
expect_snapshot(print(apd_similarity(tr_x)))
expect_snapshot(print(apd_similarity(tr_x, quantile = .13)))
})
# ------------------------------------------------------------------------------
test_that("plot output", {
ad <- apd_similarity(tr_x)
ad_plot <- autoplot(ad)
expect_equal(ad_plot$data, ad$ref_scores)
expect_equal(ad_plot$labels$x, "mean similarity (training set)")
expect_equal(ad_plot$labels$y, "Cumulative Probability")
})
# ------------------------------------------------------------------------------
test_that("apd_similarity fails when quantile is neither NA nor a number in [0, 1]", {
expect_snapshot(error = TRUE,
apd_similarity(tr_x, quantile = -1)
)
expect_snapshot(error = TRUE,
apd_similarity(tr_x, quantile = 3)
)
expect_snapshot(error = TRUE,
apd_similarity(tr_x, quantile = "la")
)
})
# ------------------------------------------------------------------------------
test_that("apd_similarity outputs warning with zero variance variables ", {
bad_data <- list(
"a" = c(0, 0),
"b" = c(0, 0),
"c" = c(1, 1),
"d" = c(0, 0)
)
bad_data <- as.data.frame(bad_data)
expect_snapshot(
apd_similarity(bad_data)
)
})
# ------------------------------------------------------------------------------
test_that("apd_similarity fails when all the variables have zero variance", {
bad_data <- list(
"a" = c(0, 0),
"b" = c(0, 0),
"d" = c(0, 0)
)
bad_data <- as.data.frame(bad_data)
expect_snapshot(error = TRUE,
apd_similarity(bad_data)
)
})
# ------------------------------------------------------------------------------
test_that("apd_similarity fails data is not binary", {
bad_data <- list(
"a" = c(0, 0),
"b" = c(1, 3),
"c" = c(1, 1),
"d" = c(2, 0)
)
bad_data <- as.data.frame(bad_data)
expect_snapshot(error = TRUE,
apd_similarity(bad_data)
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.