Nothing
context("test-split_strata")
library(dplyr)
library(optimall)
library(stats)
set.seed(4563)
data_split <- data.frame(
"strata" = c(
rep("a", times = 15),
rep("b", times = 15),
rep("c", times = 12)
),
"split_var" = c(
rnorm(30, sd = 1),
rnorm(12, sd = 2)
),
"strata2" = rep(c(
rep(0, times = 7),
rep(1, times = 7)
),
times = 3
)
)
test_that("split_strata produces a dataframe with the same number of
rows as input and one more column called 'new_strata' with
only the specified strata changed", {
expect_equal(
dim(split_strata(
data = data_split, strata = "strata",
split = "a", split_var = "split_var",
split_at = 0, type = "value"
)),
c(42, 4)
)
expect_equal(
dim(split_strata(
data = data_split, strata = "strata",
split = NULL, split_var = "split_var",
split_at = 0, type = "value"
)),
c(42, 4)
)
expect_equal(
sort(unique(split_strata(
data = data_split,
strata = "strata",
split = "a",
split_var = "split_var",
split_at = 0,
type = "value"
)$new_strata)),
c(
paste("a.split_var_(0,",
round(max(filter(
data_split,
strata == "a"
)$split_var),
digits = 2
), "]",
sep = ""
),
paste("a.split_var_[",
round(min(filter(
data_split,
strata == "a"
)$split_var),
digits = 2
),
",0]",
sep = ""
),
"b", "c"
)
)
})
test_that("splits occur at correct global quantile values", {
median1 <- strsplit(dplyr::filter(
split_strata(
data = data_split, strata = "strata", split = "a",
split_var = "split_var", split_at = 0.5,
type = "global quantile"
),
split_var < stats::median(split_var),
old_strata == "a"
)$new_strata[1],
split = "[", fixed = TRUE
)[[1]][2]
# Extract median from strata name
expect_equal(
substr(median1, start = 7, stop = nchar(median1) - 1),
as.character(round(stats::median(data_split$split_var),
digits = 2
))
)
expect_equal(
as.vector(table(dplyr::filter(
split_strata(
data = data_split, strata = "strata", split = "a",
split_var = "split_var", split_at = 0.5,
type = "global quantile"
),
new_strata %in% c("b", "c") == FALSE
)$new_strata)),
as.vector(table(dplyr::filter(
data_split, strata == "a"
)$split_var <=
median(data_split$split_var)))
)
})
test_that("splits occur at correct local quantile values", {
median2 <- strsplit(dplyr::filter(
split_strata(
data = data_split, strata = "strata", split = "a",
split_var = "split_var", split_at = 0.5,
type = "local quantile"
),
split_var < stats::median(data_split[data_split$strata ==
"a", ]$split_var),
old_strata == "a"
)$new_strata[1],
split = "[", fixed = TRUE
)[[1]][2]
expect_equal(
substr(median2, start = 7, stop = nchar(median2) - 1),
as.character(round(stats::median(
data_split[data_split$strata == "a", ]$split_var
),
digits = 2
))
)
expect_equal(
as.vector(table(dplyr::filter(
split_strata(
data = data_split, strata = "strata", split = "a",
split_var = "split_var", split_at = 0.5,
type = "local quantile"
),
new_strata %in% c("b", "c") == FALSE
)$new_strata)),
as.vector(table(dplyr::filter(
data_split, strata == "a"
)$split_var <=
median(dplyr::filter(data_split, strata == "a")$split_var)))
)
})
test_that("splits occur at correct local quantile values if multiple
split points are provided", {
cutpts <- strsplit(dplyr::filter(
split_strata(
data = data_split, strata = "strata", split = "a",
split_var = "split_var", split_at = c(0.1, 0.9),
type = "local quantile"
),
split_var < stats::median(split_var),
old_strata == "a"
)$new_strata[1],
split = "(", fixed = TRUE
)[[1]][2]
# Extract cut points from strata name
expect_equal(
substr(cutpts, start = 7, stop = nchar(cutpts) - 1),
as.character(round(stats::quantile(
dplyr::filter(data_split, strata == "a")$split_var,
0.9
), digits = 2))
)
expect_equal(
substr(cutpts, start = 1, stop = 5),
as.character(round(stats::quantile(
dplyr::filter(data_split, strata == "a")$split_var,
0.1
), digits = 2))
)
splitpts <- quantile(
dplyr::filter(
data_split,
strata == "a"
)$split_var,
c(0.1, 0.9)
)
expected_sizes <- data_split %>%
dplyr::filter(strata == "a") %>%
dplyr::mutate(size = dplyr::case_when(
split_var <= splitpts[1] ~ "0",
split_var <= splitpts[2] ~ "1",
split_var > splitpts[2] ~ "2"
))
expect_equal(
sort(as.vector(table(dplyr::filter(
split_strata(
data = data_split, strata = "strata", split = "a",
split_var = "split_var", split_at = c(0.1, 0.9),
type = "local quantile"
),
new_strata %in% c("b", "c") == FALSE
)$new_strata)), decreasing = T),
sort(as.vector(table(expected_sizes$size)), decreasing = T)
)
# size of each new strata is as expected.
})
test_that("splits occur at correct categorical split", {
data_split$split_var2 <- rep(c(
rep("alpha", times = 7),
rep("beta", times = 7)
), times = 3)
expect_equal(
sort(unique(split_strata(
data = data_split,
strata = "strata",
split = "a",
split_var = "split_var2",
split_at = "alpha",
type = "categorical"
)$new_strata)),
c("a.split_var2_0", "a.split_var2_1", "b", "c")
)
expect_equal(
sort(as.character(unique(
split_strata(
data = data_split, strata = "strata", split = NULL,
split_var = "split_var2", split_at = "alpha",
type = "categorical"
)$new_strata
))),
c(
"a.split_var2_0", "a.split_var2_1", "b.split_var2_0",
"b.split_var2_1", "c.split_var2_0", "c.split_var2_1"
)
)
})
test_that("splits work when multiple strata given to the function", {
expect_equal(
sort(unique(
split_strata(
data = data_split, strata = "strata",
split = c("a", "b"), split_var = "split_var",
split_at = 0.5, type = "global quantile"
)$new_strata
)),
c(
"a.split_var_(0.03,0.75]", "a.split_var_[-1.71,0.03]",
"b.split_var_(0.03,1.79]", "b.split_var_[-1.72,0.03]", "c"
)
)
expect_equal(
sort(unique(
split_strata(
data = data_split, strata = "strata",
split = c("a", "b"), split_var = "split_var",
split_at = 0.5, type = "local quantile"
)$new_strata
)),
c(
"a.split_var_(0.04,0.75]", "a.split_var_[-1.71,0.04]",
"b.split_var_(0.15,1.79]", "b.split_var_[-1.72,0.15]", "c"
)
)
data_split$split_var2 <- rep(c(
rep("alpha", times = 7),
rep("beta", times = 7)
), times = 3)
expect_equal(
sort(unique(split_strata(
data = data_split,
strata = "strata",
split = c("a", "b"),
split_var = "split_var2",
split_at = c("alpha"),
type = "categorical"
)$new_strata)),
c(
"a.split_var2_0", "a.split_var2_1",
"b.split_var2_0", "b.split_var2_1", "c"
)
)
})
test_that("splits also work when multiple strata and multiple split_at
values are given to the function", {
expect_equal(
sort(unique(
split_strata(
data = data_split, strata = "strata",
split = c("a", "b"), split_var = "split_var",
split_at = c(0.3, 0.6),
type = "global quantile"
)$new_strata
)),
c(
"a.split_var_(-0.51,0.16]", "a.split_var_(0.16,0.75]",
"a.split_var_[-1.71,-0.51]", "b.split_var_(-0.51,0.16]",
"b.split_var_(0.16,1.79]", "b.split_var_[-1.72,-0.51]", "c"
)
)
expect_equal(
as.vector(table(split_strata(
data = data_split,
strata = "strata",
split = c("a", "b"),
split_var = "split_var",
split_at = c(0.3, 0.6),
type =
"global quantile"
)$new_strata)),
c(6, 6, 3, 3, 7, 5, 12)
)
expect_equal(
sort(unique(
split_strata(
data = data_split, strata = "strata",
split = c("a", "b"), split_var = "split_var",
split_at = c(0.3, 0.6),
type = "local quantile"
)$new_strata
)),
c(
"a.split_var_(0.01,0.16]", "a.split_var_(0.16,0.75]",
"a.split_var_[-1.71,0.01]", "b.split_var_(-0.53,0.2]",
"b.split_var_(0.2,1.79]", "b.split_var_[-1.72,-0.53]", "c"
)
)
expect_equal(
as.vector(table(split_strata(
data = data_split,
strata = "strata",
split = c("a", "b"),
split_var = "split_var",
split_at = c(0.3, 0.6),
type =
"local quantile"
)$new_strata)),
c(4, 6, 5, 4, 6, 5, 12)
)
})
test_that("strata_split can define prior strata based on an
interaction of multiple columns", {
expect_equal(length(unique(
split_strata(
data = data_split, strata = c("strata", "strata2"),
split = "a.0", split_var = "split_var",
split_at = 0.5, type = "global quantile"
)$new_strata
)), 7)
})
test_that("when type is a quantile, input must be between 0 and 1 or
else an error occurs", {
expect_error(split_strata(
data = data_split, strata = "strata",
split = "a", split_var = "split_var",
split_at = 1.3, type = "global quantile"
),
"'probs' outside [0,1]",
fixed = TRUE
)
})
test_that("when a 'value' outside of the range of values is given,
a warning comes up", {
expect_warning(split_strata(
data = data_split, strata = "strata",
split = "a", split_var = "split_var",
split_at = 5, type = "value"
),
"value(s) of 'split_at' are outside of the range",
fixed = TRUE
)
})
test_that("order is preserved in dataframe with ids provided", {
data_split$id <- seq(1:42)
data_split_id <- dplyr::select(data_split, id, strata, split_var)
expect_equal(all(split_strata(
data = data_split, strata = "strata",
split = c("a", "b"),
split_var = "split_var",
split_at = 0.5,
type = "local quantile"
)$split_var ==
data_split$split_var), TRUE)
})
test_that("truncating the new strata name works properly", {
expect_equal(
sort(unique(
split_strata(
data = data_split, strata = "strata", split = "a",
split_var = "split_var", split_at = 0,
type = "value", trunc = "spl"
)$new_strata
)),
c(paste("a.spl_(0,", round(max(filter(
data_split, strata == "a"
)$split_var), digits = 2), "]",
sep = ""
), paste("a.spl_[",
round(min(filter(
data_split, strata == "a"
)$split_var),
digits = 2
), ",0]",
sep = ""
), "b", "c")
)
expect_equal(
sort(unique(
split_strata(
data = data_split, strata = "strata",
split = "a", split_var = "split_var",
split_at = 0, type = "value", trunc = 4
)$new_strata
)),
c(
paste("a.spli_(0,", round(max(filter(
data_split,
strata == "a"
)$split_var),
digits = 2
), "]", sep = ""),
paste("a.spli_[",
round(min(filter(data_split, strata == "a")$split_var),
digits = 2
), ",0]",
sep = ""
), "b", "c"
)
)
expect_equal(
sort(unique(
split_strata(
data = data_split, strata = "strata",
split = "a", split_var = "split_var",
split_at = 0, type = "value",
trunc = -2
)$new_strata
)),
c(
paste("a.ar_(0,", round(max(filter(
data_split,
strata == "a"
)$split_var),
digits = 2
), "]", sep = ""),
paste("a.ar_[", round(min(filter(
data_split,
strata == "a"
)$split_var),
digits = 2
), ",0]", sep = ""), "b", "c"
)
)
expect_equal(
sort(unique(
split_strata(
data = data_split, strata = "strata",
split = "a", split_var = "split_var",
split_at = 0, type = "value",
trunc = 50
)$new_strata
)),
c(
paste("a.split_var_(0,",
round(max(filter(data_split, strata == "a")$split_var),
digits = 2
), "]",
sep = ""
),
paste("a.split_var_[",
round(min(filter(data_split, strata == "a")$split_var),
digits = 2
), ",0]",
sep = ""
), "b", "c"
)
)
expect_error(split_strata(
data = data_split, strata = "strata",
split = "a", split_var = "split_var",
split_at = 0, type = "value",
trunc = c("split", "spli")
),
"'trunc' must be a single numeric or character",
fixed = TRUE
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.