testthat::test_that("test psuedo_density_mode_cluster_1d", {
set.seed(1)
data <- matrix(c(rnorm(mean = 2,n = 100),
rnorm(mean = -2, n = 100)),
byrow = T, nrow = 100)
sigma <- quantile(as.matrix(dist(data)), .2)
out <- psuedo_density_mode_cluster_1d(data, sigma = sigma,
maxT = 60, eps = 1e-05,
verbose = FALSE, list_out = TRUE)
df_out <- do.call(rbind, lapply(1:length(out[[2]]),
function(idx) data.frame(out[[2]][idx]) %>%
dplyr::mutate(idx = idx) %>%
tibble::rownames_to_column()))
if (FALSE){
plotly::ggplotly(df_out %>% ggplot() +
geom_point(aes(x=X1, y =X2, frame = idx)) +
geom_path(aes(x=X1, y=X2, group = rowname))
)
}
dist_mat <- as.matrix(dist(out[[1]]))
adjmatrix <- dist_mat <= .01
ig <- igraph::graph_from_adjacency_matrix(adjmatrix, mode = "undirected")
groupings <- igraph::components(ig, mode = "strong")$membership
# 2 completely disjoint groups 1:50 and 51:100
testthat::expect_equal(length(unique(groupings[1:50])), 1)
testthat::expect_equal(length(unique(groupings[51:100])), 1)
testthat::expect_equal(length(unique(groupings)), 2)
})
testthat::test_that("test mode_clustering_1d", {
set.seed(1)
data <- matrix(c(rnorm(mean = 2,n = 100),
rnorm(mean = -2, n = 100)),
byrow = T, nrow = 100)
sigma <- quantile(as.matrix(dist(data)), .2)
data_df <- data.frame(data) %>% tibble::rownames_to_column()
names(data_df)[2:3] <- c("x","y")
out <- mode_clustering_1d(data_df, position = 2:3, naming_info = 1,
sigma = sigma, verbose = F)
testthat::expect_equal(out[,1], data_df$rowname)
# 2 completely disjoint groups 1:50 and 51:100
testthat::expect_equal(length(unique(out$groupings[1:50])), 1)
testthat::expect_equal(length(unique(out$groupings[51:100])), 1)
testthat::expect_equal(length(unique(out$groupings)), 2)
})
testthat::test_that("test coverage_down_1d_single", {
ordered_sim_df <- data.frame(x = c(1,2,1.5,4,4.1))
e_cols = c("x")
# useful ordered dist:
# dist(ordered_sim_df)
#1 2 3 4
#2 1.0
#3 0.5 0.5
#4 3.0 2.0 2.5
#5 3.1 2.1 2.6 0.1
out <- coverage_down_1d_single(ordered_sim_df, e_cols)
testthat::expect_equal(out$min_cover_vec, c(0,1,.5,2,.5))
testthat::expect_equal(out$dist_mat, matrix(c(0, 1, 1, 2, 2,
NA, 1, 1, 2, 2,
NA, NA, .5, 2, 2,
NA, NA, NA, 2, 2,
NA, NA, NA, NA, .5),
byrow = T, nrow = 5))
# data fram with single element gets message about it
testthat::expect_message(out_base <- coverage_down_1d_single(ordered_sim_df[1, , drop = F], e_cols))
testthat::expect_equal(out_base$min_cover_vec,0)
testthat::expect_equal(out_base$dist_mat,matrix(0, nrow = 1, ncol = 1))
})
testthat::test_that("test coverage_down_1d_mult", {
# standard
sim_df <- data.frame(x = c(1,2,1.5,4,4.1, # first group
2.1, 1.1,4.1 , 1.6, 4.2 ) # second group
)
e_cols = c("x")
ordering_list <- list(c(1:5),
c(2,1,4,3) + 5, # allows for basically the same out as first group...
c(10)) # for a singleton
# this one shouldn't expect message - not actually used...
out <- coverage_down_1d_mult(sim_df[1:9,, drop = F], e_cols, ordering_list[1:2])
testthat::expect_message(out <- coverage_down_1d_mult(sim_df, e_cols, ordering_list))
# first group:
# useful ordered dist
# dist(ordered_sim_df)
#1 2 3 4
#2 1.0
#3 0.5 0.5
#4 3.0 2.0 2.5
#5 3.1 2.1 2.6 0.1
testthat::expect_equal(out[[1]]$min_cover_vec, c(0,1,.5,2,.5))
testthat::expect_equal(out[[1]]$dist_mat, matrix(c(0, 1, 1, 2, 2,
NA, 1, 1, 2, 2,
NA, NA, .5, 2, 2,
NA, NA, NA, 2, 2,
NA, NA, NA, NA, .5),
byrow = T, nrow = 5))
# second group:
testthat::expect_equal(out[[2]]$min_cover_vec, c(0,1,.5,2))
testthat::expect_equal(out[[2]]$dist_mat, matrix(c(0, 1, 1, 2,
NA, 1, 1, 2,
NA, NA, .5, 2,
NA, NA, NA, 2),
byrow = T, nrow = 4))
# singleton group:
testthat::expect_equal(out[[3]]$min_cover_vec, c(0))
testthat::expect_equal(out[[3]]$dist_mat, matrix(c(0),nrow = 1))
})
testthat::test_that("test my_pdist", {
# fixed 1d
df1 <- data.frame(x = 3:4)
df2 <- data.frame(x = 4:6)
dist_out <- my_pdist(df1, df2)
expected_dist_out <- matrix(c(1,2,3,
0,1,2),
byrow = T, nrow = 2)
testthat::expect_equal(dist_out, expected_dist_out)
# fixed 2d
df1 <- data.frame(x = 3:4,
y = 3:4)
df2 <- data.frame(x = 4:6,
x = 4:6)
dist_out <- my_pdist(df1, df2)
expected_dist_out <- matrix(c(1,2,3,
0,1,2) * sqrt(2),
byrow = T, nrow = 2)
testthat::expect_equal(dist_out, expected_dist_out)
# random square
set.seed(5)
df_r <- data.frame(x = rnorm(5),
y = rnorm(5))
dist_sq <- my_pdist(df_r,df_r)
testthat::expect_equivalent(dist_sq, as.matrix(dist(df_r)))
})
testthat::test_that("test inner_containment_conformal_score_mode_radius_1d", {
sim_df <- data.frame(x = c(1,2,1.5,4,4.1, # first group
2.1, 1.1,4.1 , 1.6, 4.2 ) # second group
)
e_cols = c("x")
list_grouping_id <- list(c(1:5),
c(2,1,4,3) + 5, # allows for basically the same out as first group...
c(10))
# natural structure for simulation_info_df
psuedo_density_df <- data.frame(psuedo_density = c(.4,.3,.2,.1,.05,
.31,.41,.11,.21,
.5))
mode_grouping <- data.frame(groupings = c(rep(1,5),
rep(2,4),
3))
suppressMessages(list_radius_info <- coverage_down_1d_mult(sim_df, e_cols = e_cols,
ordering_list = list_grouping_id,
verbose = FALSE))
# could be fed into EpiCompare::inner_expanding_info
# (but doesn't have grouping names...)
simulation_info_df <- psuedo_density_df %>%
cbind(mode_grouping) %>%
dplyr::mutate(ranking = rank(-.data$psuedo_density,
ties.method ="random")) %>% # no impact in example
dplyr::group_by(.data$groupings) %>%
dplyr::mutate(group_ranking = rank(-.data$psuedo_density,
ties.method ="random")) # no impact in example
test_df <- data.frame(x = c(4.1, 4.11, 0, 3))
# 4.1 and 4.11 will be contained with 4.1 in either the 1st or 2nd list
# both of the sim 4.1s are at the bottom of the pile in terms of density,
# with overall rankings 8 and 9 out of 10
# 0 will be contained by 1 (first list after we include 2), so it should have
# ranking associated with 2 (in the first list), which has a ranking of 5
# 3 will also be captured by 2.1 from the second list as it has a higher
# density than the 2 from the first list and same radius, associated with
# a rank of 4
# since the conformal scores are n_sims + 1 - rank, we should expect:
out_expected <- data.frame(row_index = as.character(1:4),
containment_val = 10+1 - c(8, 8,5,4))
out <- inner_containment_conformal_score_mode_radius_1d(df_row_group = test_df,
simulations_group_df = sim_df,
data_column_names = e_cols,
simulation_info_df = simulation_info_df,
list_radius_info = list_radius_info,
list_grouping_id = list_grouping_id,
verbose = FALSE)
out_raw <- out %>% dplyr::ungroup() %>% as.data.frame()
testthat::expect_equal(out_raw, out_expected)
# with their own names!
test_df2 <- data.frame(x = c(4.1,4.11, 0, 3),
names = c("alice", "bill", "chuck", "dave"))
out_expected2 <- data.frame(names = c("alice", "bill", "chuck", "dave"),
containment_val = 10+1 - c(8, 8,5,4))
out2 <- inner_containment_conformal_score_mode_radius_1d(df_row_group = test_df2,
simulations_group_df = sim_df,
data_column_names = e_cols,
simulation_info_df = simulation_info_df,
list_radius_info = list_radius_info,
list_grouping_id = list_grouping_id,
verbose = FALSE)
out2_raw <- out2 %>% dplyr::ungroup() %>% as.data.frame()
testthat::expect_equal(out2_raw, out_expected2)
# 2d:
sim_df_2d <- data.frame(x = c(1,2,1.5,4,4.1, # first group
2.1, 1.1,4.1 , 1.6, 4.2 ), # second group
y = c(1,2,1.5,4,4.1, # first group
2.1, 1.1,4.1 , 1.6, 4.2 ) # second group
)
test_df_2d <- data.frame(x = c(4.1, 4.11, 0, 3),
y = c(4.1, 4.11, 0, 3))
e_cols = c("x", "y")
out_2d <- inner_containment_conformal_score_mode_radius_1d(df_row_group = test_df_2d,
simulations_group_df = sim_df_2d,
data_column_names = e_cols,
simulation_info_df = simulation_info_df,
list_radius_info = list_radius_info,
list_grouping_id = list_grouping_id,
verbose = FALSE)
})
testthat::test_that("test simulation_based_conformal_1d_complex", {
create_smooth_function <- function(df){
eval(parse(text = paste0("smooth_function <- function(x,y){
tryCatch({
inner_ss <- smooth.spline(x,y, df = ",
df,
")
return(predict(inner_ss,x)$y)},
error = function(cond){
message(sprintf(\"returning y: error in size of x (%i < 4)\", length(x)))
return(y)
},
warning = function(cond){
message(sprintf(paste(\"returning y: error in size of x\",
\"relative to size of df (%i < %i)\"),
length(x), ", df, "))
return(y)
}
)
}")))
return(smooth_function)
}
inner_moving <- function(x, y, window = 5, fill_left = T,
fun = min){
n <- length(y)
if (n < window){
message(sprintf(paste("returning y: error in size of x",
"relative to size of window (%i < %i)"),
n, window))
return(y)
}
y_smooth <- rep(NA, n)
for (idx in window:n){
y_smooth[idx] <- fun(y[(idx-window+1):idx])
}
if (fill_left){
y_smooth[1:(window-1)] <- y_smooth[window]
}
return(y_smooth)
}
create_min_smooth_function <- function(window){
eval(parse(text = paste0("out_function <- function(x,y){
inner_moving(x,y, window = ", window,", fill_left = T, fun = min)
}")))
return(out_function)
}
smooth_functions <- list("window10" = create_min_smooth_function(10))
set.seed(50)
sim_df <- data.frame(matrix(c(rnorm(100, mean = -2), rnorm(100, mean = 2)),
byrow = TRUE, ncol = 2)) %>%
dplyr::rename(x = "X1", y = "X2")
test_df <- data.frame(x = c(2,-1, -2.5),
y = c(2,-3.5, 2.5))
if (FALSE){
sim_df %>% ggplot() +
geom_point(aes(x=x,y=y)) +
geom_point(data = test_df, aes(x=x,y=y),color ="red")
}
# we expect that the first will have a higher conformal score than the second,
# and the third will have a conformal score of 0
out <- simulation_based_conformal_1d_complex(truth_df = test_df,
simulations_df = sim_df,
smooth_functions = smooth_functions, #named list
data_column_names = c("x","y"),
.maxT = 100,
.eps = 1e-07,
.diff_eps = 1e-06, return_min = F)
#out$conformal_score
testthat::expect_true(all(out$conformal_score$vary >=
out$conformal_score$smooth_vary$window10))
testthat::expect_true(all(out$conformal_score$vary_nm >=
out$conformal_score$smooth_vary_nm$window10))
list_cs <- out$conformal_score[c("fixed", "fixed_nm", "vary", "vary_nm")]
list_cs[[5]] <- out$conformal_score$smooth_vary$window10
list_cs[[6]]<- out$conformal_score$smooth_vary_nm$window10
cs_mat_idx <- 1
for (cs_mat in list_cs){
testthat::expect_true(all(diff(cs_mat$containment_val) <= rep(0,2)))
if (!(cs_mat_idx %in% 3:4)){
testthat::expect_equal(cs_mat$containment_val[3], 0)
}
testthat::expect_gt(cs_mat$containment_val[1], 60) # should be higher - pretty central...
cs_mat_idx <- cs_mat_idx + 1
}
})
testthat::test_that("combine_via_modes", {
order_idx = c(1,2,5,3,4)
grouping <- c(0,1,1,0,0)
inner_sim_data <- c(0,0,.05,-.4,.1)
out <- combine_via_modes(inner_sim_data, order_idx, grouping)
testthat::expect_equal(out, c(0,0,.1,.1,.4))
})
testthat::test_that("test cs_across_radius, static", {
inner_sim_data <- c(0,1,2,4,8)
# ranking of sims: c(3,1,2,4,5)
radius_prop_range <- c(.05,.10,.2,.8,.5,.6,.75)
sigma_prop <- .5
# multiple observations --
# radius range expected:
obs_values <- c(-.01,.25,1.5,3,6.75,0)
out <- cs_across_radius(inner_sim_data = inner_sim_data,
obs_values = obs_values,
radius_prop_range = radius_prop_range,
sigma_prop = sigma_prop)
expected_rad_range <- c(0,1,1,2,4)[ceiling(radius_prop_range * length(inner_sim_data))]
# ^ min span radius for inner_sim_data
#
# when radius is 0 (1:3) all values are 0 (final value x= 0)
expected_cs_mat <- matrix(c(0,0,0,5,3,3,5, # (value = 0.0) when radius is 0, it's not closest to any (3 0s), then when rad = 1, close enough only to 0 (values 5,6=6-3), when rad == 2, captured by 1: (values 4,7=6-1)
0,0,0,5,5,5,5, # (value = .25) when radius is 0, it's not closest to any (3 0s), then less than rad away from 1 (so values 4:7:6-1)
0,0,0,5,5,5,5, # (value = 1.5) when radius is 0, it's not closest to any (3 0s), then less than rad away from 1 (so values 4:7:6-1)
0,0,0,5,4,4,5,# (value = 3) when radius is 0, it's not closest to any (3 0s), then closest to "2" (6-2), when rad == 2, captured by 1: (values 4,7=6-1
0,0,0,1,0,0,1, # (value = 6.23) when radius < 2, it's not closest to any (5 0s), when rad = 2 only in 8's orbit (6-5)
3,3,3,5,5,5,5), # (value = 0) starts closest to 0 (even though rad = 0) - so (3 values of 6-3), then closest to 1 (values 4:7=6-1)
byrow = TRUE,
nrow = length(obs_values))
testthat::expect_equal(expected_cs_mat, out$cs_mat)
testthat::expect_equal(expected_rad_range, out$radius_range)
# single observation --
obs_values2 <- c(1.5)
out2 <- cs_across_radius(inner_sim_data = inner_sim_data,
obs_values = obs_values2,
radius_prop_range = radius_prop_range,
sigma_prop = sigma_prop)
# same outcome...
testthat::expect_equal(out2$cs_mat, out$cs_mat[3,,drop = F])
testthat::expect_equal(out2$radius_range, out$radius_range)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.