Nothing
#' Check the balance of presences vs pseudoabsences among splits
#'
#' @param splits the data splits (an `rset` or `split` object), generated by a
#' function such as [spatialsample::spatial_block_cv()]
#' @param .col the column containing the presences
#' @returns a tibble of the number of presences and pseudoabsences in the
#' assessment and analysis set of each split (or training and testing in an
#' initial split)
#' @export
#' @examples
#' lacerta_thin <- readRDS(system.file("extdata/lacerta_thin_all_vars.rds",
#' package = "tidysdm"
#' ))
#' lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5)
#' check_splits_balance(lacerta_cv, class)
#'
check_splits_balance <- function(splits, .col) {
.col <- rlang::enquo(.col) %>%
rlang::quo_get_expr() %>%
rlang::as_string()
if (inherits(splits, "rset")) {
if (!(.col %in% names(splits$splits[[1]]$data))) {
stop(".col should be a column in the data used to generate the splits")
}
training_list <- lapply(splits$splits, function(x) {
table(
rsample::training(x) %>%
sf::st_drop_geometry() %>%
dplyr::pull(.col)
)
})
training_df <- do.call("rbind", training_list)
testing_list <- lapply(splits$splits, function(x) {
table(
rsample::testing(x) %>%
sf::st_drop_geometry() %>%
dplyr::pull(.col)
)
})
testing_df <- do.call("rbind", testing_list)
dimnames(testing_df)[[2]] <- paste0(dimnames(testing_df)[[2]], "_analysis")
dimnames(training_df)[[2]] <- paste0(
dimnames(training_df)[[2]],
"_assessment"
)
balance_df <- dplyr::bind_cols(training_df, testing_df)
} else if (inherits(splits, "rsplit")) {
if (!(.col %in% names(splits$data))) {
stop(".col should be a column in the data used to generate the splits")
}
training_df <- table(
rsample::training(splits) %>%
sf::st_drop_geometry() %>%
dplyr::pull(.col)
)
testing_df <- table(
rsample::testing(splits) %>%
sf::st_drop_geometry() %>%
dplyr::pull(.col)
)
# coerce to a df with only one row
balance_df <- as.data.frame(matrix(c(training_df, testing_df), nrow = 1))
names(balance_df) <- c(
paste0(names(testing_df), "_test"),
paste0(names(training_df), "_train")
)
balance_df <- tibble::as_tibble(balance_df)
} else {
stop("splits should be either a spatial_rset or a spatial_rsplit")
}
return(balance_df)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.