Nothing
#' @title Calculate independent responses of each variables.
#' @description Calculate the independent responses of each variables
#' within the model.
#' @param model (Any predictive model). It is `isolation_forest` here. It could
#' be the item `model` of `POIsotree` made by function \code{\link{isotree_po}}.
#' @param var_occ (`data.frame`, `tibble`) The `data.frame` style table that
#' include values of environmental variables at occurrence locations.
#' @param variables (`stars`) The `stars` of environmental variables.
#' It should have multiple `attributes` instead of `dims`.
#' If you have `raster` object instead, you
#' could use \code{\link{st_as_stars}} to convert it to `stars` or use
#' \code{\link{read_stars}} directly read source data as a `stars`.
#' You also could use item `variables` of `POIsotree` made by function
#' \code{\link{isotree_po}}.
#' @param si (`integer`) The number of samples to generate response curves.
#' If it is too small, the response curves might be biased.
#' The default value is `1000`.
#' @param visualize (`logical`) if `TRUE`, plot the response curves.
#' The default is `FALSE`.
#'
#' @return (`IndependentResponse`) A list of
#' \itemize{
#' \item{responses_cont (`list`) A list of response values of continuous
#' variables}
#' \item{responses_cat (`list`) A list of response values of categorical
#' variables}
#' }
#'
#' @details
#' The values show how each environmental variable independently
#' affects the modeling prediction. They show how the predicted result
#' only using this variable changes as it is varied.
#'
#' @references
#' \itemize{
#' \item{Elith, Jane,
#' et al. "The evaluation strip: a new and robust method for plotting predicted
#' responses from species distribution models." \emph{Ecological modelling}
#' 186.3 (2005): 280-289.\doi{10.1016/j.ecolmodel.2004.12.007}}
#' }
#'
#' @seealso
#' \code{\link{plot.IndependentResponse}}
#'
#' @importFrom isotree isolation.forest
#' @importFrom dplyr select slice
#' @importFrom stars st_as_stars
#' @importFrom stats predict setNames
#' @importFrom rlang :=
#' @importFrom tidyselect all_of
#' @export
#' @examples
#' # Using a pseudo presence-only occurrence dataset of
#' # virtual species provided in this package
#' library(dplyr)
#' library(sf)
#' library(stars)
#' library(itsdm)
#'
#' data("occ_virtual_species")
#' obs_df <- occ_virtual_species %>% filter(usage == "train")
#' eval_df <- occ_virtual_species %>% filter(usage == "eval")
#' x_col <- "x"
#' y_col <- "y"
#' obs_col <- "observation"
#'
#' # Format the observations
#' obs_train_eval <- format_observation(
#' obs_df = obs_df, eval_df = eval_df,
#' x_col = x_col, y_col = y_col, obs_col = obs_col,
#' obs_type = "presence_only")
#'
#' env_vars <- system.file(
#' 'extdata/bioclim_tanzania_10min.tif',
#' package = 'itsdm') %>% read_stars() %>%
#' slice('band', c(1, 5, 12, 16))
#'
#' # With imperfect_presence mode,
#' mod <- isotree_po(
#' obs_mode = "imperfect_presence",
#' obs = obs_train_eval$obs,
#' obs_ind_eval = obs_train_eval$eval,
#' variables = env_vars, ntrees = 10,
#' sample_size = 0.8, ndim = 2L,
#' seed = 123L, nthreads = 1,
#' response = FALSE,
#' spatial_response = FALSE,
#' check_variable = FALSE)
#'
#' independent_responses <- independent_response(
#' model = mod$model,
#' var_occ = mod$vars_train,
#' variables = mod$variables)
#' plot(independent_responses)
#'
independent_response <- function(model,
var_occ,
variables,
si = 1000,
visualize = FALSE) {
# Check inputs
checkmate::assert_data_frame(var_occ)
checkmate::assert_int(si)
checkmate::assert_class(variables, 'stars')
stopifnot(length(dim(variables)) <= 2)
checkmate::assert_logical(visualize)
bands <- names(variables)
stopifnot(all(bands %in% colnames(var_occ)))
# Do full prediction
## Raster
var_pred_full <- predict(variables, model)
## Stretch result to be comparable with other results
var_pred_full <- 1 - var_pred_full
# Reformat data
## Make models
models <- lapply(bands, function(nm) {
ind_var_occ <- var_occ %>% select(all_of(nm))
# Remove and modify some arguments not works
# for single-variable model, other arguments
# inherit from model object.
args_iforest <- c(
list(data = ind_var_occ, seed = model$random_seed,
nthreads = model$nthreads),
model$params
)
args_iforest$ndim <- 1
args_iforest$ncols_per_tree <- min(ncol(args_iforest$data),
args_iforest$ncols_per_tree)
if (args_iforest$new_categ_action == "impute") {
args_iforest$new_categ_action <- "weighted"
}
do.call(isolation.forest, args_iforest)}) %>%
setNames(bands)
## Split numeric and categorical
isfacor <- as.vector(sapply(variables, is.factor))
bands_cont <- bands[!isfacor]
bands_cat <- bands[isfacor]
# Numeric variables
## Not limited to data volume, could generate as many as possible
## pseudo observations in [min, max], so the function could make
## smoother curves.
if (length(bands_cont) > 0) {
mins <- sapply(bands_cont, function(nm) {
min(variables %>% select(nm) %>% pull, na.rm = T)})
maxs <- sapply(bands_cont, function(nm) {
max(variables %>% select(nm) %>% pull, na.rm = T)})
vals_cont <- do.call(cbind, lapply(1:length(mins), function(n) {
seq(from = mins[n], to = maxs[n],
length.out = si)})) %>%
data.frame() %>%
setNames(bands_cont)
responses_cont <- lapply(names(vals_cont), function(nm) {
vals_tmp <- vals_cont %>% select(nm)
pred_tmp <- predict(models[[nm]], vals_tmp)
pred_tmp <- 1 - pred_tmp
pred_tmp <- .stars_stretch(var_pred_full, new_values = pred_tmp)
data.frame(y = pred_tmp,
x = vals_cont %>% pull(nm)) %>%
setNames(c("y", "x"))
})
names(responses_cont) <- bands_cont
rm(mins, maxs, vals_cont)
} else responses_cont <- NULL
# Categorical variables
## The number of pseudo observations is limited to factor levels
## So categorical variables should generate one by one
if (length(bands_cat) > 0) {
responses_cat <- lapply(bands_cat, function(nm) {
vals_this <- var_occ %>% pull(nm) %>%
levels() %>% as.factor() %>%
data.frame() %>% setNames(nm)
pred_tmp <- predict(models[[nm]], vals_this)
pred_tmp <- 1 - pred_tmp
pred_tmp <- .stars_stretch(var_pred_full, new_values = pred_tmp)
data.frame(y = pred_tmp,
x = vals_this %>% pull(nm)) %>%
setNames(c("y", "x"))
}) %>% setNames(bands_cat)
} else responses_cat <- NULL
# Put together
responses <- list(responses_cont = responses_cont,
responses_cat = responses_cat)
class(responses) <- append("IndependentResponse", class(responses))
# Visualize
if (visualize) {
print(plot(responses))
}
# Return
return(responses)
}
# independent_response end
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.