Nothing
#' @title Calculate marginal responses of each variables.
#' @description Calculate the marginal responses of each variables within the model.
#' @param model (Any predictive model). In this package, it is `isolation_forest`.
#' It could be the item `model` of `POIsotree` made by function \code{\link{isotree_po}}.
#' @param var_occ (`data.frame`, `tibble`) The `data.frame` style table that
#' include values of environmental variables at occurrence locations.
#' @param variables (`stars`) The `stars` of environmental variables. It should have
#' multiple `attributes` instead of `dims`. If you have `raster` object instead, you
#' could use \code{\link{st_as_stars}} to convert it to `stars` or use
#' \code{\link{read_stars}} directly read source data as a `stars`. You also could
#' use item `variables` of `POIsotree` made by function \code{\link{isotree_po}}.
#' @param si (`integer`) The number of samples to generate response curves.
#' If it is too small, the response curves might be biased.
#' The default value is `1000`.
#' @param visualize (`logical`) if `TRUE`, plot the response curves.
#' The default is `FALSE`.
#' @return (`MarginalResponse`) A nested list of
#' \itemize{
#' \item{responses_cont (`list`) A list of response values of continuous variables}
#' \item{responses_cat (`list`) A list of response values of categorical variables}
#' }
#'
#' @seealso
#' \code{\link{plot.MarginalResponse}}
#'
#' @details
#' The values show how each environmental variable affects the modeling
#' prediction. They show how the predicted result changes as each environmental
#' variable is varied while keeping all other environmental variables at average
#' sample value. They might be hard to interpret if there are strongly correlated
#' variables. The users could use \code{\link{dim_reduce}} function to remove
#' the strong correlation from original environmental variable stack.
#'
#' @references
#' \itemize{
#' \item{Elith, Jane,
#' et al. "The evaluation strip: a new and robust method for plotting predicted
#' responses from species distribution models." \emph{Ecological modelling}
#' 186.3 (2005): 280-289.\doi{10.1016/j.ecolmodel.2004.12.007}}
#' }
#'
#' @importFrom dplyr select slice as_tibble pull n %>%
#' @importFrom stars st_as_stars
#' @importFrom stats predict setNames
#' @importFrom rlang :=
#' @export
#' @examples
#' # Using a pseudo presence-only occurrence dataset of
#' # virtual species provided in this package
#' library(dplyr)
#' library(sf)
#' library(stars)
#' library(itsdm)
#'
#' # Prepare data
#' data("occ_virtual_species")
#' obs_df <- occ_virtual_species %>% filter(usage == "train")
#' eval_df <- occ_virtual_species %>% filter(usage == "eval")
#' x_col <- "x"
#' y_col <- "y"
#' obs_col <- "observation"
#'
#' # Format the observations
#' obs_train_eval <- format_observation(
#' obs_df = obs_df, eval_df = eval_df,
#' x_col = x_col, y_col = y_col, obs_col = obs_col,
#' obs_type = "presence_only")
#'
#' env_vars <- system.file(
#' 'extdata/bioclim_tanzania_10min.tif',
#' package = 'itsdm') %>% read_stars() %>%
#' slice('band', c(1, 5, 12, 16))
#'
#' # With imperfect_presence mode,
#' mod <- isotree_po(
#' obs_mode = "imperfect_presence",
#' obs = obs_train_eval$obs,
#' obs_ind_eval = obs_train_eval$eval,
#' variables = env_vars, ntrees = 10,
#' sample_size = 0.8, ndim = 2L,
#' seed = 123L, nthreads = 1,
#' response = FALSE,
#' spatial_response = FALSE,
#' check_variable = FALSE)
#'
#' marginal_responses <- marginal_response(
#' model = mod$model,
#' var_occ = mod$vars_train,
#' variables = mod$variables)
#' plot(marginal_responses)
#' #'
marginal_response <- function(model,
var_occ,
variables,
si = 1000,
visualize = FALSE) {
# Check inputs
checkmate::assert_data_frame(var_occ)
checkmate::assert_int(si)
checkmate::assert_class(variables, 'stars')
stopifnot(length(dim(variables)) <= 2)
checkmate::assert_logical(visualize)
bands <- names(variables)
stopifnot(all(bands %in% colnames(var_occ)))
# Reformat data
## Split numeric and categorical
isfacor <- as.vector(sapply(variables, is.factor))
bands_cont <- bands[!isfacor]
bands_cat <- bands[isfacor]
# Do full prediction
## Raster
var_pred_full <- predict(variables, model)
## Stretch result to be comparable with other results
var_pred_full <- 1 - var_pred_full
# Numeric variables
## Not limited to data volume, could generate as many as possible
## pseudo observations in [min, max], so the function could make
## smoother curves.
if (length(bands_cont) > 0) {
mins <- sapply(bands_cont, function(nm) {
min(variables %>% pull(nm), na.rm = T)})
maxs <- sapply(bands_cont, function(nm) {
max(variables %>% pull(nm), na.rm = T)})
vals_cont <- do.call(
cbind, lapply(1:length(mins), function(m) {
seq(from = mins[m], to = maxs[m],
length.out = si)})) %>%
data.frame() %>%
setNames(bands_cont)
# Get means and expand
means <- lapply(bands, function(nm) {
if (nm %in% bands_cont) {
mean(var_occ %>% pull(nm), na.rm = T)
} else {
.mode(var_occ %>% pull(nm))
}}) %>%
setNames(bands) %>%
as_tibble() %>%
slice(rep(1:n(), each = nrow(vals_cont)))
responses_cont <- lapply(names(vals_cont), function(nm) {
vals_tmp <- means
vals_tmp <- vals_tmp %>%
mutate('{nm}' := vals_cont %>% pull(nm))
pred_tmp <- predict(model, vals_tmp)
pred_tmp <- 1 - pred_tmp
pred_tmp <- .stars_stretch(var_pred_full, new_values = pred_tmp)
data.frame(y = pred_tmp,
x = vals_cont %>% pull(nm)) %>%
setNames(c("y", "x"))
})
names(responses_cont) <- bands_cont
rm(mins, maxs, vals_cont, means)
} else responses_cont <- NULL
# Categorical variables
## The number of pseudo observations is limited to factor levels
## So categorical variables should generate one by one
if (length(bands_cat) > 0) {
responses_cat <- lapply(bands_cat, function(nm) {
vals_this <- var_occ %>% pull(nm) %>%
levels() %>% as.factor()
means_this <- lapply(bands, function(nm) {
if (nm %in% bands_cont) {
mean(var_occ %>% pull(nm), na.rm = T)
} else {
.mode(var_occ %>% pull(nm))
}}) %>%
setNames(bands) %>%
as_tibble() %>%
slice(rep(1:n(), each = length(vals_this)))
means_this <- means_this %>%
mutate('{nm}' := vals_this)
pred_tmp <- predict(model, means_this)
pred_tmp <- 1 - pred_tmp
pred_tmp <- .stars_stretch(var_pred_full, new_values = pred_tmp)
data.frame(y = pred_tmp,
x = vals_this) %>%
setNames(c("y", "x"))
}) %>% setNames(bands_cat)
} else responses_cat <- NULL
# Put together
responses <- list(responses_cont = responses_cont,
responses_cat = responses_cat)
class(responses) <- append("MarginalResponse", class(responses))
# Visualize
if (visualize) {
print(plot(responses))
}
# Return
return(responses)
}
# marginal_response end
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.