Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
# xgboost uses data.table
data.table::setDTthreads(2)
RhpcBLASctl::blas_set_num_threads(2)
RhpcBLASctl::omp_set_num_threads(2)
download_data <- FALSE
# note that data were created in the overview vignette
## -----------------------------------------------------------------------------
library(tidysdm)
lacerta_ensemble
## -----------------------------------------------------------------------------
explainer_lacerta_ens <- explain_tidysdm(lacerta_ensemble)
## ----vip, fig.width=6, fig.height=4-------------------------------------------
library(DALEX)
vip_ensemble <- model_parts(explainer = explainer_lacerta_ens)
plot(vip_ensemble)
## ----pdp, fig.width=6, fig.height=4-------------------------------------------
pdp_bio05 <- model_profile(explainer_lacerta_ens, N = 500, variables = "bio05")
plot(pdp_bio05)
## -----------------------------------------------------------------------------
explainer_list <- explain_tidysdm(tidysdm::lacerta_ensemble, by_workflow = TRUE)
## ----profile, fig.width=6, fig.height=4---------------------------------------
profile_list <- lapply(explainer_list, model_profile,
N = 500,
variables = "bio05"
)
plot(profile_list)
## -----------------------------------------------------------------------------
library(tidysdm)
library(sf)
lacerta_thin <- readRDS(system.file("extdata/lacerta_thin_all_vars.rds",
package = "tidysdm"
))
## ----initial_split, fig.width=6, fig.height=4---------------------------------
set.seed(1005)
lacerta_initial <- spatial_initial_split(lacerta_thin,
prop = 1 / 5, spatial_block_cv
)
autoplot(lacerta_initial)
## -----------------------------------------------------------------------------
check_splits_balance(lacerta_initial, class)
## ----training_cv, fig.width=6, fig.height=4-----------------------------------
set.seed(1005)
lacerta_training <- training(lacerta_initial)
lacerta_cv <- spatial_block_cv(lacerta_training,
v = 5,
cellsize = grid_cellsize(lacerta_thin),
offset = grid_offset(lacerta_thin) + 0.00001
)
autoplot(lacerta_cv)
## -----------------------------------------------------------------------------
check_splits_balance(lacerta_cv, class)
## ----recipe-------------------------------------------------------------------
lacerta_rec_all <- recipe(lacerta_thin, formula = class ~ .)
lacerta_rec_uncor <- lacerta_rec_all %>%
step_rm(all_of(c(
"bio01", "bio02", "bio03", "bio04", "bio07", "bio08",
"bio09", "bio10", "bio11", "bio12", "bio14", "bio16",
"bio17", "bio18", "bio19", "altitude"
)))
lacerta_rec_uncor
## ----workflow_set-------------------------------------------------------------
lacerta_models <-
# create the workflow_set
workflow_set(
preproc = list(
uncor = lacerta_rec_uncor, # recipe for the glm
all = lacerta_rec_all, # recipe for the random forest
all = lacerta_rec_uncor # recipe for svm
),
models = list(
# the standard glm specs
glm = sdm_spec_glm(),
# rf specs with tuning
rf = sdm_spec_rf(),
# svm specs with tuning
svm = parsnip::svm_poly(
cost = tune::tune(),
degree = tune::tune()
) %>%
parsnip::set_engine("kernlab") %>%
parsnip::set_mode("classification")
),
# make all combinations of preproc and models,
cross = FALSE
) %>%
# tweak controls to store information needed later to create the ensemble
# note that we use the bayes version as we will use a Bayes search (see later)
option_add(control = stacks::control_stack_bayes())
## -----------------------------------------------------------------------------
rf_param <- lacerta_models %>%
# extract the rf workflow
extract_workflow("all_rf") %>%
# extract its parameters dials (used to tune)
extract_parameter_set_dials() %>%
# give it the predictors to finalize mtry
finalize(x = st_drop_geometry(lacerta_thin) %>% select(-class))
# now update the workflowset with the new parameter info
lacerta_models <- lacerta_models %>%
option_add(param_info = rf_param, id = "all_rf")
## ----tune_grid----------------------------------------------------------------
set.seed(1234567)
lacerta_models <-
lacerta_models %>%
workflow_map("tune_bayes",
resamples = lacerta_cv, initial = 8,
metrics = sdm_metric_set(), verbose = TRUE
)
## -----------------------------------------------------------------------------
autoplot(lacerta_models)
## ----build_stack, fig.width=6, fig.height=4-----------------------------------
library(stacks)
set.seed(1005)
lacerta_stack <-
# initialize the stack
stacks() %>%
# add candidate members
add_candidates(lacerta_models) %>%
# determine how to combine their predictions
blend_predictions() %>%
# fit candidates with non-zero weights (i.e. non-zero stacking coefficients)
fit_members()
autoplot(lacerta_stack, type = "weights")
## ----predict_test-------------------------------------------------------------
lacerta_testing <- testing(lacerta_initial)
lacerta_test_pred <-
lacerta_testing %>%
bind_cols(predict(lacerta_stack, ., type = "prob"))
## ----assess_test--------------------------------------------------------------
sdm_metric_set()(data = lacerta_test_pred, truth = class, .pred_presence)
## ----eval=download_data-------------------------------------------------------
# download_dataset("WorldClim_2.1_10m")
# climate_vars <- lacerta_rec_all$var_info %>%
# filter(role == "predictor") %>%
# pull(variable)
#
# climate_present <- pastclim::region_slice(
# time_ce = 1985,
# bio_variables = climate_vars,
# data = "WorldClim_2.1_10m",
# crop = iberia_poly
# )
## ----echo=FALSE, results='hide', eval=!download_data--------------------------
climate_present <- terra::readRDS(
system.file("extdata/lacerta_climate_present_10m.rds", package = "tidysdm")
)
climate_vars <- lacerta_rec_all$var_info %>%
filter(role == "predictor") %>%
pull(variable)
if (!all(climate_vars %in% names(climate_present))) {
stop("mismatched variables in the raster")
}
## ----plot_present, fig.width=6, fig.height=4----------------------------------
prediction_present <- predict_raster(lacerta_stack, climate_present,
type = "prob"
)
library(tidyterra)
ggplot() +
geom_spatraster(data = prediction_present, aes(fill = .pred_presence)) +
scale_fill_terrain_c() +
# plot presences used in the model
geom_sf(data = lacerta_thin %>% filter(class == "presence"))
## -----------------------------------------------------------------------------
library(tidysdm)
# load the dataset
lacerta_thin <- readRDS(system.file("extdata/lacerta_thin_all_vars.rds",
package = "tidysdm"
))
# create a topography variable with 3 levels based on altitude
lacerta_thin$topography <- cut(lacerta_thin$altitude,
breaks = c(-Inf, 200, 800, Inf),
labels = c("plains", "hills", "mountains")
)
table(lacerta_thin$topography)
## -----------------------------------------------------------------------------
# subset to variable of interest
lacerta_thin <- lacerta_thin %>% select(
class, bio05, bio06, bio12,
bio15, topography
)
lacerta_rec <- recipe(lacerta_thin, formula = class ~ .) %>%
step_dummy(topography)
lacerta_rec
## -----------------------------------------------------------------------------
lacerta_prep <- prep(lacerta_rec)
summary(lacerta_prep)
## -----------------------------------------------------------------------------
lacerta_bake <- bake(lacerta_prep, new_data = lacerta_thin)
glimpse(lacerta_bake)
## -----------------------------------------------------------------------------
# define the models
lacerta_models <-
# create the workflow_set
workflow_set(
preproc = list(default = lacerta_rec),
models = list(
# the standard glm specs
glm = sdm_spec_glm(),
# rf specs with tuning
rf = sdm_spec_rf()
),
# make all combinations of preproc and models,
cross = TRUE
) %>%
# tweak controls to store information needed later to create the ensemble
option_add(control = control_ensemble_grid())
# tune
set.seed(100)
lacerta_cv <- spatial_block_cv(lacerta_thin, v = 3)
lacerta_models <-
lacerta_models %>%
workflow_map("tune_grid",
resamples = lacerta_cv, grid = 3,
metrics = sdm_metric_set(), verbose = TRUE
)
# fit the ensemble
lacerta_ensemble <- simple_ensemble() %>%
add_member(lacerta_models, metric = "boyce_cont")
## -----------------------------------------------------------------------------
lacerta_ensemble$workflow[[1]] %>% extract_fit_parsnip()
## -----------------------------------------------------------------------------
climate_present <- terra::readRDS(
system.file("extdata/lacerta_climate_present_10m.rds",
package = "tidysdm"
)
)
# first we add a topography variable to the climate data
climate_present$topography <- climate_present$altitude
climate_present$topography <- terra::classify(climate_present$topography,
rcl = c(-Inf, 200, 800, Inf),
include.lowest = TRUE,
brackets = TRUE
)
library(terra)
levels(climate_present$topography) <-
data.frame(ID = c(0, 1, 2), topography = c("plains", "hills", "mountains"))
# now we can predict
predict_factor <- predict_raster(lacerta_ensemble, climate_present)
plot(predict_factor)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.