## ----12-spatial-cv-1, message=FALSE-----------------------------------------------------------------
library(dplyr)
library(future)
library(lgr)
library(mlr3)
library(mlr3learners)
library(mlr3extralearners)
library(mlr3spatiotempcv)
library(mlr3tuning)
library(mlr3viz)
library(progressr)
library(sf)
library(terra)
## ----12-spatial-cv-2--------------------------------------------------------------------------------
data("lsl", "study_mask", package = "spDataLarge")
ta = terra::rast(system.file("raster/ta.tif", package = "spDataLarge"))
## ----lsl-map, echo=FALSE, out.width="70%", fig.cap="Landslide initiation points (red) and points unaffected by landsliding (blue) in Southern Ecuador.", fig.scap="Landslide initiation points."----
# library(tmap)
# data("lsl", package = "spDataLarge")
# ta = terra::rast(system.file("raster/ta.tif", package = "spDataLarge"))
# lsl_sf = sf::st_as_sf(lsl, coords = c("x", "y"), crs = "EPSG:32717")
# hs = terra::shade(slope = ta$slope * pi / 180,
# terra::terrain(ta$elev, v = "aspect", unit = "radians"))
# # so far tmaptools does not support terra objects
#
# bbx = tmaptools::bb(raster::raster(hs), xlim = c(-0.0001, 1),
# ylim = c(-0.0001, 1), relative = TRUE)
#
# map = tm_shape(hs, bbox = bbx) +
# tm_grid(col = "black", n.x = 1, n.y = 1, labels.inside.frame = FALSE,
# labels.rot = c(0, 90), lines = FALSE) +
# tm_raster(palette = gray(0:100 / 100), n = 100, legend.show = FALSE) +
# tm_shape(ta$elev) +
# tm_raster(alpha = 0.5, palette = terrain.colors(10), legend.show = FALSE) +
# tm_shape(lsl_sf) +
# tm_bubbles("lslpts", size = 0.2, palette = "-RdYlBu",
# title.col = "Landslide: ") +
# tm_layout(inner.margins = 0) +
# tm_legend(bg.color = "white")
# tmap::tmap_save(map, filename = "images/lsl-map-1.png", width = 11,
# height = 11, units = "cm")
knitr::include_graphics("images/lsl-map-1.png")
## ----lslsummary, echo=FALSE, warning=FALSE----------------------------------------------------------
lsl_table = lsl |>
mutate(across(.cols = -any_of(c("x", "y", "lslpts")), ~signif(., 2)))
knitr::kable(lsl_table[c(1, 2, 350), ], caption = "Structure of the lsl dataset.",
caption.short = "`lsl` dataset.", booktabs = TRUE) |>
kableExtra::kable_styling(latex_options = "scale_down")
## ----12-spatial-cv-6--------------------------------------------------------------------------------
fit = glm(lslpts ~ slope + cplan + cprof + elev + log10_carea,
family = binomial(),
data = lsl)
## ----12-spatial-cv-7--------------------------------------------------------------------------------
class(fit)
fit
## ----12-spatial-cv-8--------------------------------------------------------------------------------
pred_glm = predict(object = fit, type = "response")
head(pred_glm)
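# illustrative sketch (not part of the original chunk): an assumed 0.5 cutoff
# turns the fitted probabilities into binary predictions for a quick check
table(observed = lsl$lslpts, predicted = pred_glm > 0.5)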
## ----12-spatial-cv-9, eval=FALSE--------------------------------------------------------------------
## # making the prediction
## pred = terra::predict(ta, model = fit, type = "response")
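## # illustrative sketch (not from the original chunk): preview the resulting
## # susceptibility raster with terra's base plot method
## terra::plot(pred, main = "Landslide susceptibility")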
## ----lsl-susc, echo=FALSE, out.width="70%",fig.cap="Spatial prediction of landslide susceptibility using a GLM.", fig.scap = "Spatial prediction of landslide susceptibility.", warning=FALSE----
# # attach study mask for the natural part of the study area
# data("lsl", "study_mask", package = "spDataLarge")
# ta = terra::rast(system.file("raster/ta.tif", package = "spDataLarge"))
# study_mask = terra::vect(study_mask)
# lsl_sf = sf::st_as_sf(lsl, coords = c("x", "y"), crs = 32717)
# hs = terra::shade(ta$slope * pi / 180,
# terra::terrain(ta$elev, v = "aspect", unit = "radians"))
# bbx = tmaptools::bb(raster::raster(hs), xlim = c(-0.0001, 1),
# ylim = c(-0.0001, 1), relative = TRUE)
#
# map = tm_shape(hs, bbox = bbx) +
# tm_grid(col = "black", n.x = 1, n.y = 1, labels.inside.frame = FALSE,
# labels.rot = c(0, 90), lines = FALSE) +
# tm_raster(palette = "white", legend.show = FALSE) +
# # hillshade
# tm_shape(terra::mask(hs, study_mask), bbox = bbx) +
# tm_raster(palette = gray(0:100 / 100), n = 100,
# legend.show = FALSE) +
# # prediction raster
# tm_shape(terra::mask(pred, study_mask)) +
# tm_raster(alpha = 0.5, palette = "Reds", n = 6, legend.show = TRUE,
# title = "Susceptibility") +
# tm_layout(legend.position = c("left", "bottom"),
# legend.title.size = 0.9,
# inner.margins = 0)
# tmap::tmap_save(map, filename = "images/lsl-susc-1.png", width = 11,
# height = 11, units = "cm")
knitr::include_graphics("images/lsl-susc-1.png")
## ----12-spatial-cv-10, message=FALSE, eval=FALSE----------------------------------------------------
## pROC::auc(pROC::roc(lsl$lslpts, fitted(fit)))
## #> Area under the curve: 0.8216
## ----partitioning, fig.cap="Spatial visualization of selected test and training observations for cross-validation of one repetition. Random (upper row) and spatial partitioning (lower row).", echo=FALSE, fig.scap="Spatial visualization of selected test and training observations."----
knitr::include_graphics("images/13_partitioning.png")
## ----building-blocks, echo=FALSE, fig.height=4, fig.width=4, fig.cap="Basic building blocks of the mlr3 package. Source: @bischl_applied_2024. (Permission to reuse this figure was kindly granted.)", fig.scap="Basic building blocks of the mlr3 package."----
knitr::include_graphics("images/13_ml_abstraction_crop.png")
## ----12-spatial-cv-11, eval=FALSE-------------------------------------------------------------------
## # create task
## task = mlr3spatiotempcv::TaskClassifST$new(
## id = "ecuador_lsl",
## backend = mlr3::as_data_backend(lsl),
## target = "lslpts",
## positive = "TRUE",
## extra_args = list(
## coordinate_names = c("x", "y"),
## coords_as_features = FALSE,
## crs = "EPSG:32717")
## )
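## # illustrative sketch (not from the original chunk): basic task accessors,
## # e.g., the number of observations and the features used for modeling
## task$nrow
## task$feature_names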
## ----autoplot, eval=FALSE---------------------------------------------------------------------------
## # plot response against each predictor
## mlr3viz::autoplot(task, type = "duo")
## # plot all variables against each other
## mlr3viz::autoplot(task, type = "pairs")
## ----12-spatial-cv-12, eval=FALSE-------------------------------------------------------------------
## mlr3extralearners::list_mlr3learners(
## filter = list(class = "classif", properties = "twoclass"),
## select = c("id", "mlr3_package", "required_packages")) |>
## head()
## ----lrns, echo=FALSE-------------------------------------------------------------------------------
lrns_df = mlr3extralearners::list_mlr3learners(
filter = list(class = "classif", properties = "twoclass"),
select = c("id", "mlr3_package", "required_packages")) |>
head()
# dput(lrns_df)
# lrns_df = structure(list(Class = c("classif.adaboostm1", "classif.binomial",
# "classif.featureless", "classif.fnn", "classif.gausspr", "classif.IBk"
# ), Name = c("ada Boosting M1", "Binomial Regression", "Featureless classifier",
# "Fast k-Nearest Neighbour", "Gaussian Processes", "k-Nearest Neighbours"
# ), `Short name` = c("adaboostm1", "binomial", "featureless",
# "fnn", "gausspr", "ibk"), Package = c("RWeka", "stats", "mlr",
# "FNN", "kernlab", "RWeka")), row.names = c(NA, 6L), class = "data.frame")
knitr::kable(lrns_df,
caption = paste("Sample of available learners for binomial",
"tasks in the mlr3 package."),
caption.short = "Sample of available learners.", booktabs = TRUE)
## ----12-spatial-cv-13, eval=FALSE-------------------------------------------------------------------
## learner = mlr3::lrn("classif.log_reg", predict_type = "prob")
## ----12-spatial-cv-14, eval=FALSE-------------------------------------------------------------------
## learner$help()
## ----12-spatial-cv-15, eval=FALSE-------------------------------------------------------------------
## learner$train(task)
## learner$model
## ----12-spatial-cv-16, eval=FALSE, echo=FALSE-------------------------------------------------------
## learner$model$formula
## task$data()
## learner$model
## ----12-spatial-cv-17, eval=FALSE-------------------------------------------------------------------
## fit = glm(lslpts ~ ., family = binomial(link = "logit"),
## data = select(lsl, -x, -y))
## identical(fit$coefficients, learner$model$coefficients)
## ----12-spatial-cv-18, eval=FALSE-------------------------------------------------------------------
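## # partitions are built by k-means clustering of the observation coordinates,
## # repeated 100 times to reduce the variance of random partitioning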
## resampling = mlr3::rsmp("repeated_spcv_coords", folds = 5, repeats = 100)
## ----12-spatial-cv-19, eval=FALSE-------------------------------------------------------------------
## # reduce verbosity
## lgr::get_logger("mlr3")$set_threshold("warn")
## # run spatial cross-validation and save it to resample result glm (rr_glm)
## rr_spcv_glm = mlr3::resample(task = task,
## learner = learner,
## resampling = resampling)
## # compute the AUROC as a data.table
## score_spcv_glm = rr_spcv_glm$score(measure = mlr3::msr("classif.auc"))
## # keep only the columns you need
## score_spcv_glm = score_spcv_glm[, .(task_id, learner_id, resampling_id, classif.auc)]
## ----12-spatial-cv-21-------------------------------------------------------------------------------
score = readRDS("extdata/12-bmr_score.rds")
score_spcv_glm = score[learner_id == "classif.log_reg" &
resampling_id == "repeated_spcv_coords"]
## ----12-spatial-cv-22-------------------------------------------------------------------------------
mean(score_spcv_glm$classif.auc) |>
round(2)
## ----boxplot-cv, echo=FALSE, out.width="75%", fig.cap="Boxplot showing the difference in GLM AUROC values on spatial and conventional 100-repeated 5-fold cross-validation.", fig.scap="Boxplot showing AUROC values."----
library(ggplot2)
# rename the levels of resampling_id
score[, resampling_id := as.factor(resampling_id) |>
forcats::fct_recode("conventional CV" = "repeated_cv",
"spatial CV" = "repeated_spcv_coords") |>
forcats::fct_rev()]
# create the boxplot
ggplot2::ggplot(data = score[learner_id == "classif.log_reg"],
mapping = ggplot2::aes(x = resampling_id, y = classif.auc)) +
ggplot2::geom_boxplot(fill = c("lightblue2", "mistyrose2")) +
ggplot2::theme_bw() +
ggplot2::labs(y = "AUROC", x = "")
## ----12-spatial-cv-23-------------------------------------------------------------------------------
mlr3_learners = mlr3extralearners::list_mlr3learners()
mlr3_learners[class == "classif" & grepl("svm", id),
.(id, class, mlr3_package, required_packages)]
## ----12-spatial-cv-24-------------------------------------------------------------------------------
lrn_ksvm = mlr3::lrn("classif.ksvm", predict_type = "prob", kernel = "rbfdot")
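# the featureless learner acts as a fallback: if the SVM fails on a partition,
# its prediction is used instead of aborting the whole resampling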
lrn_ksvm$fallback = lrn("classif.featureless", predict_type = "prob")
## ----12-spatial-cv-25-------------------------------------------------------------------------------
# performance estimation level
perf_level = mlr3::rsmp("repeated_spcv_coords", folds = 5, repeats = 100)
## ----inner-outer, echo=FALSE, fig.cap="Schematic of hyperparameter tuning and performance estimation levels in CV. (Figure was taken from Schratz et al. (2019). Permission to reuse it was kindly granted.)", fig.scap="Schematic of hyperparameter tuning."----
knitr::include_graphics("images/13_cv.png")
## ----12-spatial-cv-26, eval=FALSE-------------------------------------------------------------------
## # five spatially disjoint partitions
## tune_level = mlr3::rsmp("spcv_coords", folds = 5)
## # use 50 randomly selected hyperparameters
## terminator = mlr3tuning::trm("evals", n_evals = 50)
## tuner = mlr3tuning::tnr("random_search")
## # define the outer limits of the randomly selected hyperparameters
## search_space = paradox::ps(
## C = paradox::p_dbl(lower = -12, upper = 15, trafo = function(x) 2^x),
## sigma = paradox::p_dbl(lower = -15, upper = 6, trafo = function(x) 2^x)
## )
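## # illustrative sketch (not from the original chunk; assumes paradox, which is
## # already used above): sampling a few points from the search space and
## # applying the 2^x trafo shows the values the tuner would pass to the learner
## design = paradox::generate_design_random(search_space, n = 3)
## design$transpose()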
## ----12-spatial-cv-27, eval=FALSE-------------------------------------------------------------------
## at_ksvm = mlr3tuning::AutoTuner$new(
## learner = lrn_ksvm,
## resampling = tune_level,
## measure = mlr3::msr("classif.auc"),
## search_space = search_space,
## terminator = terminator,
## tuner = tuner
## )
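## # the AutoTuner behaves like an ordinary learner: it tunes C and sigma on the
## # inner folds and is passed to resample() below for the outer estimation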
## ----future, eval=FALSE-----------------------------------------------------------------------------
## library(future)
## # execute the outer loop sequentially and parallelize the inner loop
## future::plan(list("sequential", "multisession"),
## workers = floor(availableCores() / 2))
## ----12-spatial-cv-30, eval=FALSE-------------------------------------------------------------------
## progressr::with_progress(expr = {
## rr_spcv_svm = mlr3::resample(task = task,
## learner = at_ksvm,
## # outer resampling (performance level)
## resampling = perf_level,
## store_models = FALSE,
## encapsulate = "evaluate")
## })
##
## # stop parallelization
## future:::ClusterRegistry("stop")
## # compute the AUROC values
## score_spcv_svm = rr_spcv_svm$score(measure = mlr3::msr("classif.auc"))
## # keep only the columns you need
## score_spcv_svm = score_spcv_svm[, .(task_id, learner_id, resampling_id, classif.auc)]
## ----12-spatial-cv-31-------------------------------------------------------------------------------
score = readRDS("extdata/12-bmr_score.rds")
score_spcv_svm = score[learner_id == "classif.ksvm.tuned" &
resampling_id == "repeated_spcv_coords"]
## ----12-spatial-cv-33-------------------------------------------------------------------------------
# final mean AUROC
round(mean(score_spcv_svm$classif.auc), 2)
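# illustrative sketch (not part of the original chunk): place the GLM and SVM
# mean AUROC values computed from the stored scores side by side
round(c(glm = mean(score_spcv_glm$classif.auc),
        svm = mean(score_spcv_svm$classif.auc)), 2)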
## ---- echo=FALSE, results='asis'--------------------------------------------------------------------
res = knitr::knit_child('_12-ex.Rmd', quiet = TRUE, options = list(include = FALSE, eval = FALSE))
cat(res, sep = '\n')