blockCV: Spatial and Environmental Blocking for K-Fold and LOO Cross-Validation

## ----eval=FALSE---------------------------------------------------------------
#  # install stable version from CRAN
#  install.packages("blockCV", dependencies = TRUE)
#  
#  # install latest update from GitHub
#  remotes::install_github("rvalavi/blockCV", dependencies = TRUE)
#  

## ----message=TRUE, warning=TRUE-----------------------------------------------
# loading the package
library(blockCV)


## ----fig.height=5, fig.width=7.2, warning=FALSE, message=FALSE----------------
library(sf) # working with spatial vector data
library(terra) # working with spatial raster data
library(tmap) # plotting maps

# load the raster files shipped in the package's extdata/au folder
# the pipe operator |> is available for R version 4.1 or higher
# mustWork = TRUE: stop with a clear error if the folder cannot be found,
# instead of silently passing on the "" that system.file() returns by default
rasters <- system.file(
  "extdata", "au",
  package = "blockCV",
  mustWork = TRUE
) |>
  list.files(full.names = TRUE) |>
  terra::rast()


## ----fig.height=4.5, fig.width=7.1--------------------------------------------
# load species presence-absence data (converted to sf in a later step)
# mustWork = TRUE: fail early with a clear message if the csv is not installed,
# rather than handing read.csv() the "" that system.file() returns by default
points <- read.csv(
  system.file("extdata", "species.csv", package = "blockCV", mustWork = TRUE)
)
# preview the first rows of the table
head(points)


## ----fig.height=4.5, fig.width=7.1--------------------------------------------
# convert the table to an sf point object using its "x" and "y" columns
# NOTE(review): crs = 7845 is taken as the EPSG code of the coordinates;
# presumably a projected CRS in metres, matching the block sizes used later
# in this script -- confirm
pa_data <- sf::st_as_sf(points, coords = c("x", "y"), crs = 7845)

## ----fig.height=4.5, fig.width=7.1--------------------------------------------
# map the first raster layer in grey shades (legend hidden) and overlay the
# species records, coloured by the categorical "occ" column
tm_shape(rasters[[1]]) +
  tm_raster(
    legend.show = FALSE,
    n = 30,
    palette = gray.colors(10)
  ) +
  tm_shape(pa_data) +
  tm_dots(
    col = "occ",
    style = "cat",
    size = 0.1
  )


## ----results='hide', fig.keep='all', warning=FALSE, message=FALSE, fig.height=5, fig.width=7----
# spatial blocking with random assignment of blocks to folds
# NOTE(review): no seed is set before this call, so the random assignment
# presumably differs between runs -- confirm this is intended
sb1 <- cv_spatial(
  x = pa_data,
  # the response column (binary or multi-class)
  column = "occ",
  # number of folds
  k = 5,
  # size of the blocks in metres
  size = 350000,
  # random blocks-to-fold
  selection = "random",
  # iterations used to find evenly dispersed folds
  iteration = 50,
  # also create folds for biomod2
  biomod2 = TRUE
)


## ----warning=FALSE, message=FALSE, fig.height=5, fig.width=7------------------
# same random spatial blocking as before, but with square blocks and the
# raster stack supplied as an optional extra layer
sb2 <- cv_spatial(
  x = pa_data,
  column = "occ",
  # optionally add a raster layer
  r = rasters,
  k = 5,
  size = 350000,
  # use square blocks
  hexagon = FALSE,
  selection = "random",
  # turn off progress bar for vignette
  progress = FALSE,
  iteration = 50,
  biomod2 = TRUE
)


## ----warning=FALSE, message=FALSE, fig.height=5, fig.width=7------------------
# systematic assignment of blocks to folds; the blocks are defined by a
# number of rows and columns rather than by a block size
sb3 <- cv_spatial(
  x = pa_data,
  column = "occ",
  rows_cols = c(12, 10),
  hexagon = FALSE,
  selection = "systematic"
)


## ----warning=FALSE, message=FALSE, fig.height=5, fig.width=7------------------
# assign square blocks to CV folds in a checkerboard pattern
sb4 <- cv_spatial(
  x = pa_data,
  column = "occ",
  size = 350000,
  hexagon = FALSE,
  selection = "checkerboard"
)


## ----warning=FALSE, message=FALSE, fig.height=5, fig.width=7------------------
# map the checkerboard blocks, filled by their categorical "folds" column
tm_shape(sb4$blocks) + tm_fill(col = "folds", style = "cat")


## -----------------------------------------------------------------------------
# spatial clustering into 5 folds; the seed is fixed right before the call
set.seed(6)
scv <- cv_cluster(
  x = pa_data,
  # optional: counting number of train/test records
  column = "occ",
  k = 5
)

## ----warning=FALSE, message=FALSE---------------------------------------------
# environmental clustering: same call as the spatial clustering, but with the
# raster stack supplied and scaling switched on; seed fixed before the call
set.seed(6)
ecv <- cv_cluster(
  x = pa_data,
  column = "occ",
  r = rasters,
  k = 5,
  scale = TRUE
)


## ----results='hide', fig.keep='all'-------------------------------------------
# buffer-based folds, using the same 350000 size as the spatial blocks above
bloo <- cv_buffer(
  x = pa_data,
  column = "occ",
  size = 350000
)


## ----fig.height=5, fig.width=7------------------------------------------------
# NNDM folds built from the species records and the raster stack, with 5000
# regularly placed samples; the diagnostic plot is requested via plot = TRUE
nncv <- cv_nndm(
  x = pa_data,
  column = "occ",
  r = rasters,
  size = 350000,
  num_sample = 5000,
  sampling = "regular",
  min_train = 0.1,
  plot = TRUE
)


## ----warning=FALSE, message=FALSE, fig.height=6, fig.width=8------------------
# plot the folds of the spatial clustering result
cv_plot(cv = scv, x = pa_data)


## ----warning=FALSE, message=FALSE, fig.height=5, fig.width=8------------------
# plot the buffering result, restricted to a few folds
cv_plot(
  cv = bloo,
  x = pa_data,
  # only show folds 1, 50 and 100
  num_plots = c(1, 50, 100)
)


## ----warning=FALSE, message=FALSE, fig.height=5, fig.width=7------------------
# plot the first spatial-block result over the rasters, using a
# semi-transparent terrain palette and a label size of 4
cv_plot(
  cv = sb1,
  r = rasters,
  raster_colors = terrain.colors(10, alpha = 0.5),
  label_size = 4
)


## ----fig.height=4, fig.width=6------------------------------------------------
# similarity check for the folds of the environmental clustering,
# with the progress bar switched off
cv_similarity(
  # the environmental clustering
  cv = ecv,
  x = pa_data,
  r = rasters,
  progress = FALSE
)


## ----results='hide', fig.keep='all', warning=FALSE, message=FALSE, fig.height=5, fig.width=7.2----
# spatial autocorrelation of the raster layers, based on 5000 samples
# NOTE(review): no seed is set before this call; if the sampling is random the
# result may vary between runs -- confirm
sac1 <- cv_spatial_autocor(
  r = rasters,
  num_sample = 5000
)


## -----------------------------------------------------------------------------
# print the class of the object returned by cv_spatial_autocor()
class(sac1)

## -----------------------------------------------------------------------------
# print the summary statistics of the raster-based autocorrelation result
summary(sac1)

## ----warning=FALSE, message=FALSE, fig.height=5, fig.width=7.2----------------
# spatial autocorrelation computed from the "occ" column of the species
# records instead of the raster layers
sac2 <- cv_spatial_autocor(
  x = pa_data,
  column = "occ"
)


## ----eval=TRUE, fig.height=4, fig.width=7-------------------------------------
# automap is attached here, immediately before the variogram is plotted
# NOTE(review): presumably required so that plot() dispatches to automap's
# method for the stored variogram object -- confirm
library(automap)

# plot the first variogram stored in the response-based autocorrelation result
plot(sac2$variograms[[1]])


## ----eval=FALSE---------------------------------------------------------------
#  cv_block_size(r = rasters)
#  

## ----eval=FALSE---------------------------------------------------------------
#  cv_block_size(x = pa_data,
#                column = "occ") # optionally add the response
#  

## ----eval=FALSE---------------------------------------------------------------
#  cv_block_size(x = pa_data,
#                column = "occ",
#                r = rasters,
#                min_size = 2e5,
#                max_size = 9e5)
#

rvalavi/blockCV documentation built on Nov. 1, 2024, 2:56 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

rvalavi/blockCV
Spatial and Environmental Blocking for K-Fold and LOO Cross-Validation

inst/doc/tutorial_1.R
In rvalavi/blockCV: Spatial and Environmental Blocking for K-Fold and LOO Cross-Validation

R Package Documentation

Browse R Packages

We want your feedback!

rvalavi/blockCV Spatial and Environmental Blocking for K-Fold and LOO Cross-Validation

inst/doc/tutorial_1.R In rvalavi/blockCV: Spatial and Environmental Blocking for K-Fold and LOO Cross-Validation

R Package Documentation

Browse R Packages

We want your feedback!

rvalavi/blockCV
Spatial and Environmental Blocking for K-Fold and LOO Cross-Validation

inst/doc/tutorial_1.R
In rvalavi/blockCV: Spatial and Environmental Blocking for K-Fold and LOO Cross-Validation