#' Use distance (buffer) around records to separate train and test folds
#'
#' This function generates spatially separated train and test folds by considering buffers of specified distance around each observation point.
#' This approach is a form of \emph{leave-one-out} cross-validation. Each fold is generated by excluding nearby observations around each testing
#' point within the specified distance (ideally the range of spatial autocorrelation, see \code{\link{spatialAutoRange}}). In this method the test set never
#' directly abuts a training presence or absence. For more information see the details section.
#'
#' When working with presence-background (presence and pseudo-absence) data (specified by \code{spDataType} argument), only presence records are used
#' for specifying the folds. Consider a target presence point. The buffer is defined around this target point, using the specified range (\code{theRange}).
#' The testing fold comprises the target presence point and all background points within the buffer (this is the default. If \code{addBG = FALSE} the bacground
#' points are ignored). Any non-target presence points inside the buffer are excluded. All points (presence and background) outside of buffer
#' are used for training set. The methods cycles through all the \emph{presence} data, so the number of folds is equal to the number of presence points in the dataset.
#'
#' For presence-absence data, folds are created based on all records, both presences and absences. As above, a target observation (presence or absence) forms a
#' test point, all presence and absence points other than the target point within the buffer are ignored, and the training set comprises all presences and
#' absences outside the buffer. Apart from the folds, the number of \emph{training-presence}, \emph{training-absence}, \emph{testing-presence} and \emph{testing-absence}
#' records is stored and returned in the \code{records} table. If \code{species = NULL} (no column with 0s and 1s are defined), the procedure is like presence-absence data.
#'
#'
#' @param speciesData A SpatialPointsDataFrame, SpatialPoints or sf object containing species data.
#' @param species Character. Indicating the name of the field in which species presence/absence data (0s and 1s) are stored. If \code{speceis = NULL}
#' the presence and absence data will be treated the same and only training and testing records will be counted.
#' @param theRange Numeric value of the specified range by which the training and testing datasets are separated (See \code{\link{spatialAutoRange}}).
#' This distance should be in \strong{\emph{metres}}. The range can be explored by \code{spatialAutoRange}.
#' @param spDataType Character input indicating the type of data. It can take two values, \strong{PA} for \emph{presence-absence} data and \strong{PB} for
#' \emph{presence-background} data, when \code{species} is not \code{NULL}. See the details section for more information on these two approaches.
#' @param addBG Logical. Add background points to the test set when \code{spDataType = "PB"}.
#' @param progress Logical. If TRUE a progress bar will be shown.
#'
#' @seealso \code{\link{spatialAutoRange}} for selecting buffer distance; \code{\link{spatialBlock}} and \code{\link{envBlock}} for
#' alternative blocking strategies; \code{\link{foldExplorer}} for visualisation of the generated folds.
#'
#' @return An object of class S3. A list of objects including:
#' \itemize{
#' \item{folds - a list containing the folds. Each fold has two vectors with the training (first) and testing (second) indices}
#' \item{k - number of the folds}
#' \item{range - the distance band to separated trainig and testing folds)}
#' \item{species - the name of the species (column), if provided}
#' \item{dataType - species data type}
#' \item{records - a table with the number of points in each category of training and testing}
#' }
#'
#' @export
#'
#' @examples
#' \dontrun{
#'
#' # import presence-absence species data
#' PA <- read.csv(system.file("extdata", "PA.csv", package = "blockCV"))
#' # coordinate reference system
#' Zone55s <- "+proj=utm +zone=55 +south +ellps=GRS80 +units=m +no_defs"
#' # make a SpatialPointsDataFrame object from data.frame
#' pa_data <- sp::SpatialPointsDataFrame(PA[,c("x", "y")], PA, proj4string=CRS(Zone55s))
#'
#' # buffering with presence-absence data
#' bf1 <- buffering(speciesData= pa_data,
#' species= "Species", # to count the number of presences and absences
#' theRange= 68000,
#' spDataType = "PA",
#' progress = T)
#'
#'
#' # import presence-background species data
#' PB <- read.csv(system.file("extdata", "PB.csv", package = "blockCV"))
#' # make a SpatialPointsDataFrame object from data.frame
#' pb_data <- sp::SpatialPointsDataFrame(PB[,c("x", "y")], PB, proj4string=CRS(Zone55s))
#'
#' # buffering with presence-background data
#' bf2 <- buffering(speciesData= pb_data,
#' species= "Species",
#' theRange= 68000,
#' spDataType = "PB",
#' addBG = TRUE, # add background data to testing folds
#' progress = T)
#'
#' # buffering with no species attribute
#' bf3 <- buffering(speciesData= pa_data,
#' theRange= 63300)
#'
#' }
buffering <- function(speciesData, species=NULL, theRange, spDataType="PA", addBG=TRUE, progress=TRUE){
if((methods::is(speciesData, "SpatialPoints") || methods::is(speciesData, "sf"))==FALSE){stop("speciesData should be SpatialPointsDataFrame, SpatialPoints or sf object")}
if(methods::is(speciesData, "sf")){
sfobj <- speciesData
} else{
sfobj <- sf::st_as_sf(speciesData)
}
speciesData$ID <- 1:length(speciesData)
if(is.null(sf::st_crs(sfobj))){
stop("The coordinate reference system of species data should be defined")
} else if(sp::is.projected(speciesData)){ # this is due to a recent change (Jan 2018) in the sf package that doesn't recognize the projected crs units. It will be fixed soon.
sf::st_crs(sfobj) <- NA
}
dmatrix <- sf::st_distance(sfobj)
distuni <- dmatrix[1,1] # take the unit to avoid using units package
distuni[1] <- theRange
foldList <- list()
if(!is.null(species)){
presences <- speciesData[speciesData@data[,species]==1,] # creating a layer of presence data
if(spDataType=="PB"){
prI <- which(speciesData@data[,species] == 1) # presence indices to loop through
n <- length(prI)
trainTestTable <- base::data.frame(trainPr=rep(0, n), trainAb=0, testPr=0, testAb=0)
if(progress==TRUE){
pb <- progress::progress_bar$new(format = " Progress [:bar] :percent in :elapsed",
total=n, clear=FALSE, width=75) # add progress bar
}
j <- 0
for(i in prI){ # loop through presences
j <- j + 1
trainSet <- which(dmatrix[i, ] > distuni)
if(addBG==TRUE){
test <- which(dmatrix[i, ] <= distuni)
inside <- speciesData[test, ]
testSet <- inside[inside@data[,species]==0,]$ID # creating IDs of bg data
testSet[length(testSet) + 1] <- i
} else{
testSet <- i
}
foldList[[j]] <- assign(paste0("fold", j), list(trainSet, testSet))
lnPrsences <- length(presences)
lnBackground <- length(speciesData) - lnPrsences
trainPoints <- speciesData[trainSet, ]
trainTestTable$trainPr[j] <- length(trainPoints[trainPoints@data[,species]==1,])
trainTestTable$trainAb[j] <- length(trainPoints[trainPoints@data[,species]!=1,])
trainTestTable$testPr[j] <- 1
if(addBG==TRUE){
trainTestTable$testAb[j] <- lnBackground - trainTestTable$trainAb[j]
} else{
trainTestTable$testAb[j] <- 0
}
if(progress==TRUE){
pb$tick() # update progress bar
}
}
} else if(spDataType=="PA"){
n <- length(speciesData)
trainTestTable <- base::data.frame(trainPr=rep(0, n), trainAb=0, testPr=0, testAb=0)
if(progress==TRUE){
pb <- progress::progress_bar$new(format = " Progress [:bar] :percent in :elapsed",
total=n, clear=FALSE, width=75) # add progress bar
}
for(i in 1:n){
trainSet <- which(dmatrix[i, ] > distuni)
testSet <- i
foldList[[i]] <- assign(paste0("fold", i), list(trainSet, testSet))
lnPrsences <- length(presences)
# lnAbsence <- length(speciesData) - lnPrsences
trainPoints <- speciesData[trainSet, ]
trainTestTable$trainPr[i] <- length(trainPoints[trainPoints@data[,species]==1,])
trainTestTable$trainAb[i] <- length(trainPoints[trainPoints@data[,species]!=1,])
if(speciesData@data[testSet, species] == 1){
trainTestTable$testPr[i] <- 1
trainTestTable$testAb[i] <- 0
} else{
trainTestTable$testPr[i] <- 0
trainTestTable$testAb[i] <- 1
}
if(progress==TRUE){
pb$tick() # update progress bar
}
}
}
} else{ # data with no species column
n <- length(speciesData)
trainTestTable <- base::data.frame(train=rep(0, n), test=0)
if(progress==TRUE){
pb <- progress::progress_bar$new(format = " Progress [:bar] :percent in :elapsed",
total=n, clear=FALSE, width=75) # add progress bar
}
for(i in 1:n){
trainSet <- which(dmatrix[i, ] > distuni)
testSet <- i
foldList[[i]] <- assign(paste0("fold", i), list(trainSet, testSet))
trainTestTable$train[i] <- length(trainSet)
trainTestTable$test[i] <- length(testSet)
if(progress==TRUE){
pb$tick() # update progress bar
}
}
}
theList <- list(folds=foldList, k=n, species=species, range=theRange, dataType=spDataType, records=trainTestTable)
class(theList) <- c("BufferedBlock")
return(theList)
}
#' @export
print.BufferedBlock <- function(x, ...){
print(class(x))
}
#' @export
summary.BufferedBlock <- function(object, ...){
print("Number of recoreds in each category")
print(object$records)
}
systematicNum <- function(layer, num=5){
n <- nrow(layer)
if(n %% num == 0){
a <- n/num
c <- rep(1:num, a)
} else {
a <- floor(n/num)
b <- n %% num
c <- c(rep(1:num, a), 1:b)
}
return(c)
}
#' Use spatial blocks to separate train and test folds
#'
#' This function creates spatially separated folds based on a pre-specified distance. It assigns blocks to the training and
#' testing folds randomly, systematically or in a checkerboard pattern. The distance (\code{theRange}) should be in \strong{metres},
#' regardless of the unit of the reference system of the input data (for more information see the details section). By default,
#' the function creates blocks according to the extent and shape of the study area, assuming that the user has considered the
#' landscape for the given species and case study.
#' Alternatively, blocks can be created based on species spatial data. This is especially useful when the
#' species data is not evenly dispersed in the whole region. Blocks can also be offset so the origin is not at the outer
#' corner of the rasters. Instead of providing a distance, the blocks can also be created by specifying a number of rows and
#' columns and divide the study area into vertical or horizontal bins, as presented in Wenger & Olden (2012) and Bahn & McGill (2012).
#' Finally, the blocks can be specified by a user-defined spatial polygon layer.
#'
#'
#' To keep the consistency, all the functions use \strong{metres} as their unit. In this function, when the input map
#' has geographic coordinate system (decimal degrees), the block size is calculated based on deviding \code{theRange} by
#' 111325 (the standard distance of a degree in metres, on the Equator) to change the unit to degree. This value is optional
#' and can be changed by user via \code{degMetre} argument.
#'
#' The \code{xOffset} and \code{yOffset} can be used to change the spatial position of the blocks. It can also be used to
#' assess the sensitivity of analysis results to shifting in the blocking arrangements. These options are available when \code{theRange}
#' is defined. By default the region is located in the middle of the blocks and by setting the offsets, the blocks will shift.
#'
#' Roberts et. al. (2017) suggest that blocks should be substantially bigger than the range of spatial
#' autocorrelation (in model residual) to obtain realistic error estimates, while a buffer with the size of
#' the spatial autocorrelation range would result in a good estimation of error. This is because of the so-called
#' edge effect (O'Sullivan & Unwin, 2014), whereby points located on the edges of the blocks of opposite sets are
#' not separated spatially. Blocking with a buffering strategy overcomes this issue (see \code{\link{buffering}}).
#'
#'
#' @inheritParams buffering
#' @param blocks A SpatialPolygons* or sf object to be used as the blocks. This can be a user defined polygon and it must cover all
#' the species points.
#' @param theRange Numeric value of the specified range by which blocks are created and training/testing data are separated.
#' This distance should be in \strong{metres}. The range could be explored by \code{spatialAutoRange()} and \code{rangeExplorer()} functions.
#' @param k Integer value. The number of desired folds for cross-validation. The default is \code{k = 5}.
#' @param selection Type of assignment of blocks into folds. Can be \strong{random} (default), \strong{systematic} or \strong{checkerboard} pattern (not working with user-defined blocks).
#' @param iteration Integer value. The number of attempts to create folds that fulfil the set requirement for minimum number
#' of points in each category (\emph{training-presence}, \emph{training-absence}, \emph{testing-presence}
#' and \emph{testing-absence}), as specified by \code{numLimit} value.
#' @param numLimit Integer value. The minimum number of points in each category of data (see above - \code{iterration}).
#' If \code{numLimit = NULL}, the most evenly dispersed number of records is chosen (given the number of iteration).
#' @param maskBySpecies Logical. If raster layer is provided and \code{maskBySpecies = TRUE}, the blocks will be
#' created based on the raster extent, but only those blocks covering species data is kept. The default is \code{TRUE}.
#' @param degMetre Integer. The conversion rate of metres to degree. See the details section for more information.
#' @param rasterLayer RasterLayer for visualisation. If provided, this will be used to specify the blocks covering the area.
#' @param border SpatialPolygons* or sf object to clip the block based on a border. This might increase the computation time.
#' @param showBlocks Logical. If TRUE the final blocks with fold numbers will be plotted. A raster layer could be specified
#' in \code{rasterlayer} argument to be as background.
#' @param biomod2Format Logical. Creates a matrix of folds that can be directly used in the \pkg{biomod2} package as
#' a \emph{DataSplitTable} for cross-validation.
#' @param progress Logical. If TRUE shows a progress bar when \code{numLimit = NULL} in random fold selection.
#' @param rows Integer value by which the area is divided into latitudinal bins.
#' @param cols Integer value by which the area is divided into longitudinal bins.
#' @param xOffset Numeric value between \strong{0} and \strong{1} for shifting the blocks horizontally.
#' The value is the proportion of block size.
#' @param yOffset Numeric value between \strong{0} and \strong{1} for shifting the blocks vertically. The value is the proportion of block size.
#'
#' @seealso \code{\link{spatialAutoRange}} and \code{\link{rangeExplorer}} for selecting block size; \code{\link{buffering}}
#' and \code{\link{envBlock}} for alternative blocking strategies; \code{\link{foldExplorer}} for visualisation of the generated folds.
#' @seealso For \emph{DataSplitTable} see \code{\link[biomod2]{BIOMOD_cv}} in \pkg{biomod2} package
#'
#' @references Bahn, V., & McGill, B. J. (2012). Testing the predictive performance of distribution models. Oikos, 122(3), 321–331.
#'
#' O’Sullivan, D., Unwin, D.J., 2010. Geographic Information Analysis, 2nd ed. John Wiley & Sons.
#'
#' Roberts et al., 2017. Cross-validation strategies for data with temporal, spatial, hierarchical,
#' or phylogenetic structure. Ecography. 40: 913-929.
#'
#' Wenger, S.J., Olden, J.D., 2012. Assessing transferability of ecological models: an underappreciated aspect of statistical
#' validation. Methods Ecol. Evol. 3, 260–267.
#'
#' @return An object of class S3. A list of objects including:
#' \itemize{
#' \item{folds - a list containing the folds. Each fold has two vectors with the training (first) and testing (second) indices}
#' \item{foldID - a vector of values indicating the number of the fold for each observation (each number corresponds to the same point in species data)}
#' \item{biomodTable - a matrix with the folds to be used in \pkg{biomod2} package}
#' \item{k - number of the folds}
#' \item{blocks - SpatialPolygon of the blocks}
#' \item{range - the distance band of separating trainig and testing folds, if provided}
#' \item{species - the name of the species (column), if provided}
#' \item{plots - ggplot object}
#' \item{records - a table with the number of points in each category of training and testing}
#' }
#' @export
#'
#' @examples
#' \dontrun{
#'
#' # load package data
#' awt <- raster::brick(system.file("extdata", "awt.grd", package = "blockCV"))
#' # import presence-absence species data
#' PA <- read.csv(system.file("extdata", "PA.csv", package = "blockCV"))
#' # make a SpatialPointsDataFrame object from data.frame
#' pa_data <- sp::SpatialPointsDataFrame(PA[,c("x", "y")], PA, proj4string=raster::crs(awt))
#'
#' # spatial blocking by specified range and random assignment
#' sb1 <- spatialBlock(speciesData = pa_data,
#' species = "Species",
#' theRange = 68000,
#' k = 5,
#' selection = 'random',
#' iteration = 250,
#' numLimit = NULL,
#' biomod2Format = TRUE,
#' xOffset = 0.3, # shift the blocks horizontally
#' yOffset = 0)
#'
#' # spatial blocking by row/column and systematic fold assignment
#' sb2 <- spatialBlock(speciesData = pa_data,
#' species = "Species",
#' rasterLayer = awt,
#' rows = 5,
#' cols = 8,
#' k = 5,
#' selection = 'systematic',
#' maskBySpecies = TRUE,
#' biomod2Format = TRUE)
#'
#' }
spatialBlock <- function(speciesData, species=NULL, blocks=NULL, rasterLayer=NULL, theRange=NULL, rows=NULL, cols=NULL, k=5,
selection='random', iteration=250, numLimit=NULL, maskBySpecies=TRUE, degMetre=111325, border=NULL,
showBlocks=TRUE, biomod2Format=TRUE, xOffset=0, yOffset=0, progress=TRUE){
if(selection != 'systematic' && selection != 'random' && selection != 'checkerboard'){stop("The selection argument must be 'systematic', 'random' or 'checkerboard'")}
chpattern <- FALSE
if(selection == 'checkerboard'){
chpattern <- TRUE
if(k != 2){
k <- 2
message("k has been set to 2 because of checkerboard fold selection")
}
}
if(methods::is(speciesData, "sf")){
speciesData <- sf::as_Spatial(speciesData)
}
if(methods::is(speciesData, "SpatialPoints") && !methods::is(speciesData, "SpatialPointsDataFrame")){
speciesData <- sp::SpatialPointsDataFrame(speciesData, data.frame(ID=1:length(speciesData)))
}
if(methods::is(border, "sf")){
border <- sf::as_Spatial(border)
}
if(is.null(blocks)){
if(is.null(rasterLayer)){
net <- rasterNet(speciesData, resolution=theRange, xbin=cols, ybin=rows, degree=degMetre, xOffset=xOffset, yOffset=yOffset, checkerboard=chpattern)
if(is.null(border)){
subBlocks <- raster::intersect(net, speciesData)
} else{
subBlocks <- crop(net, border)
}
} else{
if(is.null(border)){
if(maskBySpecies==FALSE){
subBlocks <- rasterNet(rasterLayer[[1]], mask=TRUE, resolution=theRange, xbin=cols, ybin=rows, degree=degMetre, xOffset=xOffset, yOffset=yOffset, checkerboard=chpattern)
} else{
net <- rasterNet(rasterLayer[[1]], resolution=theRange, xbin=cols, ybin=rows, degree=degMetre, xOffset=xOffset, yOffset=yOffset, checkerboard=chpattern)
subBlocks <- raster::intersect(net, speciesData)
}
} else{ # having a border
net <- rasterNet(rasterLayer[[1]], resolution=theRange, xbin=cols, ybin=rows, degree=degMetre, xOffset=xOffset, yOffset=yOffset, checkerboard=chpattern)
subBlocks <- crop(net, border)
}
}
} else{
if(methods::is(blocks, "SpatialPolygonsDataFrame")){
subBlocks <- raster::intersect(blocks, speciesData)
} else if(methods::is(blocks, "SpatialPolygons")){
df <- data.frame(ID=1:length(blocks))
blocks <- sp::SpatialPolygonsDataFrame(blocks, df, match.ID=FALSE)
subBlocks <- raster::intersect(blocks, speciesData)
if(!is.null(species)){
species <- NULL
message("The species argument has been set to NULL since there is no table associated with SpatialPoints object")
}
} else if(methods::is(blocks, "sf")){
blocks <- sf::as_Spatial(blocks)
subBlocks <- raster::intersect(blocks, speciesData)
} else{
stop("The input blocks should be a SpatialPolygons* object")
}
if(selection == 'checkerboard'){
selection <- 'systematic'
message("'checkerboard' fold selection does not work with user defined blocks. 'systematic' will be used instead.")
}
}
# given a wrong iteration, ensure the process will be run at least once
if(iteration < 1 || is.numeric(iteration)==FALSE){
iteration=1
message("The interation has been set to 1! \n", "Iteration must be a numeric value, equal or higher than one.")
} else if(is.numeric(iteration) && iteration >= 10000){
message("Due to the large number of iterations, the process might take a while")
}
if(progress==TRUE && is.null(numLimit)){
pb <- progress::progress_bar$new(format = " Progress [:bar] :percent in :elapsed",
total=iteration, clear=FALSE, width=75) # add progress bar
}
maxNumRecord <- 0
maxSD <- Inf
for(i in 1:iteration){
nrowBlocks <- length(subBlocks)
if(k > nrowBlocks){
stop("'k' is bigger than the number of the blocks")
} else if(k < 2){stop("'k' must be 2 or higher")}
if(selection=='systematic'){
subBlocks$folds <- systematicNum(subBlocks, k)
} else if(selection=='checkerboard'){
subBlocks$folds <- subBlocks$layer
} else if(selection=='random'){
# create random folds
subBlocks$folds <- 0
nrowBlocks <- length(subBlocks)
if(k==2){
subBlocks$folds[sample(nrowBlocks, nrowBlocks/2, replace=FALSE)] <- 1
unfold <- which(subBlocks$folds==0)
subBlocks$folds[unfold] <- 2
} else {
num <- floor(nrowBlocks / k)
for(kk in 1:num){ # while
unfold <- which(subBlocks$folds==0)
subBlocks$folds[sample(unfold, k, replace=FALSE)]=1:k
}
if(nrowBlocks %% k != 0){
rest <- nrowBlocks %% k
unfold <- which(subBlocks$folds==0)
subBlocks$folds[unfold]=1:rest
}
}
}
if(!is.null(species)){
presences <- speciesData[speciesData@data[,species]==1,] # creating a layer of presence data
trainTestTable <- base::data.frame(trainPr=rep(0, k), trainAb=0, testPr=0, testAb=0)
} else{
trainTestTable <- base::data.frame(train=rep(0, k), test=0)
}
foldList <- list()
foldNum <- rep(NA, nrow(speciesData))
biomodTable <- data.frame(RUN1=rep(TRUE, length(speciesData)))
for(p in 1:k){
sp.over <- sp::over(speciesData, subBlocks[subBlocks$folds==p, ]) # overlay layers to specify the inside & oudside points
trainSet <- which(is.na(sp.over[,1])) # exclude all the data from the bufer area
testSet <- which(!is.na(sp.over[,1]))
foldNum[testSet] <- p
foldList[[p]] <- assign(paste0("fold", p), list(trainSet, testSet))
if(!is.null(species)){
lnPrsences <- length(presences)
lnAbsences <- length(speciesData) - lnPrsences
trainPoints <- speciesData[which(is.na(sp.over[,1])),]
trainTestTable$trainPr[p] <- length(trainPoints[trainPoints@data[,species]==1,])
trainTestTable$trainAb[p] <- length(trainPoints[trainPoints@data[,species]!=1,])
trainTestTable$testPr[p] <- lnPrsences - trainTestTable$trainPr[p]
trainTestTable$testAb[p] <- lnAbsences - trainTestTable$trainAb[p]
} else{
trainTestTable$train[p] <- length(trainSet)
trainTestTable$test[p] <- length(testSet)
}
if(biomod2Format==TRUE){ # creating a biomod2 DataSplitTable for validation
colm <- paste0("RUN", p)
biomodTable[,colm] <- FALSE
biomodTable[,colm][trainSet] <- TRUE # biomodTable[trainSet,colm] <- TRUE # #### Change this!
}
}
if(selection=='systematic' || selection == 'checkerboard'){
# if(iteration > 1){message('Iteration has been set to 1 due to systematic selection')}
break
} else{ # do the rest if the selection is random
if(is.numeric(numLimit) && numLimit >= 0){
if(any(trainTestTable < numLimit)==FALSE){ # exit the loop if meet the limit number
break
}
} else if(is.null(numLimit)){ # find the highest minimum number in the table and store relevant objects
if(min(trainTestTable) >= maxNumRecord && stats::sd(unlist(trainTestTable)) < maxSD){
trainTestTable2 <- trainTestTable
maxNumRecord <- min(trainTestTable2)
maxSD <- stats::sd(unlist(trainTestTable))
subBlocks2 <- subBlocks
foldList2 <- foldList
foldNum2 <- foldNum
biomodTable2 <- biomodTable
iter <- i
}
} else (stop("numLimit argument should be a numeric value equal or hagher than 0 or be NULL"))
if(progress==TRUE && is.null(numLimit)){
pb$tick() # update progress bar
}
}
}
if(is.null(numLimit) && selection=='random'){ # return the optimum bloks, table etc.
subBlocks <- subBlocks2
trainTestTable <- trainTestTable2
foldList <- foldList2
foldNum <- foldNum2
biomodTable <- biomodTable2
print(paste0("The best fold was in iteration ", iter, ":"))
# print(trainTestTable)
}
if(!is.null(numLimit) && any(trainTestTable < numLimit)==TRUE){ # show a message if criteria is not met
if(numLimit==0){
message(paste("There are one or more folds with", numLimit, "records"))
} else{
message(paste("There are one or more folds with less than", numLimit, "records"))
}
}
print(trainTestTable)
centroids <- as.data.frame(sp::coordinates(subBlocks)); names(centroids) <- c("Easting", "Northing") # take the centroids
subBlocks@data <- cbind(subBlocks@data, centroids) # add the centeroids to blocks
polyObj <- ggplot2::fortify(subBlocks)
if(is.na(sp::proj4string(speciesData))){
mapext <- raster::extent(speciesData)[1:4]
if(mapext >= -180 && mapext <= 180){
xaxes <- "Longitude"
yaxes <- "Latitude"
} else {
xaxes <- "Easting"
yaxes <- "Northing"
}
} else{
if(sp::is.projected(speciesData)){
xaxes <- "Easting"
yaxes <- "Northing"
} else{
xaxes <- "Longitude"
yaxes <- "Latitude"
}
}
if(is.null(rasterLayer)){
p2 <- ggplot() +
geom_polygon(aes(x = long, y = lat, group=group),
data = polyObj, color ="red",
fill ="orangered4",
alpha = 0.04,
size = 0.2) +
geom_text(aes(label=folds, x=Easting, y=Northing), data=subBlocks@data) +
xlab(xaxes) + ylab(yaxes) +
coord_fixed() +
ggtitle('Spatial blocks', subtitle=paste("The", selection, 'fold assignment by', i, 'iteration'))
} else{
if(methods::is(rasterLayer, 'Raster')){
samp <- raster::sampleRegular(rasterLayer[[1]], 5e+05, asRaster=TRUE)
map.df <- raster::as.data.frame(samp, xy=TRUE, centroids=TRUE, na.rm=TRUE)
colnames(map.df) <- c("Easting", "Northing", "MAP")
mid <- mean(map.df$MAP)
p2 <- ggplot(data=map.df, aes(y=Northing, x=Easting)) +
geom_raster(aes(fill=MAP)) +
coord_fixed() +
scale_fill_gradient2(low="darkred", mid="yellow", high="darkgreen", midpoint=mid) +
guides(fill=FALSE) +
ggtitle('Spatial blocks', subtitle=paste("The", selection, 'fold assignment by', i, 'iteration')) +
geom_polygon(aes(x = long, y = lat, group=group),
data = polyObj, color ="red",
fill ="orangered4",
alpha = 0.04,
size = 0.2) +
xlab(xaxes) + ylab(yaxes) +
geom_text(aes(label=folds, x=Easting, y=Northing), data=subBlocks@data) # lable blocks
}
}
if(showBlocks==TRUE){
plot(p2)
}
# save the objects
if(biomod2Format==TRUE){
biomodTable <- as.matrix(biomodTable)
theList <- list(folds=foldList, foldID=foldNum, biomodTable=biomodTable, k=k, blocks=subBlocks, species=species,
range=theRange, plots=p2, records=trainTestTable)
} else{
theList <- list(folds=foldList, foldID=foldNum, biomodTable=NULL, k=k, blocks=subBlocks, species=species,
range=theRange, plots=p2, records=trainTestTable)
}
class(theList) <- c("SpatialBlock")
return(theList)
}
#' @export
print.SpatialBlock <- function(x, ...){
print(class(x))
}
#' @export
plot.SpatialBlock <- function(x, y, ...){
plot(x$plots)
message("Please use foldExplorer function to plot each fold interactively")
}
#' @export
summary.SpatialBlock <- function(object, ...){
print("Number of recoreds in each category")
print(object$records)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.