#' Prepare a base object to perform further analyses
#'
#' @description prepare an S3 object that will serve as the base to perform all
#' further analyses. This object will contain geographic and environmental
#' information that will be used to characterize the region of interest.
#'
#' @param region SpatVector of the region of interest; for instance,
#' a country, another type of administrative are, or a protected area.
#' @param variables SpatRaster of environmental variables.
#' @param mask (optional) SpatVector object to mask \code{variables} and
#' reduce \code{region} to an area that is more relevant for analysis (e.g.,
#' only areas with natural vegetation cover). Default = NULL.
#' @param preselected_sites data.frame containing sites that must be included
#' in posterior selections of sites for the survey system. Columns must be:
#' "Sites", "Longitude", "Latitude", in that order.
#' @param do_pca (logical) whether or not to perform a principal component
#' analysis. Default = FALSE.
#' @param center (logical) whether or not to center variables. Argument to be
#' passed to the function \code{\link[stats]{prcomp}}. Default = TRUE.
#' @param scale (logical) whether or not to scale the variables. Recommended
#' when variables are in different units. Argument to be passed to the function
#' \code{\link[stats]{prcomp}}. Default = FALSE.
#' @param variables_in_matrix (character) name of variables to include in
#' matrix. If NULL (the default) all variables will be included.
#' @param verbose (logical) whether or not to print messages about the process.
#' Default = TRUE.
#'
#' @return
#' An S3 object of class \code{\link{master_matrix}} containing the following
#' elements:
#' - data_matrix: a date.frame with information about geographic location of
#' raster cells, initial environmental data, and if \code{do_pca} is TRUE,
#' the first two principal components derived from original data.
#' - region: a SpatVector representing the region of interest.
#' - mask: SpatVector object used. NULL if \code{mask} was not defined.
#' - preselected_sites: sites defined by used. NULL if \code{preselected_sites}
#' was not defined.
#' - raster_base: a SpatRaster representing the raster masked to region or
#' mask, which will be used for plotting purposes.
#' - PCA_results: if \code{do_pca} is TRUE, other results from principal
#' component analysis. If FALSE, PCA_results element of the object is NULL.
#'
#' @details
#' This function helps in preparing all data as needed for further analyses
#' aiming to define a survey sampling system considering geographic and
#' environmental spaces in the \code{region} of interest.
#'
#' If \code{mask} is defined all analyses will be restricted to such an area.
#' If \code{mask} is not fully contained by \code{region}, the mask used for
#' reducing \code{variables}, and returned as part of the S3 object
#' (master_matrix) is the intersection between them.
#'
#' If \code{preselected_sites} is defined, environmental values and, if
#' \code{do_pca} = TRUE, principal components are added to such records. These
#' records and their characteristics will be considered in further analyses.
#'
#' @usage
#' prepare_master_matrix(region, variables, mask = NULL,
#' preselected_sites = NULL, do_pca = FALSE,
#' center = TRUE, scale = FALSE,
#' variables_in_matrix = NULL, verbose = TRUE)
#'
#' @export
#' @importFrom terra crop as.data.frame extract
#' @importFrom stats prcomp predict
#'
#' @examples
#' # Data
#' mx <- terra::vect(system.file("extdata/mx.gpkg", package = "biosurvey"))
#'
#' variables <- terra::rast(system.file("extdata/variables.tif",
#' package = "biosurvey"))
#'
#' # Create master matrix object
#' m_matrix <- prepare_master_matrix(region = mx, variables = variables,
#' do_pca = TRUE, center = TRUE, scale = TRUE)
prepare_master_matrix <- function(region, variables, mask = NULL,
preselected_sites = NULL, do_pca = FALSE,
center = TRUE, scale = FALSE,
variables_in_matrix = NULL, verbose = TRUE) {
# Initial tests
if (missing(region)) {
stop("Argument 'region' must be defined")
}
if (!class(region)[1] == "SpatVector") {
stop("'region' must be of class SpatVector")
}
if (missing(variables)) {
stop("Argument 'variables' must be defined")
}
if (!class(variables)[1] == "SpatRaster") {
stop("'variables' must be of class SpatRaster")
}
if (!is.null(mask)) {
if (!class(mask)[1] == "SpatVector") {
stop("'mask' must be of class SpatVector")
}
}
# Mask variables to polygons (region of interest)
if (verbose == TRUE) {
message("Processing raster layers, please wait...")
}
if (!is.null(mask)) {
variables <- terra::crop(variables, mask, mask = TRUE)
} else {
variables <- terra::crop(variables, region, mask = TRUE)
}
# Adding user defined points
if (!is.null(preselected_sites)) {
if (class(preselected_sites)[1] != "data.frame") {
stop("Argument 'preselected_sites' must be of class data.frame.")
}
preselected_sites <- data.frame(preselected_sites,
terra::extract(
variables,
preselected_sites[, 2:3])[, -1]
)
colnames(preselected_sites)[2:3] <- c("Longitude", "Latitude")
}
# Base raster
b_raster <- variables[[1]]
names(b_raster) <- "base"
b_raster[!is.na(b_raster[])] <- 1
#b_raster <- as(b_raster,"SpatialPolygonsDataFrame")
# Raster to matrix
variables <- terra::as.data.frame(variables, xy = TRUE)
colnames(variables)[1:2] <- c("Longitude", "Latitude")
# Selecting variables for the matrix
if (!is.null(variables_in_matrix)) {
variables <- variables[, c("Longitude", "Latitude", variables_in_matrix)]
if (!is.null(preselected_sites)) {
prna <- colnames(preselected_sites)
preselected_sites <- preselected_sites[, c(prna[1:3], variables_in_matrix)]
}
}
# If do_pca is TRUE, do PCA
if (do_pca == TRUE) {
if (verbose == TRUE) {
message("Performing PCA analysis")
}
pca <- stats::prcomp(variables[, -(1:2)], center = center, scale. = scale)
# Create matrix
master_m <- data.frame(variables, pca$x[, 1:2])
pca$x <- NULL
# Predict in predefined sites
if (!is.null(preselected_sites)) {
sspca <- stats::predict(pca, preselected_sites[, -(1:3)])
preselected_sites <- data.frame(preselected_sites, sspca[, 1:2])
}
# Return results
return(new_master_matrix(data_matrix = master_m, region = region,
mask = mask, raster_base = b_raster,
preselected_sites = preselected_sites,
PCA_results = pca))
} else {
# Create matrix
master_m <- data.frame(variables)
# Return results
return(new_master_matrix(data_matrix = master_m, region = region,
mask = mask, raster_base = b_raster,
preselected_sites = preselected_sites,
PCA_results = NULL))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.