R/perform_pca.R

Defines functions perform_pca

Documented in perform_pca

#' Principal Component Analysis for raster layers
#'
#' @description
#' This function performs principal component analysis (PCA) with a set of
#' raster variables.
#'
#' @usage
#' perform_pca(raster_variables, exclude_from_pca = NULL, project = FALSE,
#'             projection_data = NULL, out_dir = NULL, overwrite = FALSE,
#'             progress_bar = FALSE, center = TRUE, scale = FALSE,
#'             variance_explained = 95, min_explained = 5)
#'
#' @param raster_variables (SpatRaster) set of predictor variables that the
#'        function will summarize into a set of orthogonal, uncorrelated
#'        components based on PCA.
#' @param exclude_from_pca (character) variable names within raster_variables that
#'        should not be included in the PCA transformation. Instead, these
#'        variables will be added directly to the final set of output variables
#'        without being modified. The default is NULL, meaning all variables
#'        will be used unless specified otherwise.
#' @param project (logical) whether the function should project new data from
#'        different scenarios (e.g. future variables) onto the PCA coordinates
#'        generated by the initial analysis. If TRUE, the argument
#'        projection_data needs to be defined. Default is FALSE.
#' @param projection_data an object of class `prepared_projection` returned by the
#'        [prepare_projection()] function. This file contains the paths
#'        to the raster files representing each scenario. Only applicable if
#'        `project = TRUE`. Default is NULL.
#' @param out_dir (character) a path to a root directory for saving the raster
#'        files of each projection. Default = NULL.
#' @param overwrite (logical) whether to overwrite SpatRaster if they already
#'        exists when projecting. Only applicable if `write_files` is set to
#'        TRUE. Default is FALSE.
#' @param progress_bar (logical) whether to display a progress bar during
#'        processing projections. Only applicable if `project = TRUE`. Default
#'        is FALSE
#' @param center (logical) whether the variables should be zero-centered.
#'        Default is TRUE.
#' @param scale (logical) whether the variables should be scaled to have unit
#'        variance before the analysis takes place. Default is FALSE.
#' @param variance_explained (numeric) the cumulative percentage of total
#'        variance that must be explained by the selected principal components.
#'        Default is 95.
#' @param min_explained (numeric) the minimum percentage of total variance that
#'        a principal component must explain to be retained. Default is 5.
#'
#' @return
#' A list containing the following elements:
#' - env: A SpatRaster object that contains the orthogonal components derived
#' from the PCA. PCs correspond to the variables used to perform the analysis.
#' - pca: an object of class prcomp, containing the details of the PCA analysis.
#' See \code{\link[stats]{prcomp}}().
#' - variance_explained_cum_sum: The cumulative percentage of total variance
#' explained by each of the selected principal components. This value indicates
#' how much of the data's original variability is captured by the PCA
#' transformation.
#' - projection_directory: the root directory where projection files were saved.
#' Not NULL only if `project` was set to TRUE. This directory contains the
#' projected raster files for each scenario.
#'
#' @importFrom terra prcomp predict writeRaster rast
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @export
#'
#' @examples
#' # PCA with current variables
#' # Import raster layers
#' var <- terra::rast(system.file("extdata", "Current_variables.tif",
#'                                package = "kuenm2"))
#'
#' # PCA
#' pca_var <- perform_pca(raster_variables = var, exclude_from_pca = "SoilType",
#'                        center = TRUE, scale = TRUE)
#'
#' pca_var
#'
#' # Project PCA for new scenarios (future)
#' # First, organize and prepare future variables
#' # Set the input directory containing the raw future climate variables
#' # For this example, the data is located in the "inst/extdata" folder.
#' in_dir <- system.file("extdata", package = "kuenm2")
#'
#' # Create a "Future_raw" folder in a temporary directory and copy the variables.
#' out_dir_future <- file.path(tempdir(), "Future_raw1")
#'
#' # Organize and rename the future climate data, structuring it by year and GCM.
#' # The 'SoilType' variable will be appended as a static variable in each scenario.
#' # The files will be renamed following the "bio_" format
#' organize_future_worldclim(input_dir = in_dir, output_dir = out_dir_future,
#'                           name_format = "bio_", static_variables = var$SoilType)
#'
#' # Prepare projections
#' pr <- prepare_projection(variable_names = c("bio_1", "bio_7", "bio_12",
#'                                             "bio_15", "SoilType"),
#'                          future_dir = out_dir_future,
#'                          future_period = c("2041-2060", "2081-2100"),
#'                          future_pscen = c("ssp126", "ssp585"),
#'                          future_gcm = c("ACCESS-CM2", "MIROC6"),
#'                          raster_pattern = ".tif*")
#'
#' # Create folder to save projection results
#' out_dir <- file.path(tempdir(), "PCA_projections")
#' dir.create(out_dir, recursive = TRUE)
#'
#' # Perform and project PCA for new scenarios (future)
#' proj_pca <- perform_pca(raster_variables = var, exclude_from_pca = "SoilType",
#'                         project = TRUE, projection_data = pr,
#'                         out_dir = out_dir, center = TRUE, scale = TRUE)
#'
#' proj_pca$projection_directory  # Directory with projected PCA-variables

perform_pca <- function(raster_variables, exclude_from_pca = NULL,
                        project = FALSE, projection_data = NULL, out_dir = NULL,
                        overwrite = FALSE, progress_bar = FALSE,
                        center = TRUE, scale = FALSE,
                        variance_explained = 95, min_explained = 5) {

  #Check datas
  if (missing(raster_variables)) {
    stop("Argument 'raster_variables' must be defined.")
  }
  if (!inherits(raster_variables, "SpatRaster")) {
    stop("Argument 'raster_variables' must be a 'SpatRaster'.")
  }
  if (!is.null(exclude_from_pca) & !inherits(exclude_from_pca, "character")) {
    stop("Argument 'exclude_from_pca' must be NULL or a 'character'.")
  }

  if (project & is.null(projection_data)) {
    stop("If 'project' is set to TRUE, 'projection_data' must be specified.")
  }
  if (project & !is.null(projection_data) & !inherits(projection_data, "projection_data")) {
    stop("Argument 'projection_data' must be a 'projection_data' object.")
  }
  if (project & is.null(out_dir)) {
    stop("If 'project' is set to TRUE, out_dir must be 'specified'.")
  }
  if (project & !is.null(out_dir) & !inherits(out_dir, "character")) {
    stop("Argument 'out_dir' must be a 'character'.")
  }

  if (!is.numeric(variance_explained)) {
    stop("Argument 'variance_explained' must be 'numeric'.")
  }
  if (!is.numeric(min_explained)) {
    stop("Argument 'min_explained' must be 'numeric'.")
  }

  #Check if variables are present in projection data
  if (project) {
    var_out <- setdiff(names(raster_variables), projection_data$variables)

    if (length(var_out) > 0) {
      stop("One or more variables from 'raster_variables' are missing in 'projection_data'.",
           "Variables in 'raster_variables' and 'projection_data' must match.")
    }
  }

  #End of check data

  if (!is.null(exclude_from_pca)) {
    var_to_pca <-  setdiff(names(raster_variables), exclude_from_pca)
  } else {
    var_to_pca <- names(raster_variables)
  }

  pca <- terra::prcomp(raster_variables[[var_to_pca]], center = center,
                       scale = scale)
  pca$x <- NULL
  pca$vars_in <- var_to_pca
  pca$vars_out <- exclude_from_pca

  d_exp <- cumsum(pca$sdev^2/sum(pca$sdev^2)) * 100
  d_exp <- d_exp[(pca$sdev^2/sum(pca$sdev^2) * 100) > min_explained]

  ind_exp <- if (max(d_exp) > variance_explained) {
    min(which(d_exp >= variance_explained))
  } else {
    length(d_exp)
  }

  env <- terra::predict(raster_variables[[var_to_pca]], pca, index = 1:ind_exp)

  if (!is.null(exclude_from_pca)) {
    env <- c(env, raster_variables[[exclude_from_pca]])
  }
  names(d_exp) <- paste0("PC", 1:ind_exp)

  if (project) {
    #Save current variables
    dir.create(file.path(out_dir, "/Current"), recursive = TRUE,
               showWarnings = FALSE)
    terra::writeRaster(env, file.path(out_dir, "/Current/Variables.tif"),
                       overwrite = overwrite)

    #Check scenarios to predict
    res_path <- check_pred_scenarios(projection_data = projection_data,
                                     out_dir = out_dir)
    #Show progress bar?
    if (progress_bar) {
      pb <- utils::txtProgressBar(0, nrow(res_path), style = 3)
      progress <- function(n) utils::setTxtProgressBar(pb, n)
    }

    for (x in 1:nrow(res_path)) {
      # Import scenario x
      sc_x <- terra::rast(list.files(res_path$input_path[x], full.names = TRUE,
                                     pattern = projection_data$raster_pattern))

      #Predict
      p_x <- terra::predict(sc_x[[pca$vars_in]], pca)
      if (!is.null(exclude_from_pca)) {
        p_x <- c(p_x, sc_x[[exclude_from_pca]])
      }

      #Write results
      dir.create(res_path$output_path[x], recursive = TRUE,
                 showWarnings = FALSE)  # do we want to prevent warnings?
      terra::writeRaster(p_x,
                         file.path(res_path$output_path[x], "Variables.tif"),
                         overwrite = overwrite)

      # Sets the progress bar to the current state
      if (progress_bar) {
        utils::setTxtProgressBar(pb, x)
      }
    }

  } #End of project
  return(list(env = env, pca = pca, variance_explained_cumsum = d_exp,
              projection_directory = out_dir))
}

Try the kuenm2 package in your browser

Any scripts or data that you put into this service are public.

kuenm2 documentation built on April 21, 2026, 1:07 a.m.