R/compare_gcms.R
In chooseGCM: Selecting General Circulation Models for Species Distribution Modeling

Documented in compare_gcms

#' Compare General Circulation Models (GCMs)
#'
#' This function compares future climate projections from multiple General Circulation Models (GCMs) based on their similarity in terms of variables. The function uses three clustering algorithms — k-means, hierarchical clustering, and closestdist — to group GCMs, and generates visualizations for the resulting clusters.
#'
#' @param s A list of stacks of General Circulation Models (GCMs).
#' @param var_names Character. A vector with the names of the variables to compare, or 'all' to include all available variables.
#' @param study_area An Extent object, or any object from which an Extent object can be extracted. Defines the study area for cropping and masking the rasters.
#' @param scale Logical. Whether to apply centering and scaling to the data. Default is \code{TRUE}.
#' @param k Numeric. The number of clusters to use for k-means clustering.
#' @param clustering_method Character. The clustering method to use. One of: "kmeans", "hclust", or "closestdist". Default is "closestdist".
#'
#' @return A list with two items: \code{suggested_gcms} (the names of the GCMs suggested for further analysis) and \code{statistics_gcms} (a grid of plots visualizing the clustering results).
#'
#' @author Luíz Fernando Esser (luizesser@gmail.com)
#' https://luizfesser.wordpress.com
#'
#' @examples
#' var_names <- c("bio_1", "bio_12")
#' s <- import_gcms(system.file("extdata", package = "chooseGCM"), var_names = var_names)
#' study_area <- terra::ext(c(-80, -30, -50, 10)) |> terra::vect(crs="epsg:4326")
#' compare_gcms(s, var_names, study_area, k = 3, clustering_method = "closestdist")
#'
#' @import checkmate
#' @import ggplot2
#' @importFrom factoextra fviz_cluster fviz_nbclust fviz_dend
#' @importFrom cowplot plot_grid
#' @importFrom stats dist
#'
#' @export
compare_gcms <- function(s, var_names = c("bio_1", "bio_12"), study_area = NULL, scale = TRUE, k = 3, clustering_method = "closestdist") {
  checkmate::assertList(s, types = "SpatRaster")
  checkmate::assertCharacter(var_names, unique = TRUE, any.missing = FALSE)
  checkmate::assertCount(k, positive = TRUE)
  checkmate::assertChoice(clustering_method, c("kmeans", "hclust", "closestdist"))

  if ("all" %in% var_names) {
    var_names <- names(s[[1]])
  }

  # Transform stacks
  x <- transform_gcms(s, var_names, study_area = study_area)
  if (scale) {
    x <- lapply(x, function(y) {y <- as.data.frame(scale(y))})
  }
  flatten_vars <- flatten_gcms(x)

  # Calculate the distance matrix
  dist_matrix <- stats::dist(t(flatten_vars))
  # hm <- fviz_dist(
  #  dist_matrix,
  #  order = TRUE,
  #  show_labels = TRUE,
  #  lab_size = NULL,
  #  gradient = list(low = "#FDE725FF", mid = "#21908CFF", high = "#440154FF")) +
  #  ggtitle("Distance Matrix Heatmap")

  # Calculate the Monte Carlo permutations
  mc <- montecarlo_gcms(s, var_names, study_area, perm = 10000, dist_method = "euclidean", clustering_method = "closestdist")

  # Run K-means
  cl <- stats::kmeans(dist_matrix, k, nstart = 10000, iter.max = 1000)
  gcms <- apply(cl$centers, 1, function(x) {
    which.min(x) |> names()
  })

  # plot
  kmeans_plot <- factoextra::fviz_cluster(cl,
                           data = dist_matrix,
                           palette = "jco",
                           ggtheme = ggplot2::theme_minimal(),
                           check_overlap = TRUE,
                           main = "K-means Clustering Plot",
                           legend = "none",
                           repel = TRUE,
                           label.select = NA) +
    ggplot2::geom_text(ggplot2::aes(label = ifelse(colnames(cl$centers) %in% gcms, colnames(cl$centers), "")),
                       color = "red",
                       vjust = -1,
                       size = 4) +
    ggplot2::geom_text(ggplot2::aes(label = ifelse(!colnames(cl$centers) %in% gcms, colnames(cl$centers), "")),
                       color = "black",
                       vjust = -1,
                       size = 4) +
    ggplot2::theme(plot.margin = ggplot2::unit(c(0.2, 0, 0, 1), "cm"))

  # Include elbow, silhouette and gap methods
  flatten_subset <- stats::na.omit(flatten_vars)
  #
  # if(nrow(flatten_subset)>1000){
  #  n <- 1000
  # } else {
  #  n <- nrow(flatten_subset)
  # }
  #
  # flatten_subset <- flatten_subset[sample(nrow(flatten_subset), n),]
  # sil <- fviz_nbclust(flatten_subset, FUN = kmeans, method = "silhouette")

  # Plot Environment closest dist
  env <- env_gcms(s, var_names, study_area, highlight = mc$suggested_gcms[[k-1]], title = "Closest subset to the Global Mean")

  # Compute hierarchical clustering and cut into k clusters
  res <- factoextra::hcut(t(flatten_subset), k = k)
  mean_all <- sapply(x, function(y) {
    y <- colMeans(y, na.rm = TRUE)
  })
  mean_all <- rowMeans(mean_all)
  res2 <- vector()
  for (i in 1:k) {
    mean_cluster <- sapply(x[res$cluster==i], function(y) {
      y <- colMeans(y, na.rm = TRUE)
    })
    vals <- as.matrix(stats::dist(t(cbind(mean_cluster, mean_all))))[,"mean_all"]
    res2[i] <- names(which.min(vals[vals > 0]))
  }
  dend <- factoextra::fviz_dend(res,
                                horiz = TRUE,
                                cex = 0.8,
                                palette = "jco",
                                main = "Hierarchical Clustering",
                                label_cols = ifelse(res$labels[res$order] %in% res2, "red", "black")
  ) + ggplot2::theme(
    plot.margin = ggplot2::unit(c(0.2, 0, 0, 1), "cm")  # Adjust the left margin (last number) to add more space
  )

  statistics_gcms <- cowplot::plot_grid(kmeans_plot,
                                        dend,
                                        mc$montecarlo_plot,
                                        env,
                                        labels = c("A", "B", "C", "D"),
                                        ncol = 2,
                                        rel_widths = 4
  )

  return(list(
    suggested_gcms = mc$suggested_gcms,
    statistics_gcms = statistics_gcms
  ))
}

Any scripts or data that you put into this service are public.

chooseGCM documentation built on April 3, 2025, 5:27 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

chooseGCM
Selecting General Circulation Models for Species Distribution Modeling

R/compare_gcms.R
In chooseGCM: Selecting General Circulation Models for Species Distribution Modeling

Defines functions compare_gcms

Documented in compare_gcms

Try the chooseGCM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

chooseGCM Selecting General Circulation Models for Species Distribution Modeling

R/compare_gcms.R In chooseGCM: Selecting General Circulation Models for Species Distribution Modeling

Defines functions compare_gcms

Documented in compare_gcms

Try the chooseGCM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

chooseGCM
Selecting General Circulation Models for Species Distribution Modeling

R/compare_gcms.R
In chooseGCM: Selecting General Circulation Models for Species Distribution Modeling