#' spca: Principal Component Analysis for Spatial Data
#'
#' @description Performs PCA for a stack of raster layers.
#' @param layers_stack A RasterStack of environmental variables
#' @param layers_to_proj A RasterStack of the environmental variables that will be projected (default NULL). If provided
#' the function will project the PCA for those layers by using the PCA object computed for the `layers_stack`. It can also use the PCA object
#' stored in `pca_obj`.
#' @param pca_obj An object of class \code{\link[stats]{prcomp}} (default NULL). Usefull when the user already
#' has done the PCa for `layers_stack`
#' and just wants to project the PCA on the `layers_to_proj` object.
#' @param sv_dir A directory where the PCs will be saved. If NULL the PCs will not be written.
#' @param sv_proj_dir A directory where the PCs projection will be saved. If NULL the PCs will be written inside sv_dir.
#' @param layers_format A raster format for writing PCA results (see \code{\link[raster]{writeFormats}}). Default = ".asc"
#' @return A list containing either the raster stack with Principal Components of `layers_stack` or `layers_to_proj`,
#' a barplot of the cumulative and explained variance of each compoent of `layers_stack` and a \code{\link[stats]{prcomp}} object(`pca_obj`).
#' @details spca uses the function \code{\link[stats]{prcomp}} of the `stats` package. If `sv_dir` is provided the PCs
#' and `pca_obj` will be stored on it. The names of the layers in `layers_to_proj` need to be named
#' with exactly the same names of those either in`layers_stack` or the `pca_obj`.
#' @importFrom stats prcomp
#' @import ggplot2
#' @import dplyr
#' @export
#'
#' @examples
#' # -------------------------------------------------------------------
#' # A PCA on layers_stack without saving.
#' layers_stack <- raster::stack(list.files(system.file("extdata",
#' package = "ntbox"),
#' pattern = "M_layers.tif$",
#' full.names = TRUE))
#' pcs <- spca(layers_stack, sv_dir=NULL,layers_format=NULL)
#' raster::plot(pcs$pc_layers)
#'
#' \dontrun{
#' # -------------------------------------------------------------------
#' # PCA projection without saving.
#'
#' layers_to_proj <- raster::stack(list.files(system.file("extdata",
#' package = "ntbox"),
#' pattern = "G_layers.tif$",
#' full.names = TRUE))
#' pcs_with_proj <- spca(layers_stack =layers_stack,
#' layers_to_proj = layers_to_proj,
#' pca_obj = NULL,
#' sv_dir=NULL,layers_format=NULL)
#' # Barplot of the Cumulative and explained variance in each component,
#' print(pcs_with_proj$pca_plot)
#' }
#' # -------------------------------------------------------------------
#' # PCA projection and saving.
#' \dontrun{
#' pcs_with_proj_sv <- spca(layers_stack =layers_stack,
#' layers_to_proj = layers_to_proj,
#' pca_obj = NULL,
#' sv_dir=".",layers_format=".asc")
#'
#' }
#' # -------------------------------------------------------------------
#' # PCA projection, saving and using the pca_obj.
#' \dontrun{
#'
#' pca_obj <- base::readRDS(list.files(system.file("extdata",
#' package = "ntbox"),
#' pattern = ".rds$",
#' full.names = TRUE))
#'
#' pcs_with_proj_sv <- spca(layers_stack =NULL,
#' layers_to_proj = layers_to_proj,
#' pca_obj = pca_obj,
#' sv_dir=".",layers_format=".tif")
#'
#' raster::plot(pcs_with_proj_sv$pcs_layers_projection)
#' }
spca <- function (layers_stack, layers_to_proj = NULL, pca_obj = NULL,
sv_dir = NULL, layers_format = ".asc", sv_proj_dir = NULL)
{
results <- list()
if (!is.null(layers_stack)) {
layers_stack <- raster::stack(layers_stack)
}
if (methods::is(layers_stack, "RasterStack") && !methods::is(pca_obj,"prcomp")) {
id_vals <- which(!is.na(layers_stack[[1]][]))
layers_vals <- names(layers_stack) %>% purrr::map_dfc(function(x) {
df1 <- data.frame(val = layers_stack[[x]][])
df1 <- df1[id_vals,]
return(df1)
})
names(layers_vals) <- names(layers_stack)
layers_noNA <- as.matrix(layers_vals)
#id_vals <- which(stats::complete.cases(layers_vals))
#layers_noNA <- layers_vals[id_vals, ]
pca_obj <- stats::prcomp(x = layers_noNA, center = TRUE,
scale. = TRUE)
pca_summary <- base::summary(pca_obj)
pca_plot <- .plot_pca(pca_summary = pca_summary)
pca_layer <- layers_stack[[1]]
nombres_pcs <- colnames(pca_obj$x)
if (ncol(layers_noNA) > 9)
nombres_pcs[1:9] <- paste0("PC0", 1:9)
rm(layers_vals)
gc()
pca_estimate <- pca_obj$x
pca_values <- rep(NA, raster::ncell(layers_stack[[1]]))
colnames(pca_estimate) <- nombres_pcs
layers_pca <- raster::stack(seq_along(nombres_pcs) %>%
purrr::map(function(x) {
pca_values[id_vals] <- pca_estimate[, x]
pca_layer[] <- pca_values
return(pca_layer)
}))
names(layers_pca) <- nombres_pcs
results <- list(pc_layers = layers_pca, pc_results = pca_obj,
pca_plot = pca_plot)
if(is.character(sv_dir)) {
sv_dir <- base::normalizePath(sv_dir)
if(!dir.exists(sv_dir)) dir.create(sv_dir)
}
if (!is.null(sv_dir) && dir.exists(sv_dir) && methods::is(results$pc_layers,"RasterStack")) {
n_layers <- seq_along(names(layers_pca))
pca_plot_path <- file.path(sv_dir, "pca_variance_exp.pdf")
ggplot2::ggsave(pca_plot_path, plot = pca_plot, width = 8,
height = 8)
layers_path <- file.path(sv_dir, paste0(names(layers_pca),
layers_format))
n_layers %>% purrr::map(~raster::writeRaster(layers_pca[[.x]],
layers_path[.x], overwrite = TRUE))
pca_obj_file <- file.path(sv_dir, paste0("pca_object",
format(Sys.time(), "%y_%m_%d_%H_%M"), ".rds"))
saveRDS(pca_obj, pca_obj_file)
}
}
if(inherits(layers_to_proj,what = "Raster"))
layers_to_proj <- raster::stack(layers_to_proj)
if (methods::is(layers_to_proj,"RasterStack") && methods::is(pca_obj, "prcomp")) {
nombres_pcs <- colnames(pca_obj$x)
if (raster::nlayers(layers_to_proj) > 9)
nombres_pcs[1:9] <- paste0("PC0", 1:9)
if (!all(names(layers_to_proj) == nombres_pcs))
cat(paste("Assuming that the layers that have the same position in the",
"stack represent the same kind of variables\n\n"))
layers_to_proj <- layers_to_proj[[1:length(names(pca_obj$center))]]
names(layers_to_proj) <- names(pca_obj$center)
id_vals <- which(!is.na(layers_to_proj[[1]][]))
proj_data <- names(layers_to_proj) %>% purrr::map_dfc(function(x) {
df1 <- data.frame(val = layers_to_proj[[x]][])
df1 <- df1[id_vals,]
return(df1)
})
names(proj_data) <- names(layers_to_proj)
proj_noNA <- as.matrix(proj_data)
rm(proj_data)
gc()
pc_projDF <- stats::predict(pca_obj, newdata = proj_noNA)
if (length(colnames(pc_projDF)) > 9)
colnames(pc_projDF)[1:9] <- paste0("PC0", 1:9)
pca_values <- rep(NA, raster::ncell(layers_to_proj[[1]]))
pca_layer <- layers_to_proj[[1]]
layers_to_proj <- raster::stack(seq_along(nombres_pcs) %>%
purrr::map(function(x) {
pca_values[id_vals] <- pc_projDF[, x]
pca_layer[] <- pca_values
return(pca_layer)
}))
names(layers_to_proj) <- colnames(pc_projDF)
results$pcs_layers_projection <- layers_to_proj
if ((!is.null(sv_dir) && dir.exists(sv_dir)) && is.null(sv_proj_dir))
sv_proj_dir <- sv_dir
if (is.character(sv_proj_dir)){
sv_proj_dir <- base::normalizePath(sv_proj_dir)
if(!dir.exists(sv_proj_dir)) dir.create(sv_proj_dir)
}
if (!is.null(results$pcs_layers_projection) && !is.null(sv_proj_dir) && dir.exists(sv_proj_dir)) {
if (sv_proj_dir == sv_dir) {
sv_proj_dir <- file.path(sv_dir, "pca_projection")
dir.create(sv_proj_dir)
}
layers_path_proj <- file.path(sv_proj_dir, paste0(names(layers_to_proj),
layers_format))
if (exists("layers_path_proj")) {
1:length(layers_path_proj) %>% purrr::map(~raster::writeRaster(layers_to_proj[[.x]],
layers_path_proj[.x], overwrite = TRUE))
}
}
return(results)
}
if (length(results) > 0L)
return(results)
else stop("layers: must be of class RasterStack")
}
# This code is entirely adapted from
# https://www.r-graph-gallery.com/297-circular-barplot-with-groups/
.plot_pca <- function(pca_summary){
data_pca <- data.frame(
pca=rep(colnames(pca_summary$importance),2) ,
group= rep(c("Cumulative Proportion","Proportion of Variance"),
each=length(colnames(pca_summary$importance))),
value=c(pca_summary$importance[3,],pca_summary$importance[2,])*100
)
group <- id <- stat <- end <- NULL
empty_bar <- 3
to_add <- data.frame( matrix(NA, empty_bar*nlevels(data_pca$group), ncol(data_pca)) )
colnames(to_add) <- colnames(data_pca)
to_add$group <- rep(levels(data_pca$group), each=empty_bar)
data_pca <- rbind(data_pca, to_add)
data_pca <- data_pca %>% dplyr::arrange(group)
data_pca$id <- seq(1, nrow(data_pca))
label_data <- data_pca
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id-0.5) /number_of_bar #
label_data$hjust<-ifelse( angle < -90, 1, 0)
label_data$angle<-ifelse(angle < -90, angle+180, angle)
start <- NULL
base_data <- data_pca %>%
dplyr::group_by(group) %>%
dplyr::summarize(start=min(id), end=max(id) - empty_bar) %>%
dplyr::rowwise() %>%
dplyr::mutate(title=mean(c(start,end)))
grid_data <- base_data
grid_data$end <- grid_data$end[ c( nrow(grid_data),
1:nrow(grid_data)-1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1,]
p <- ggplot2::ggplot(data_pca, ggplot2::aes_(x=~as.factor(id),
y=~value, fill=~group)) +
ggplot2::geom_bar(ggplot2::aes_(x=~as.factor(id), y=~value, fill=~group),
stat="identity", alpha=0.5) +
# Add a val=100/75/50/25 lines. I do it at the beginning to make sur barplots are OVER it.
ggplot2::geom_segment(data=grid_data,
ggplot2::aes_(x = ~end, y = 80, xend = ~start, yend = 80),
colour = "grey", alpha=1, size=0.3 ,
inherit.aes = FALSE ) +
ggplot2::geom_segment(data=grid_data,
ggplot2::aes_(x = ~end, y = 60, xend = ~start, yend = 60),
colour = "grey", alpha=1, size=0.3 ,
inherit.aes = FALSE ) +
ggplot2::geom_segment(data=grid_data,
ggplot2::aes_(x = ~end, y = 40, xend = ~start, yend = 40),
colour = "grey", alpha=1, size=0.3 ,
inherit.aes = FALSE ) +
ggplot2::geom_segment(data=grid_data,
ggplot2::aes_(x = ~end, y = 20, xend = ~start, yend = 20),
colour = "grey", alpha=1, size=0.3 ,
inherit.aes = FALSE ) +
# Add text showing the value of each 100/75/50/25 lines
#annotate("text", x = rep(max(data_pca$id),4),
# y = c(20, 40, 60, 80),
# label = c("20", "40", "60", "80") ,
# color="#A8A8A8", size=3.5 , angle=0,
# fontface="bold", hjust=1) +
ggplot2::geom_bar(ggplot2::aes_(x=~as.factor(id), y=~value,
fill=~group), stat="identity",
alpha=0.5) +
ggplot2::ylim(-100,120) +
ggplot2::theme_minimal() +
ggplot2::theme(
legend.position = "none",
axis.text = ggplot2::element_blank(),
axis.title = ggplot2::element_blank(),
panel.grid = ggplot2::element_blank(),
plot.margin = grid::unit(rep(-1,4), "cm")
) +
ggplot2::coord_polar() +
ggplot2::geom_text(data=label_data,
ggplot2::aes_(x=~id, y=~value+10,
label=~paste(pca,"\n",value),
hjust=~hjust), color="black",
fontface="bold",alpha=0.6,
size=2.85, angle= label_data$angle,
inherit.aes = FALSE ) +
# Add base line information
ggplot2::geom_segment(data=base_data,
ggplot2::aes_(x = ~start, y = -5, xend = ~end, yend = -5),
colour = "black", alpha=0.8, size=0.6 ,
inherit.aes = FALSE ) +
ggplot2::geom_text(data=base_data,
ggplot2::aes_(x = ~title, y = -18, label=~group),
hjust=c(1,0), colour = "black",
alpha=0.8, size=4, fontface="bold",
inherit.aes = FALSE)
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.