Nothing
## PCAs (Two-steps process) to prepare data for clustering
#'
#' @author Xavier Rotllan-Puig
#' @title PCAs4clust
#' @description PCAs4clust runs a two-steps process to prepare the data to be clustered
#' @details Firstly, a Principal Component Analysis ('screening PCA') with all the variables in
#' 'obj2process' is run in order to know the optimal number of variables to be used in a subsequent
#' PCA, as well as the most associated variable to those Principal Components (PCs). A threshold
#' of cumulative variance (cumul_var_threshold; default = 0.9) is needed. Secondly, a 'final PCA'
#' is run with the results of the 'screening PCA' (i.e. number of PC axes and their most associated
#' variables). PCAs4clust uses \code{\link[stats]{prcomp}} to run PCAs
#' @importFrom stats na.omit varimax prcomp
#' @rawNamespace import(terra, except = na.omit)
#' @importFrom data.table rbindlist setorderv :=
#' @param obj2process SpatRaster object (or its file name). Each layer is one variable
#' @param cumul_var_threshold Numeric. Optional (default = 0.9). Threshold of cumulative variance
#' to select the number of useful PCs
#' @param filename Character. Output filename. Optional
#' @param ... Optional. Arguments for \code{\link[stats]{prcomp}}
#' @return SpatRaster object
#' @name PCAs4clust
#' @seealso \code{\link{rm_multicol}}; \code{\link[stats]{prcomp}}
#' @export
#' @examples
#' \donttest{
#' dirctry <- paste0(system.file(package='LPDynR'), "/extdata")
#' variables_noCor <- rm_multicol(dir2process = dirctry,
#' multicol_cutoff = 0.7)
#' PCAs4clust(obj2process = variables_noCor,
#' cumul_var_threshold = 0.9)
#' }
#'
PCAs4clust <- function(obj2process = NULL,
cumul_var_threshold = 0.9,
filename = "",
...){
## Reading in data (not correlated Phenolo variables)
if(is.null(obj2process)) stop("Please provide an objects of classe SpatRaster (or a file names to read in from)")
if(is.character(obj2process)){
obj2process <- rast(obj2process)
}else if(!class(obj2process) %in% c("SpatRaster")){
stop("Please provide objects of classe SpatRaster (or a file name to read in from)")
}
if(!is.numeric(cumul_var_threshold) |
is.na(cumul_var_threshold) | is.null(cumul_var_threshold) |
cumul_var_threshold < 0 | cumul_var_threshold > 1)
stop("Please provide a threshold of cumulative variance (cumul_var_threshold) between 0 and 1")
## Performing the 'screening PCA'
obj2process_df <- as.data.frame(obj2process)
dts <- list(...)
if(is.null(dts$retx)) dts$retx <- TRUE
if(is.null(dts$center)) dts$center <- TRUE
if(is.null(dts$scale.)) dts$scale. <- TRUE
pca <- prcomp(stats::na.omit(obj2process_df),
retx = dts$retx,
center = dts$center, # variables are zero centered
scale. = dts$scale. # scaling variables to have unit variance
)
## Rotating
pca.rotated <- varimax(pca$rotation, normalize = TRUE)
pca_importance <- summary(pca)
pca_importance <- as.data.frame(pca_importance$importance)
usefulPCs <- which(pca_importance[3 ,] >= cumul_var_threshold)[1]
pca_importance <- pca_importance[, 1:usefulPCs]
nPCs <- ncol(pca_importance)
#if(nPCs > ncol(pca_importance)) nPCs <- ncol(pca_importance)
screeningPCA_variables_df <- pca.rotated$loadings
screeningPCA_variables_df <- as.data.frame(unclass(screeningPCA_variables_df))
screeningPCA_variables_df <- round(screeningPCA_variables_df, 0)[, c(1:nPCs)]
screeningPCA_variables <- names(which(apply(screeningPCA_variables_df, 1, sum) != 0))
#if(length(screeningPCA_variables) != nPCs) stop("inconsistency among number of variables selected for the next step ('final PCA') and number of PCs")
## Performing the 'Final PCA'
obj2process_df <- obj2process_df[, names(obj2process_df) %in% screeningPCA_variables]
pca_final <- prcomp(stats::na.omit(obj2process_df),
retx = dts$retx,
center = dts$center, # variables are zero centered
scale. = dts$scale. # scaling variables to have unit variance
)
## Spatial Patterns of PCs
#if(exists("pca")) rm(pca)
#if(exists("obj2process_df")) rm(obj2process_df)
#gc()
pca_final_rottd_varbles <- as.data.frame(pca_final$x)
#if(exists("pca_final")) rm(pca_final)
#gc()
pca_final_rottd_varbles$rn <- as.integer(rownames(pca_final_rottd_varbles))
num_pix <- ncol(obj2process) * nrow(obj2process)
num_pix1 <- c(1:num_pix)
num_pix1 <- num_pix1[!num_pix1 %in% as.integer(rownames(pca_final_rottd_varbles))]
df2fill <- as.data.frame(matrix(NA, nrow = length(num_pix1), ncol = ncol(pca_final_rottd_varbles)))
names(df2fill) <- c(paste0("PC", seq(1:(ncol(pca_final_rottd_varbles) - 1))), "rn")
df2fill$rn <- num_pix1
rownames(df2fill) <- num_pix1
#rm(num_pix1)
pca_final_raster1 <- rbindlist(list(pca_final_rottd_varbles, df2fill))
#rm(df2fill)
setorderv(pca_final_raster1, "rn")
pca_final_raster1 <- pca_final_raster1[, rn := NULL]
xtnt <- ext(obj2process)
pca_final_brick <- rast()
for(i in 1:ncol(pca_final_raster1)){
rastr_tmp <- rast(nrows = nrow(obj2process), ncols = ncol(obj2process),
xmin = xtnt[1], xmax = xtnt[2], ymin = xtnt[3], ymax = xtnt[4],
crs = crs(obj2process),
#ext,
#resolution,
vals = pca_final_raster1[[i]])
#pca_final_brick[[i]] <- rastr_tmp
pca_final_brick <- suppressWarnings(c(pca_final_brick, rastr_tmp))
names(pca_final_brick[[i]]) <- names(pca_final_raster1)[i]
}
#pca_final_brick <- brick(pca_final_brick)
## Saving results
if (filename != "") writeRaster(pca_final_brick, filename = filename, overwrite = TRUE)
return(pca_final_brick)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.