# ==============================================================================
# biodivMapR
# Lib_MapSpectralSpecies.R
# ==============================================================================
# PROGRAMMERS:
# Jean-Baptiste FERET <jb.feret@irstea.fr>
# Copyright 2019/06 Jean-Baptiste FERET
# ==============================================================================
# This Library applies clustering on a selection of components stored in a PCA
# file previously created ("Perform_PCA.R") and produces corresponding spectral
# species
# ==============================================================================
#' maps spectral species based on PCA file computed previously
#'
#' @param Input_Image_File character. Path of the image to be processed
#' @param Output_Dir character. Path for output directory
#' @param TypePCA character. Type of PCA: choose either "PCA" or "SPCA"
#' @param PCA_Files character. Path of the PCA image
#' @param PCA_model list. Parameters for teh PCA model to be applied on original image
#' @param SpectralFilter list. information about spectral band location
#' (central wavelength), bands to keep...
#' @param MaskPath character. Path of the mask corresponding to the image
#' @param Pix_Per_Partition numeric. number of pixels for each partition
#' @param nb_partitions numeric. number of partition
#' @param CR boolean. Set to TRUE if continuum removal should be applied
#' @param nbCPU numeric. Number of CPUs to use in parallel.
#' @param MaxRAM numeric. MaxRAM maximum size of chunk in GB to limit RAM allocation when reading image file.
#' @param nbclusters numeric. number of clusters defined in k-Means
#'
#' @return None
#' @importFrom utils read.table
#' @export
map_spectral_species <- function(Input_Image_File,Output_Dir,PCA_Files,PCA_model,SpectralFilter,MaskPath,Pix_Per_Partition,nb_partitions,CR= TRUE,TypePCA = "SPCA", nbclusters = 50, nbCPU = 1, MaxRAM = FALSE) {
# if no prior diversity map has been produced --> need PCA file
if (!file.exists(PCA_Files)) {
message("")
message("*********************************************************")
message("WARNING: PCA file expected but not found")
print(PCA_Files)
message("process aborted")
message("*********************************************************")
message("")
stop()
} else {
# define directories
Output_Dir_SS <- define_output_subdir(Output_Dir, Input_Image_File, TypePCA, "SpectralSpecies")
Output_Dir_PCA <- define_output_subdir(Output_Dir, Input_Image_File, TypePCA, "PCA")
Spectral_Species_Path <- paste(Output_Dir_SS, "SpectralSpecies", sep = "")
# WS_Save <- paste(Output_Dir_PCA, "PCA_Info.RData", sep = "")
# load(file = WS_Save)
## 1- Select components used to perform clustering
PC_Select_Path <- paste(Output_Dir_PCA, "Selected_Components.txt", sep = "")
if (file.exists(PC_Select_Path)) {
PC_Select <- read.table(PC_Select_Path)[[1]]
# sample data from image and perform PCA
ImPathHDR <- get_HDR_name(Input_Image_File)
HDR <- read_ENVI_header(ImPathHDR)
Subset <- get_random_subset_from_image(Input_Image_File, HDR, MaskPath, nb_partitions, Pix_Per_Partition)
# if needed, apply continuum removal
if (CR == TRUE) {
Subset$DataSubset <- apply_continuum_removal(Subset$DataSubset, SpectralFilter, nbCPU = nbCPU)
}
print("Apply PCA to subset prior to k-Means")
if (TypePCA == "PCA" | TypePCA == "SPCA") {
dataPCA <- t(t(PCA_model$eiV[, 1:PCA_model$Nb_PCs]) %*% t(center_reduce(Subset$DataSubset, PCA_model$mu, PCA_model$scale)))
}
dataPCA <- dataPCA[, PC_Select]
if (length(PC_Select) == 1) {
dataPCA <- matrix(dataPCA, ncol = 1)
}
message("Selected components:")
print(PC_Select)
message("Please add carriage return after last selected component if not part of the list")
message("If these do not match with your selection, please correct file following file:")
print(PC_Select_Path)
} else {
print(paste("File named ", PC_Select_Path, "needs to be created first"))
print("Image processing aborted")
stop()
}
## 2- PERFORM KMEANS FOR EACH ITERATION & DEFINE SPECTRAL SPECIES
print("perform k-means clustering for each subset and define centroids")
# scaling factor subPCA between 0 and 1
Kmeans_info <- init_kmeans(dataPCA, Pix_Per_Partition, nb_partitions, nbclusters, nbCPU)
## 3- ASSIGN SPECTRAL SPECIES TO EACH PIXEL
print("apply Kmeans to the whole image and determine spectral species")
apply_kmeans(PCA_Files, PC_Select, MaskPath, Kmeans_info, Spectral_Species_Path, nbCPU, MaxRAM)
}
return(invisible())
}
# computes k-means from nb_partitions subsets taken from dataPCA
#
# @param dataPCA initial dataset sampled from PCA image
# @param Pix_Per_Partition number of pixels per iteration
# @param nb_partitions number of k-means then averaged
# @param nbCPU
# @param nbclusters number of clusters used in kmeans
#
# @return list of centroids and parameters needed to center/reduce data
#' @importFrom future plan multiprocess sequential
#' @importFrom future.apply future_lapply
#' @importFrom stats kmeans
init_kmeans <- function(dataPCA, Pix_Per_Partition, nb_partitions, nbclusters, nbCPU = 1) {
m0 <- apply(dataPCA, 2, function(x) min(x))
M0 <- apply(dataPCA, 2, function(x) max(x))
d0 <- M0 - m0
dataPCA <- center_reduce(dataPCA, m0, d0)
# get the dimensions of the images, and the number of subimages to process
dataPCA <- split(as.data.frame(dataPCA), rep(1:nb_partitions, each = Pix_Per_Partition))
# multiprocess kmeans clustering
plan(multiprocess, workers = nbCPU) ## Parallelize using four cores
Schedule_Per_Thread <- ceiling(length(dataPCA) / nbCPU)
res <- future_lapply(dataPCA, FUN = kmeans, centers = nbclusters, iter.max = 50, nstart = 10,
algorithm = c("Hartigan-Wong"), future.scheduling = Schedule_Per_Thread)
plan(sequential)
Centroids <- list(nb_partitions)
for (i in (1:nb_partitions)) {
Centroids[[i]] <- res[[i]]$centers
}
my_list <- list("Centroids" = Centroids, "MinVal" = m0, "MaxVal" = M0, "Range" = d0)
return(my_list)
}
# Applies Kmeans clustering to PCA image
#
# @param PCA_Path path for the PCA image
# @param PC_Select PCs selected from PCA
# @param MaskPath Path for the mask
# @param Kmeans_info information about kmeans computed in previous step
# @param nbCPU
# @param MaxRAM
# @param Spectral_Species_Path path for spectral species file to be written
#
# @return None
apply_kmeans <- function(PCA_Path, PC_Select, MaskPath, Kmeans_info, Spectral_Species_Path, nbCPU = 1, MaxRAM = FALSE) {
ImPathHDR <- get_HDR_name(PCA_Path)
HDR_PCA <- read_ENVI_header(ImPathHDR)
PCA_Format <- ENVI_type2bytes(HDR_PCA)
HDR_Shade <- get_HDR_name(MaskPath)
HDR_Shade <- read_ENVI_header(HDR_Shade)
# prepare for sequential processing: SeqRead_Image informs about byte location to read
if (MaxRAM == FALSE) {
MaxRAM <- 0.25
}
nbPieces <- split_image(HDR_PCA, MaxRAM)
SeqRead_PCA <- where_to_read(HDR_PCA, nbPieces)
SeqRead_Shade <- where_to_read(HDR_Shade, nbPieces)
# create output file for spectral species assignment
HDR_SS <- HDR_PCA
nb_partitions <- length(Kmeans_info$Centroids)
HDR_SS$bands <- nb_partitions
HDR_SS$`data type` <- 1
Iter <- paste('Iter', 1:nb_partitions, collapse = ", ")
HDR_SS$`band names` <- Iter
HDR_SS$wavelength <- NULL
HDR_SS$fwhm <- NULL
HDR_SS$resolution <- NULL
HDR_SS$bandwidth <- NULL
HDR_SS$purpose <- NULL
HDR_SS$`byte order` <- get_byte_order()
headerFpath <- paste(Spectral_Species_Path, ".hdr", sep = "")
write_ENVI_header(HDR_SS, headerFpath)
# create Spectral species file
fidSS <- file(
description = Spectral_Species_Path, open = "wb", blocking = TRUE,
encoding = getOption("encoding"), raw = FALSE
)
close(fidSS)
for (i in 1:nbPieces) {
print(paste("Spectral Species Piece #", i, "/", nbPieces))
Location_RW <- list()
Location_RW$nbLines <- SeqRead_PCA$Lines_Per_Chunk[i]
Location_RW$Byte_Start_PCA <- SeqRead_PCA$ReadByte_Start[i]
Location_RW$lenBin_PCA <- SeqRead_PCA$ReadByte_End[i] - SeqRead_PCA$ReadByte_Start[i] + 1
Location_RW$Byte_Start_Shade <- SeqRead_Shade$ReadByte_Start[i]
Location_RW$lenBin_Shade <- SeqRead_Shade$ReadByte_End[i] - SeqRead_Shade$ReadByte_Start[i] + 1
Location_RW$Byte_Start_SS <- 1 + (SeqRead_Shade$ReadByte_Start[i] - 1) * nb_partitions
Location_RW$lenBin_SS <- nb_partitions * (SeqRead_Shade$ReadByte_End[i] - SeqRead_Shade$ReadByte_Start[i]) + 1
compute_spectral_species(PCA_Path, MaskPath, Spectral_Species_Path, Location_RW, PC_Select, Kmeans_info, nbCPU)
}
return(invisible())
}
# this function reads PCA file and defines the spectral species for each pixel
# based on the set of cluster centroids defined for each iteration
# applies kmeans --> closest cluster corresponds to the "spectral species"
#
# @param PCA_Path path for the PCA image
# @param MaskPath Path for the mask
# @param Spectral_Species_Path path for spectral species file to be written
# @param Location_RW where to read/write information in binary file
# @param PC_Select PCs selected from PCA
# @param nbCPU
# @param Kmeans_info information about kmeans computed in previous step
#
# @return None
#' @importFrom snow splitRows
#' @importFrom future plan multiprocess sequential
#' @importFrom future.apply future_lapply
compute_spectral_species <- function(PCA_Path, MaskPath, Spectral_Species_Path, Location_RW, PC_Select, Kmeans_info, nbCPU = 1) {
# characteristics of the centroids computed during preprocessing
nb_partitions <- length(Kmeans_info$Centroids)
nbCentroids <- nrow(Kmeans_info$Centroids[[1]])
CentroidsArray <- do.call("rbind", Kmeans_info$Centroids)
# read shade file and PCA file
ShadeHDR <- get_HDR_name(MaskPath)
HDR_Shade <- read_ENVI_header(ShadeHDR)
Shade.Format <- ENVI_type2bytes(HDR_Shade)
ImgFormat <- "Shade"
Shade_Chunk <- read_image_subset(MaskPath, HDR_Shade, Location_RW$Byte_Start_Shade, Location_RW$lenBin_Shade, Location_RW$nbLines, Shade.Format, ImgFormat)
PCA_HDR <- get_HDR_name(PCA_Path)
HDR_PCA <- read_ENVI_header(PCA_HDR)
PCA_Format <- ENVI_type2bytes(HDR_PCA)
# read "unfolded" (2D) PCA image
ImgFormat <- "2D"
PCA_Chunk <- read_image_subset(PCA_Path, HDR_PCA, Location_RW$Byte_Start_PCA, Location_RW$lenBin_PCA, Location_RW$nbLines, PCA_Format, ImgFormat)
PCA_Chunk <- PCA_Chunk[, PC_Select]
if (length(PC_Select) == 1) {
PCA_Chunk <- matrix(PCA_Chunk, ncol = 1)
}
PCA_Chunk <- center_reduce(PCA_Chunk, Kmeans_info$MinVal, Kmeans_info$Range)
# eliminate shaded pixels
keepShade <- which(Shade_Chunk == 1)
PCA_Chunk <- matrix(PCA_Chunk[keepShade, ], ncol = length(PC_Select))
# Prepare writing of spectral species distribution file
SS_HDR <- get_HDR_name(Spectral_Species_Path)
HDR_SS <- read_ENVI_header(SS_HDR)
SS_Format <- ENVI_type2bytes(HDR_SS)
# for each pixel in the subset, compute the nearest cluster for each iteration
Nearest_Cluster <- matrix(0, nrow = Location_RW$nbLines * HDR_PCA$samples, ncol = nb_partitions)
# rdist consumes RAM --> divide data into pieces if too big and multiprocess
nbSamples_Per_Rdist <- 25000
if (length(keepShade) > 0) {
nbSubsets <- ceiling(length(keepShade) / nbSamples_Per_Rdist)
PCA_Chunk <- splitRows(PCA_Chunk, nbSubsets)
plan(multiprocess, workers = nbCPU) ## Parallelize using four cores
Schedule_Per_Thread <- ceiling(nbSubsets / nbCPU)
res <- future_lapply(PCA_Chunk, FUN = RdistList, CentroidsArray = CentroidsArray, nbClusters = nrow(Kmeans_info$Centroids[[1]]), nb_partitions = nb_partitions, future.scheduling = Schedule_Per_Thread)
plan(sequential)
res <- do.call("rbind", res)
Nearest_Cluster[keepShade, ] <- res
rm(res)
gc()
}
Nearest_Cluster <- array(Nearest_Cluster, c(Location_RW$nbLines, HDR_PCA$samples, nb_partitions))
# Write spectral species distribution in the output file
fidSS <- file(
description = Spectral_Species_Path, open = "r+b", blocking = TRUE,
encoding = getOption("encoding"), raw = FALSE
)
Nearest_Cluster <- aperm(Nearest_Cluster, c(2, 3, 1))
if (!Location_RW$Byte_Start_SS == 1) {
seek(fidSS, where = SS_Format$Bytes * (Location_RW$Byte_Start_SS - 1), origin = "start", rw = "write")
}
writeBin(c(as.integer(Nearest_Cluster)), fidSS, size = SS_Format$Bytes, endian = .Platform$endian, useBytes = FALSE)
close(fidSS)
rm(list = ls())
gc()
return(invisible())
}
# Compute distance between each pixel of input data and each of the nbClusters x nb_partitions centroids
#
# @param InputData
# @param CentroidsArray
# @param nbClusters
# @param nb_partitions
#
# @return ResDist
#' @importFrom fields rdist
RdistList <- function(InputData, CentroidsArray, nbClusters, nb_partitions) {
# number of pixels in input data
nbPixels <- nrow(InputData)
# compute distance between each pixel and each centroid
cluster_dist <- rdist(InputData, CentroidsArray)
# reshape distance into a matrix: all pixels from iteration 1, then all pixels from it2...
cluster_dist <- matrix(aperm(array(cluster_dist, c(nbPixels, nbClusters, nb_partitions)), c(1, 3, 2)), nrow = nbPixels * nb_partitions)
ResDist <- matrix(max.col(-cluster_dist), nrow = nbPixels)
return(ResDist)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.