# Rmd setup chunk: echo every code chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
library(knitr)
#library(kableExtra)
The aim of this tutorial is to document the implementation of the cluster analysis developed in Pari's paper.
First of all, we will import a set of modules from the clip_raster R source file that will speed up this exercise. Moreover, I will load the COS 2015 map (in shapefile format) and intersect it with 10 images from 2018 spread over the year. For this example I will use only the shapes associated with water bodies.
# Load the clip_raster helper functions (pick the path for your machine).
#source('/home/willimarti2008/Documents/DGT/sraster/R/clip_raster.R')
source('\\\\dgt-699\\IPSTERS\\sraster\\R\\clip_raster.R')

# ===============================
# Example
# ===============================
library(rgdal)
library(sf)
library(rasterVis)
library(stars)
library(raster)

# COS 2015 reference polygons, clipped to the study area.
file_shape <- '\\\\dgt-699\\IPSTERS\\COS2015\\COS2015_v2_08_02_2019_clip.shp'
#file_shape = '/home/willimarti2008/Documents/DGT/COS2015/COS2015_v2_08_02_2019_clip.shp'
legend <- st_read(file_shape)

# Overview of the number of polygons per legend class.
table(legend$Legend)

# Keep only the polygons of the class under analysis.
class_analysis <- 'irrigated'
legend_class <- legend[which(legend$Legend == class_analysis), ]
rm(legend)  # the full legend is no longer needed
library(units)

# Polygon areas (with m^2 units) for the selected class.
areas_poly <- st_area(legend_class)
boxplot(areas_poly, col = "bisque")
quantile(areas_poly, probs = c(0.25, 0.5, 0.65, 0.75, 1))

# Area thresholds: discard the largest 10% of polygons and anything
# below the minimum mapping unit (1 ha).
lim_max <- quantile(areas_poly, probs = c(0.90))
lim_min <- 10000  # minimum mapping unit
units(lim_min) <- as_units("m^2")

within_limits <- areas_poly >= lim_min & areas_poly < lim_max
areas_poly_limit <- areas_poly[within_limits]
boxplot(areas_poly_limit, col = "bisque")
cat("Number of polygons: ", length(areas_poly_limit))
# Query 1: keep polygons within the area limits, then shrink each one
# with a 20 m inner buffer to avoid mixed pixels at the boundary.
area_ok <- areas_poly >= lim_min & areas_poly < lim_max
legend_class_q1 <- legend_class[which(area_ok), ]
legend_class_buffer_q1 <- st_buffer(legend_class_q1, -20)
images_folder <- '\\\\dgt-699\\IPSTERS\\IMAGES'
#images_folder = '/home/willimarti2008/Documents/DGT/images'

# Build the full path for a GeoTIFF file name.
#
# x           : single file name.
# path_folder : folder to prepend.
# Returns "path_folder/x" when x has a .tif extension, NULL otherwise
# (NULLs are silently dropped by the unlist() below).
join_path <- function(x, path_folder) {
  # endsWith() is simpler and safer than splitting on '.': it also fixes
  # the edge case of a file literally named "tif" (no extension), which
  # the previous strsplit()-based test wrongly accepted.
  if (endsWith(x, ".tif")) {
    return(paste0(path_folder, '/', x))
  }
  NULL
}

# Sentinel-2 band images (one acquisition per entry).
list_images <- c("S2A_L2A_20171002-113001_T29SND.tif",
                 "S2A_L2A_20171221-112810_T29SND.tif",
                 "S2A_L2A_20180321-112321_T29SND.tif",
                 "S2A_L2A_20180619-112602_T29SND.tif",
                 "S2A_L2A_20180729-112845_T29SND.tif",
                 "S2A_L2A_20180818-112627_T29SND.tif",
                 "S2A_L2A_20180927-112959_T29SND.tif")
paths_images <- unlist(lapply(list_images, join_path, images_folder))
We consider two shapes: one polygon corresponds to the original one extracted from COS 2015, and the other is obtained after extracting a 20 m boundary around the original polygon.
# Pick one example polygon (OBJECTI 15930; 8557 is an alternative) in
# both its original and inner-buffered versions.
target_id <- 15930
shape <- legend_class_q1[legend_class_q1$OBJECTI == target_id, ]
shape_buffer <- legend_class_buffer_q1[legend_class_buffer_q1$OBJECTI == target_id, ]

# Original outline in red; buffered polygon filled in blue on top.
plot(st_geometry(shape), border = 'red')
plot(st_geometry(shape_buffer), col = 'blue', add = TRUE)
This template focuses specifically on the general steps of how to export information for a future classification process; therefore, I will only work with the original polygon.
For the 92266 polygon we want to extract the spectral and temporal information associated with Sentinel-2 2018. The function as_sraster also provides the NDVI index. Since we have 9 images with 10 Sentinel-2 bands each, plus NDVI per image, in total we have 99 layers for our cluster analysis.
# Sentinel-2 bands extracted for every image date.
names_bands <- c(paste0('B', 2:8), 'B8A', 'B11', 'B12')
result <- as_sraster(shape_buffer, paths_images, names_bands)

# Quick look at the first band of the first acquisition date.
plot(funct_plot((result$data[["2017-10-02"]][,,1])))
#st_write(as.data.frame(result),"output_1.csv",layer_options = "GEOMETRY=AS_XY")
# NDVI rasters share the acquisition time stamps of the band images.
stamps_ndvi <- c("20171002-113001", "20171221-112810", "20180321-112321",
                 "20180619-112602", "20180729-112845", "20180818-112627",
                 "20180927-112959")
list_images_ndvi <- sprintf("S2A_L2A_%s_T29SND_NDVI.tif", stamps_ndvi)
paths_images_ndvi <- unlist(lapply(list_images_ndvi, join_path, images_folder))

result_ndvi <- as_sraster(shape_buffer, paths_images_ndvi, names_bands = c("NDVI"))
plot(funct_plot((result_ndvi$data[["2017-10-02"]][,,1])))
#st_write(as.data.frame(result_ndvi),"output_ndvi.csv",layer_options = "GEOMETRY=AS_XY")
# NDBI rasters for the same acquisition dates.
stamps_ndbi <- c("20171002-113001", "20171221-112810", "20180321-112321",
                 "20180619-112602", "20180729-112845", "20180818-112627",
                 "20180927-112959")
list_images_NDBI <- sprintf("S2A_L2A_%s_T29SND_NDBI.tif", stamps_ndbi)
paths_images_NDBI <- unlist(lapply(list_images_NDBI, join_path, images_folder))

result_NDBI <- as_sraster(shape_buffer, paths_images_NDBI, names_bands = c("NDBI"))
plot(funct_plot((result_NDBI$data[["2017-10-02"]][,,1])))
result_NDBI
#st_write(as.data.frame(result_NDBI),"output_ndbi.csv",layer_options = "GEOMETRY=AS_XY")
# NDMIR rasters for the same acquisition dates.
stamps_ndmir <- c("20171002-113001", "20171221-112810", "20180321-112321",
                  "20180619-112602", "20180729-112845", "20180818-112627",
                  "20180927-112959")
list_images_NDMIR <- sprintf("S2A_L2A_%s_T29SND_NDMIR.tif", stamps_ndmir)
paths_images_NDMIR <- unlist(lapply(list_images_NDMIR, join_path, images_folder))

result_NDMIR <- as_sraster(shape_buffer, paths_images_NDMIR, names_bands = c("NDMIR"))
plot(funct_plot((result_NDMIR$data[["2017-10-02"]][,,1])))
result_NDMIR
#st_write(as.data.frame(result_NDMIR),"output_NDMIR.csv",layer_options = "GEOMETRY=AS_XY")
# Contrast/texture layer.
# NOTE(review): the file names end in _CON3.tif but every variable and the
# band name say CON5 -- confirm which texture window (3 vs 5) these rasters
# actually contain and make the naming consistent.
list_images_con5 = c("S2A_L2A_20171002-113001_T29SND_CON3.tif",
                     "S2A_L2A_20171221-112810_T29SND_CON3.tif",
                     "S2A_L2A_20180321-112321_T29SND_CON3.tif",
                     "S2A_L2A_20180619-112602_T29SND_CON3.tif",
                     "S2A_L2A_20180729-112845_T29SND_CON3.tif",
                     "S2A_L2A_20180818-112627_T29SND_CON3.tif",
                     "S2A_L2A_20180927-112959_T29SND_CON3.tif")
paths_images_con5 = unlist(lapply(list_images_con5,join_path,images_folder))
result_con5 = as_sraster(shape_buffer, paths_images_con5, names_bands = c("CON5"))
plot(funct_plot((result_con5$data[["2017-10-02"]][,,1])))
# NOTE(review): this commented-out export still references result_ndvi and
# output_ndvi.csv -- copy-paste leftover from the NDVI chunk above.
#st_write(as.data.frame(result_ndvi),"output_ndvi.csv",layer_options = "GEOMETRY=AS_XY")
# Stack the band and index sraster objects into one multi-layer object
# for the cluster analysis.
# NOTE(review): result_con5 (computed above) is NOT included here, although
# the workflow() function below does include it -- confirm whether the
# contrast layer should be part of this stack too.
result_stack = stack(list(result,result_ndvi,result_NDBI, result_NDMIR))
result_stack$bands
#st_write(as.data.frame(result_stack),"output_stack.csv",layer_options = "GEOMETRY=AS_XY")
Internally, the number of clusters is defined by the Calinski-Harabasz index. As a result, we have 3 clusters.
# K-means on the stacked layers (after PCA); the number of clusters is
# chosen internally via the Calinski-Harabasz index.
cluster_matrix <- kmeans_sraster(result_stack, pca = TRUE)

# Turn the cluster matrix into a categorical raster with labelled levels.
cluster_raster <- as.factor(funct_plot(cluster_matrix))
attr_table <- levels(cluster_raster)[[1]]
attr_table[["cluster"]] <- names(table(cluster_matrix))
levels(cluster_raster) <- attr_table

levelplot(cluster_raster,
          col.regions = c("red", "blue", "yellow", "green", "black", "orange"))
# Keep only the pixels selected by the "Majority rule" clustering mask,
# then show layer 11 (presumably NDVI -- confirm layer order) of one date.
result_clip <- clip_sraster(result_stack, mask = cluster_matrix,
                            type = "Majority rule")
plot(funct_plot((result_clip$data[["2018-03-21"]][,,11])))
#st_write(as.data.frame(result_clip),"output_clip_P4_ndvi.csv",layer_options = "GEOMETRY=AS_XY")
#result_clip = clip_sraster(result_stack, mask = cluster_matrix, type ="rule_ndvi_water") #plot(funct_plot((result_clip$data[["2017-10-02"]][,,11]))) #st_write(as.data.frame(result_clip),"output_clip_test2.csv",layer_options = "GEOMETRY=AS_XY")
Well, this workflow also attempts to construct a database with the spectral-temporal information that must be part of the training and validation modelling. So here we want to evaluate how fast we can retrieve pseudo-training samples and put them in a file that later on we will use for modelling.
This example covers only water. I will use only 20 polygons for this example.
# =========================
# Random selection
# =========================
# Goal: stratified random selection of training samples at polygon level
# (querying only one class). The commented code below draws a fresh random
# sample; here we reuse a previously saved selection instead.
#n_samples = 150
#set.seed(123) #setting same random selection for testing
#index = sample(1:dim(legend_class_buffer_q1)[1], n_samples,replace = FALSE)
file_index <- 'C:\\IPSTERS\\sraster\\ins4\\sampling_0\\output_conifers_No rule.csv'
index <- unique(read.csv(file_index)$Object)
#write.csv(data.frame(unique(result_df_rule$Object),class_analysis),file_index)
The following script contains the workflow that generalizes the process done above with the water polygon. We require 3.5 seconds per polygon, so working with 20 polygons this process can take about one minute and 10 seconds.
valor_zero <- set_units(0, m^2)

# Extract the clipped spectral-temporal pixels for one polygon.
#
# shape        : single-feature sf polygon (already inner-buffered).
# paths_images : character vector of band image paths.
# type         : clipping rule forwarded to clip_sraster().
# Returns a data.frame of clipped pixels, or NULL when the polygon has
# zero area, has fewer than 25 valid pixels, or the clip fails.
#
# NOTE(review): paths_images_ndvi / paths_images_NDBI / paths_images_NDMIR /
# paths_images_con5 are read from the global environment -- confirm they
# are defined before calling this function.
workflow <- function(shape, paths_images, type) {
  # Skip polygons whose geometry collapsed to zero area (e.g. fully
  # eroded by the negative buffer). Explicit NULL instead of the old
  # implicit invisible NULL.
  if (!(sf::st_area(shape) > valor_zero)) {
    return(NULL)
  }

  # Spectral bands plus the precomputed index rasters.
  result <- as_sraster(shape, paths_images,
                       names_bands = c("B2","B3","B4","B5","B6","B7","B8","B8A","B11","B12"))
  result_ndvi <- as_sraster(shape, paths_images_ndvi, names_bands = c("NDVI"))
  result_NDBI <- as_sraster(shape, paths_images_NDBI, names_bands = c("NDBI"))
  result_NDMIR <- as_sraster(shape, paths_images_NDMIR, names_bands = c("NDMIR"))
  result_con5 <- as_sraster(shape, paths_images_con5, names_bands = c("CON5"))
  result_stack <- stack(list(result, result_ndvi, result_NDBI,
                             result_NDMIR, result_con5))

  # Require at least 25 valid (non-NA) pixels in the first layer for a
  # meaningful k-means run. sum(!is.na(...)) replaces the clunkier
  # length(which(!is.na(...))).
  if (sum(!is.na(result_stack$data[[1]][,,1])) < 25) {
    return(NULL)
  }

  cluster_matrix <- kmeans_sraster(result_stack, pca = TRUE)
  result_clip <- clip_sraster(result_stack, mask = cluster_matrix, type)
  if (is.null(result_clip)) {
    return(NULL)
  }
  as.data.frame(result_clip)
}
# Query 2: restrict to the sampled polygons, then run the workflow on
# each one and bind the per-polygon results into a single data frame.
legend_class_buffer_q2 <- legend_class_buffer_q1[legend_class_buffer_q1$OBJECTI %in% index, ]
list_shapes <- split(legend_class_buffer_q2, legend_class_buffer_q2$OBJECTI)

type_rule <- "Majority rule"
result_list_rule <- lapply(list_shapes, workflow, paths_images, type_rule)
result_df_rule <- do.call("rbind", result_list_rule)
# Plot the median NDVI temporal profile of every retained polygon.
library(reshape2)
library(ggplot2)

# Columns holding NDVI values (one per image date); column 1 is Object.
index_indvi = grep("NDVI",colnames(result_df_rule))
result_df_rule_ndvi = result_df_rule[,c(1,index_indvi)]
st_geometry(result_df_rule_ndvi) = NULL

# Median NDVI per polygon (Object) and date, ignoring NA pixels.
result_aggregate_time = aggregate(result_df_rule_ndvi, by = list(result_df_rule_ndvi$Object), FUN = median, na.rm =TRUE)

# Long format: one row per (Object, date); [,-1] drops the Group.1
# column added by aggregate().
resultmelt <- melt(result_aggregate_time[,-1], id.vars = "Object")
#removing ndvi text
# The first 10 characters of each melted column name are the
# yyyy-mm-dd acquisition date.
resultmelt$variable <- as.Date(substr(resultmelt[,c("variable")], 1, 10))
resultmelt$Object <- as.factor(resultmelt$Object)

#ggplot
p1 <- ggplot(resultmelt, aes(variable, value, group = Object)) +
  geom_line(color="red") +
  theme(legend.position="top")
print(p1)
# Outlier removal on the NDVI profiles via b_distance() (defined in
# clip_raster.R; presumably a distance-based filter keeping objects
# within the 0.80 probability threshold -- confirm in the source).
y = result_df_rule[,c(1,index_indvi)]
st_geometry(y) <- NULL
index_preserve = b_distance(y, prob= 0.80)
result_df_rule2 = result_df_rule[result_df_rule$Object %in% index_preserve, ]

#graphic 2
# Label each profile so outliers can be coloured differently below.
resultmelt$Type = "Outlier"
resultmelt[resultmelt$Object %in% index_preserve,c("Type")] = "Data"

#ggplot
p1 <- ggplot(resultmelt, aes(variable, value, group = Object)) +
  geom_line(aes(color=Type)) +
  theme(legend.position="top") +
  scale_linetype_manual(values=c("twodash", "dotted"))+
  scale_size_manual(values=c(4, 4))
print(p1)
# Plot only the profiles flagged as outliers by the filter above.
outliers_only <- resultmelt[resultmelt$Type == 'Outlier', ]
p1 <- ggplot(outliers_only, aes(variable, value, group = Object)) +
  geom_line(aes(color = Type)) +
  theme(legend.position = "top") +
  scale_linetype_manual(values = c("twodash", "dotted")) +
  scale_size_manual(values = c(4, 4))
print(p1)
To save the results
#eval= FALSE
# Assign the source legend's CRS to both result sets, then export them
# with coordinates written as X/Y columns.
st_crs(result_df_rule) <- st_crs(legend_class)
st_crs(result_df_rule2) <- st_crs(legend_class)

out_stem <- paste0("output_", class_analysis, "_", type_rule)
st_write(result_df_rule, paste0(out_stem, ".csv"),
         layer_options = "GEOMETRY=AS_XY")
st_write(result_df_rule2, paste0(out_stem, "_bdist.csv"),
         layer_options = "GEOMETRY=AS_XY")
#st_write(result_df_ndvirule2, paste0("output_",class_analysis,"_ndvirule.csv"), layer_options = "GEOMETRY=AS_XY")
#eval= FALSE #result_list_nr = lapply(list_shapes, workflow, paths_images, type ="No rule") #result_df_nr = do.call("rbind",result_list_nr) #st_crs(result_df_nr)<-st_crs(legend_class) #st_write(result_df_nr, paste0("output_",class_analysis,"_norule.csv"), layer_options = "GEOMETRY=AS_XY")
#result_list_rndvi = lapply(list_shapes, workflow, paths_images, type ="rule_ndvi_water") #result_df_rndvi = do.call("rbind",result_list_rndvi) #st_crs(result_df_rndvi)<-st_crs(legend_class) #st_write(result_df_rndvi, paste0("output_",class_analysis,"_ndvirule.csv"), layer_options = "GEOMETRY=AS_XY") #rm(list=ls()) #gc()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.