knitr::opts_chunk$set(echo = TRUE)
library(knitr)
#library(kableExtra)

Introduction

The attempt of this tutorial is to document the implmentation of the cluster analysis developed in Pari's paper.

First at all, we wil import a set of modules that will speed up the performance of this excercise from the the document clip_raster with R extension. Moreover, I will call COS map 2015 with shapefile extension that intersect 10 images of 2018 over the year. For this example I will call only shapes associated to water bodies.

#source('/home/willimarti2008/Documents/DGT/sraster/R/clip_raster.R')
source('\\\\dgt-699\\IPSTERS\\sraster\\R\\clip_raster.R')

#===============================
#Example
#===============================

library(rgdal)
library(sf)
library(rasterVis)
library(stars)
library(raster)

file_shape = '\\\\dgt-699\\IPSTERS\\COS2015\\COS2015_v2_08_02_2019_clip.shp'
#file_shape = '/home/willimarti2008/Documents/DGT/COS2015/COS2015_v2_08_02_2019_clip.shp'
legend = st_read(file_shape)
table(legend$Legend)

class_analysis = 'irrigated'

legend_class = legend[which(legend$Legend == class_analysis),]
rm(legend)

Size of the polygons

library(units)
areas_poly = st_area(legend_class)
boxplot(areas_poly,col = "bisque")

quantile(areas_poly,probs=c(0.25,0.5,0.65,0.75,1))
lim_max = quantile(areas_poly,probs=c(0.90))
lim_min = 10000   # Minimun mapping unit
units(lim_min) <- as_units("m^2")

areas_poly_limit = areas_poly[areas_poly >= lim_min & areas_poly < lim_max]

boxplot(areas_poly_limit,col = "bisque")

cat("Number of polygons: ", length(areas_poly_limit))

Query based on area

legend_class_q1 = legend_class[which(areas_poly >= lim_min & areas_poly < lim_max),]
#query 1: by area
legend_class_buffer_q1 = st_buffer(legend_class_q1,-20)

Images

images_folder = '\\\\dgt-699\\IPSTERS\\IMAGES'
#images_folder = '/home/willimarti2008/Documents/DGT/images'
join_path = function(x,path_folder){
  a = strsplit(x,'[.]')
  format_file = a[[1]][length(a[[1]])]
  if(format_file == 'tif'){
    return(paste0(path_folder,'/',x))
  }
}

list_images = c("S2A_L2A_20171002-113001_T29SND.tif",
                 "S2A_L2A_20171221-112810_T29SND.tif",
                 "S2A_L2A_20180321-112321_T29SND.tif",
                 "S2A_L2A_20180619-112602_T29SND.tif",
                 "S2A_L2A_20180729-112845_T29SND.tif",
                 "S2A_L2A_20180818-112627_T29SND.tif",
                 "S2A_L2A_20180927-112959_T29SND.tif")

paths_images = unlist(lapply(list_images,join_path,images_folder))

Example Polygon

We consider two shapes, one polygon corresponds to the original one extracted from COS 2015 and the another one after boundary extraction of 20m around original polygon.

shape = legend_class_q1[legend_class_q1$OBJECTI == 15930,]  #8557
shape_buffer = legend_class_buffer_q1[legend_class_buffer_q1$OBJECTI == 15930,]

plot(st_geometry(shape),border='red')
plot(st_geometry(shape_buffer), col = 'blue', add=T)

This template focus especifically in the general steps of how to export information for future classification process, therefore, I will only work with the original polygon.

sraster object

For the 92266 polygon we want to extract the spectral and temporal information associated to Snetinel 2 2018. The function as_raster also provides NDVI index. Since we have 9 images with 10 bands of sentinel 2, plus NDVI per image, in total we have 99 layers for our cluster analysis.

names_bands = c('B2','B3', 'B4', 'B5', 'B6', 'B7','B8','B8A', 'B11','B12')
result = as_sraster(shape_buffer,paths_images,names_bands)
plot(funct_plot((result$data[["2017-10-02"]][,,1])))
#st_write(as.data.frame(result),"output_1.csv",layer_options = "GEOMETRY=AS_XY")

Adding NDVI and converting to sraster object

list_images_ndvi = c("S2A_L2A_20171002-113001_T29SND_NDVI.tif",
                 "S2A_L2A_20171221-112810_T29SND_NDVI.tif",
                 "S2A_L2A_20180321-112321_T29SND_NDVI.tif",
                 "S2A_L2A_20180619-112602_T29SND_NDVI.tif",
                 "S2A_L2A_20180729-112845_T29SND_NDVI.tif",
                 "S2A_L2A_20180818-112627_T29SND_NDVI.tif",
                 "S2A_L2A_20180927-112959_T29SND_NDVI.tif")

paths_images_ndvi = unlist(lapply(list_images_ndvi,join_path,images_folder))

result_ndvi = as_sraster(shape_buffer, paths_images_ndvi, names_bands = c("NDVI"))
plot(funct_plot((result_ndvi$data[["2017-10-02"]][,,1])))
#st_write(as.data.frame(result_ndvi),"output_ndvi.csv",layer_options = "GEOMETRY=AS_XY")

Adding NDBI and converting to sraster object

list_images_NDBI = c("S2A_L2A_20171002-113001_T29SND_NDBI.tif",
                 "S2A_L2A_20171221-112810_T29SND_NDBI.tif",
                 "S2A_L2A_20180321-112321_T29SND_NDBI.tif",
                 "S2A_L2A_20180619-112602_T29SND_NDBI.tif",
                 "S2A_L2A_20180729-112845_T29SND_NDBI.tif",
                 "S2A_L2A_20180818-112627_T29SND_NDBI.tif",
                 "S2A_L2A_20180927-112959_T29SND_NDBI.tif")

paths_images_NDBI = unlist(lapply(list_images_NDBI,join_path,images_folder))

result_NDBI = as_sraster(shape_buffer, paths_images_NDBI, names_bands = c("NDBI"))
plot(funct_plot((result_NDBI$data[["2017-10-02"]][,,1])))
result_NDBI
#st_write(as.data.frame(result_NDBI),"output_ndbi.csv",layer_options = "GEOMETRY=AS_XY")

Adding NDMIR and converting to sraster object

list_images_NDMIR = c("S2A_L2A_20171002-113001_T29SND_NDMIR.tif",
                 "S2A_L2A_20171221-112810_T29SND_NDMIR.tif",
                 "S2A_L2A_20180321-112321_T29SND_NDMIR.tif",
                 "S2A_L2A_20180619-112602_T29SND_NDMIR.tif",
                 "S2A_L2A_20180729-112845_T29SND_NDMIR.tif",
                 "S2A_L2A_20180818-112627_T29SND_NDMIR.tif",
                 "S2A_L2A_20180927-112959_T29SND_NDMIR.tif")

paths_images_NDMIR = unlist(lapply(list_images_NDMIR,join_path,images_folder))

result_NDMIR = as_sraster(shape_buffer, paths_images_NDMIR, names_bands = c("NDMIR"))
plot(funct_plot((result_NDMIR$data[["2017-10-02"]][,,1])))
result_NDMIR
#st_write(as.data.frame(result_NDMIR),"output_NDMIR.csv",layer_options = "GEOMETRY=AS_XY")

Adding FILTER ndvi conv 5

list_images_con5 = c("S2A_L2A_20171002-113001_T29SND_CON3.tif",
                 "S2A_L2A_20171221-112810_T29SND_CON3.tif",
                 "S2A_L2A_20180321-112321_T29SND_CON3.tif",
                 "S2A_L2A_20180619-112602_T29SND_CON3.tif",
                 "S2A_L2A_20180729-112845_T29SND_CON3.tif",
                 "S2A_L2A_20180818-112627_T29SND_CON3.tif",
                 "S2A_L2A_20180927-112959_T29SND_CON3.tif")

paths_images_con5 = unlist(lapply(list_images_con5,join_path,images_folder))

result_con5 = as_sraster(shape_buffer, paths_images_con5, names_bands = c("CON5"))
plot(funct_plot((result_con5$data[["2017-10-02"]][,,1])))
#st_write(as.data.frame(result_ndvi),"output_ndvi.csv",layer_options = "GEOMETRY=AS_XY")

stacking layers

result_stack = stack(list(result,result_ndvi,result_NDBI, result_NDMIR))
result_stack$bands
#st_write(as.data.frame(result_stack),"output_stack.csv",layer_options = "GEOMETRY=AS_XY")

K means

Internally the number of cluster is defined by the Calinski-Harabasz index. As result, we have 3 clusters

cluster_matrix = kmeans_sraster(result_stack, pca = TRUE)

mask_raster = funct_plot(cluster_matrix)
mask_raster = as.factor(mask_raster)
rat <- levels(mask_raster)[[1]]
name_clusters = names(table(cluster_matrix))
rat[["cluster"]] <- name_clusters
levels(mask_raster) <- rat

levelplot(mask_raster,col.regions=c("red","blue","yellow","green","black","orange"))

Extracting spectral temporal information with Majority rule

result_clip = clip_sraster(result_stack, mask = cluster_matrix, type ="Majority rule")
plot(funct_plot((result_clip$data[["2018-03-21"]][,,11])))
#st_write(as.data.frame(result_clip),"output_clip_P4_ndvi.csv",layer_options = "GEOMETRY=AS_XY")

Extracting spectral temporal information with ndvi rule for water class

#result_clip = clip_sraster(result_stack, mask = cluster_matrix, type ="rule_ndvi_water")
#plot(funct_plot((result_clip$data[["2017-10-02"]][,,11])))
#st_write(as.data.frame(result_clip),"output_clip_test2.csv",layer_options = "GEOMETRY=AS_XY")

Working with more polygons

Well, this workflow also attempts to construct a database with the spectral-temporal information that must be part of the trainig and validation modelling. So here we want to evaluate haw fast we can retrieve pseudotraining and put it a file that later on we will use for modelling.

This example covers only water. I want only call 20 polygons for this example

#=========================
#random selection
#=========================
#Goal : stratified random selection of traing samples at level of polygon, (queriying only one class)

#n_samples = 150
#set.seed(123)   #setting same random selection for testing
#index = sample(1:dim(legend_class_buffer_q1)[1], n_samples,replace = FALSE)

file_index = 'C:\\IPSTERS\\sraster\\ins4\\sampling_0\\output_conifers_No rule.csv'
index_df = read.csv(file_index)

index = unique(index_df$Object)

#write.csv(data.frame(unique(result_df_rule$Object),class_analysis),file_index)

The following script contains the workflow that generalize the process done with the polygon of water done above. We require 3.5 seconds per polygon, so working with 20 this process can take one minute and 10 seconds more.

valor_zero = set_units(0,m^2)
workflow <- function(shape, paths_images,type){
  if(sf::st_area(shape)>valor_zero){
    result = as_sraster(shape,paths_images,names_bands=c("B2","B3","B4","B5","B6","B7","B8","B8A","B11","B12"))
    result_ndvi = as_sraster(shape, paths_images_ndvi, names_bands = c("NDVI"))
    result_NDBI = as_sraster(shape, paths_images_NDBI, names_bands = c("NDBI"))
    result_NDMIR = as_sraster(shape, paths_images_NDMIR, names_bands = c("NDMIR"))
    result_con5 = as_sraster(shape, paths_images_con5, names_bands = c("CON5"))
    result_stack = stack(list(result, result_ndvi,result_NDBI, result_NDMIR, result_con5))
    if(length(which(!is.na(result_stack$data[[1]][,,1]))) >=  25){
        cluster_matrix = kmeans_sraster(result_stack, pca=TRUE)
        result_clip = clip_sraster(result_stack, mask = cluster_matrix, type)
        if(is.null(result_clip)){
              return(NULL)
        }
        else{
            return(as.data.frame(result_clip))            
        }
    }
    }
}

Cluster analysis

#query 2  eval= FALSE 
#
legend_class_buffer_q2 = legend_class_buffer_q1[legend_class_buffer_q1$OBJECTI %in% index,]

list_shapes = split(legend_class_buffer_q2,legend_class_buffer_q2$OBJECTI)

type_rule ="Majority rule"

result_list_rule = lapply(list_shapes, workflow, paths_images, type_rule)

result_df_rule = do.call("rbind",result_list_rule)

Plot signal

library(reshape2)
library(ggplot2)
index_indvi = grep("NDVI",colnames(result_df_rule))

result_df_rule_ndvi = result_df_rule[,c(1,index_indvi)]

st_geometry(result_df_rule_ndvi) = NULL

result_aggregate_time = aggregate(result_df_rule_ndvi, by = list(result_df_rule_ndvi$Object), FUN = median, na.rm =TRUE)
resultmelt <- melt(result_aggregate_time[,-1], id.vars = "Object")
#removing ndvi text
resultmelt$variable <- as.Date(substr(resultmelt[,c("variable")], 1, 10))
resultmelt$Object <- as.factor(resultmelt$Object)

#ggplot
p1 <- ggplot(resultmelt, aes(variable, value, group = Object)) + 
  geom_line(color="red") + theme(legend.position="top")
print(p1)

Bhattacharyya distance

y = result_df_rule[,c(1,index_indvi)]
st_geometry(y) <- NULL
index_preserve = b_distance(y, prob= 0.80)
result_df_rule2 = result_df_rule[result_df_rule$Object %in% index_preserve, ]

#graphic 2
resultmelt$Type = "Outlier"
resultmelt[resultmelt$Object %in% index_preserve,c("Type")] = "Data"

#ggplot
p1 <- ggplot(resultmelt, aes(variable, value, group = Object)) + 
  geom_line(aes(color=Type)) + theme(legend.position="top") +
  scale_linetype_manual(values=c("twodash", "dotted"))+
  scale_size_manual(values=c(4, 4))

print(p1)
#ggplot
p1 <- ggplot(resultmelt[resultmelt$Type == 'Outlier',], aes(variable, value, group = Object)) + 
  geom_line(aes(color=Type)) + theme(legend.position="top") +
  scale_linetype_manual(values=c("twodash", "dotted")) +
  scale_size_manual(values=c(4, 4))

print(p1)

To save the results

#eval= FALSE
st_crs(result_df_rule)<-st_crs(legend_class)
st_crs(result_df_rule2)<-st_crs(legend_class)
st_write(result_df_rule, paste0("output_",class_analysis,"_",type_rule,".csv"), layer_options = "GEOMETRY=AS_XY")
st_write(result_df_rule2, paste0("output_",class_analysis,"_",type_rule,"_bdist.csv"), layer_options = "GEOMETRY=AS_XY")
#st_write(result_df_ndvirule2, paste0("output_",class_analysis,"_ndvirule.csv"), layer_options = "GEOMETRY=AS_XY")

saving pixel values without processing

#eval= FALSE
#result_list_nr = lapply(list_shapes, workflow, paths_images, type ="No rule")
#result_df_nr = do.call("rbind",result_list_nr)
#st_crs(result_df_nr)<-st_crs(legend_class)
#st_write(result_df_nr, paste0("output_",class_analysis,"_norule.csv"), layer_options = "GEOMETRY=AS_XY")

saving pixel values ndvi processing

#result_list_rndvi = lapply(list_shapes, workflow, paths_images, type ="rule_ndvi_water")
#result_df_rndvi = do.call("rbind",result_list_rndvi)
#st_crs(result_df_rndvi)<-st_crs(legend_class)
#st_write(result_df_rndvi, paste0("output_",class_analysis,"_ndvirule.csv"), layer_options = "GEOMETRY=AS_XY")
#rm(list=ls())
#gc()


MapSentinel/sraster documentation built on April 24, 2020, 5:43 a.m.