In MapSentinel/sraster:

knitr::opts_chunk$set(echo = TRUE)
library(knitr)
#library(kableExtra)

Introduction

The attempt of this tutorial is to document the implmentation of the cluster analysis developed in Pari's paper.

#source('/home/willimarti2008/Documents/DGT/sraster/R/clip_raster.R')
source('C:\\IPSTERS\\sraster\\R\\clip_raster.R')

library(rgdal)
library(sf)
library(rasterVis)
library(stars)
library(raster)

#file_shape = '/home/willimarti2008/Documents/DGT/COS2015/COS2015_v2_08_02_2019_clip.shp'
file_shape = 'C:\\IPSTERS\\COS2015\\COS2015_v2_08_02_2019_clip.shp'
legend = st_read(file_shape)
table(legend$Legend2)

class_analysis = 'other_broadleaf'

legend_class = legend[which(legend$Legend2 == class_analysis),]
#rm(legend)

Buffer

legend_class_buffer = st_buffer(legend_class,-20)

Query Polygons based on HRL

To transform all HRL mask to polygon vector may represent a really demanding computationall task. This task is mean to later on applying an intersection wiht the polygons of COS. However, before that intersection we require that the mask layer shrinks boundaries in 20 meters in order to have higher certainty that the pixel is covering this kind of class.

raster to points

#file_mask = c('/home/willimarti2008/Documents/DGT/sraster/images/mask_broadleaf_forestGL.tif')
file_mask = c('C:\\IPSTERS\\sraster\\images\\mask_broadleaf_forestGL.tif')
hrl = raster(file_mask)
hrl_points = st_as_sf(rasterToPoints(hrl,spatial = TRUE))

Intersection

intersect_poly_hrl_cos = st_intersects(legend_class_buffer , hrl_points)

number_points = function(x, threshold){
    if(length(x)>= threshold){
      return(length(x))
    }else
    {
      return(99999)
    }
}
#threshold: at least one pixel is intersected
len_vector = lapply(intersect_poly_hrl_cos, number_points, threshold = 25)
index_poly = which(unlist(len_vector) != 99999)
legend_class_q0 = legend_class_buffer[index_poly,]
#hrl_points = NULL

Size of the polygons

library(units)
areas_poly = st_area(legend_class_q0)
boxplot(areas_poly,col = "bisque")

quantile(areas_poly,probs=c(0.25,0.5,0.65,0.75,1))
lim_max = quantile(areas_poly,probs=c(0.90))
lim_min = 10000   # Minimun mapping unit
units(lim_min) <- as_units("m^2")

areas_poly_limit = areas_poly[areas_poly >= lim_min & areas_poly < lim_max]

boxplot(areas_poly_limit,col = "bisque")

cat("Number of polygons: ", length(areas_poly_limit))

Query based on area

legend_class_q1 = legend_class_q0[which(areas_poly >= lim_min & areas_poly < lim_max),]
legend_class_buffer_q1 = st_buffer(legend_class_q1,-20)

Images

#images_folder = '/home/willimarti2008/Documents/DGT/images'
images_folder = 'C:\\IPSTERS\\IMAGES'

join_path = function(x,path_folder){
  a = strsplit(x,'[.]')
  format_file = a[[1]][length(a[[1]])]
  if(format_file == 'tif'){
    return(paste0(path_folder,'\\',x))
  }
}

list_images = c("S2A_L2A_20171002-113001_T29SND.tif",
                 "S2A_L2A_20171221-112810_T29SND.tif",
                 "S2A_L2A_20180321-112321_T29SND.tif",
                 "S2A_L2A_20180619-112602_T29SND.tif",
                 "S2A_L2A_20180729-112845_T29SND.tif",
                 "S2A_L2A_20180818-112627_T29SND.tif",
                 "S2A_L2A_20180927-112959_T29SND.tif")

paths_images = unlist(lapply(list_images,join_path,images_folder))

Example

We consider two shapes, one polygon corresponds to the original one extracted from COS 2015 and the another one after boundary extraction of 20m around original polygon.

shape = legend_class_q1[1,]
shape_buffer = legend_class_buffer_q1[1,]

plot(st_geometry(shape),border='red')
plot(st_geometry(shape_buffer),col ='blue', add= T)

names_bands = c('B2','B3', 'B4', 'B5', 'B6', 'B7','B8','B8A', 'B11','B12')
result = as_sraster(shape_buffer,paths_images,names_bands)
plot(funct_plot((result$data[["2017-10-02"]][,,1])))

Adding NDVI and converting to sraster object

list_images_ndvi = c("S2A_L2A_20171002-113001_T29SND_NDVI.tif",
                 "S2A_L2A_20171221-112810_T29SND_NDVI.tif",
                 "S2A_L2A_20180321-112321_T29SND_NDVI.tif",
                 "S2A_L2A_20180619-112602_T29SND_NDVI.tif",
                 "S2A_L2A_20180729-112845_T29SND_NDVI.tif",
                 "S2A_L2A_20180818-112627_T29SND_NDVI.tif",
                 "S2A_L2A_20180927-112959_T29SND_NDVI.tif")

paths_images_ndvi = unlist(lapply(list_images_ndvi,join_path,images_folder))

result_ndvi = as_sraster(shape_buffer, paths_images_ndvi, names_bands = c("NDVI"))
plot(funct_plot((result_ndvi$data[["2017-10-02"]][,,1])))
#st_write(as.data.frame(result_ndvi),"output_ndvi.csv",layer_options = "GEOMETRY=AS_XY")

Adding NDBI and converting to sraster object

list_images_NDBI = c("S2A_L2A_20171002-113001_T29SND_NDBI.tif",
                 "S2A_L2A_20171221-112810_T29SND_NDBI.tif",
                 "S2A_L2A_20180321-112321_T29SND_NDBI.tif",
                 "S2A_L2A_20180619-112602_T29SND_NDBI.tif",
                 "S2A_L2A_20180729-112845_T29SND_NDBI.tif",
                 "S2A_L2A_20180818-112627_T29SND_NDBI.tif",
                 "S2A_L2A_20180927-112959_T29SND_NDBI.tif")

paths_images_NDBI = unlist(lapply(list_images_NDBI,join_path,images_folder))

result_NDBI = as_sraster(shape_buffer, paths_images_NDBI, names_bands = c("NDBI"))
plot(funct_plot((result_NDBI$data[["2017-10-02"]][,,1])))
result_NDBI
#st_write(as.data.frame(result_NDBI),"output_ndbi.csv",layer_options = "GEOMETRY=AS_XY")

Adding NDMIR and converting to sraster object

list_images_NDMIR = c("S2A_L2A_20171002-113001_T29SND_NDMIR.tif",
                 "S2A_L2A_20171221-112810_T29SND_NDMIR.tif",
                 "S2A_L2A_20180321-112321_T29SND_NDMIR.tif",
                 "S2A_L2A_20180619-112602_T29SND_NDMIR.tif",
                 "S2A_L2A_20180729-112845_T29SND_NDMIR.tif",
                 "S2A_L2A_20180818-112627_T29SND_NDMIR.tif",
                 "S2A_L2A_20180927-112959_T29SND_NDMIR.tif")

paths_images_NDMIR = unlist(lapply(list_images_NDMIR,join_path,images_folder))

result_NDMIR = as_sraster(shape_buffer, paths_images_NDMIR, names_bands = c("NDMIR"))
plot(funct_plot((result_NDMIR$data[["2017-10-02"]][,,1])))
#st_write(as.data.frame(result_NDMIR),"output_NDMIR.csv",layer_options = "GEOMETRY=AS_XY")

list_images_con5 = c("S2A_L2A_20171002-113001_T29SND_CON3.tif",
                 "S2A_L2A_20171221-112810_T29SND_CON3.tif",
                 "S2A_L2A_20180321-112321_T29SND_CON3.tif",
                 "S2A_L2A_20180619-112602_T29SND_CON3.tif",
                 "S2A_L2A_20180729-112845_T29SND_CON3.tif",
                 "S2A_L2A_20180818-112627_T29SND_CON3.tif",
                 "S2A_L2A_20180927-112959_T29SND_CON3.tif")

paths_images_con5 = unlist(lapply(list_images_con5,join_path,images_folder))

result_con5 = as_sraster(shape_buffer, paths_images_con5, names_bands = c("CON5"))
plot(funct_plot((result_con5$data[["2017-10-02"]][,,1])))
#st_write(as.data.frame(result_NDMIR),"output_NDMIR.csv",layer_options = "GEOMETRY=AS_XY")

stacking layers

result_stack = stack(list(result, result_ndvi,result_NDBI, result_NDMIR,result_con5))
result_stack$bands
#st_write(as.data.frame(result_stack),"output_stack.csv",layer_options = "GEOMETRY=AS_XY")

Coniferos Mask HRL

mask_hrl_sraster = as_sraster(shape_buffer, file_mask, names_bands = c("mask"))
mask_hrl  = mask_hrl_sraster$data[[1]][,,1]
plot(funct_plot((mask_hrl)))

Extracting spectral temporal information using HRL boundaries

result_clip = clip_sraster(result_stack, mask = mask_hrl, type = "No rule")
plot(funct_plot((result_clip$data[["2017-10-02"]][,,11])))

K means

Internally the number of cluster is defined by the Calinski-Harabasz index. As result, we have n clusters

cluster_matrix = kmeans_sraster(result_clip,pca = TRUE)

mask_raster = funct_plot(cluster_matrix)
mask_raster = as.factor(mask_raster)
rat <- levels(mask_raster)[[1]]
name_clusters = names(table(cluster_matrix))
rat[["cluster"]] <- name_clusters
levels(mask_raster) <- rat

levelplot(mask_raster,col.regions=c("red","blue","yellow","green","black","orange"))

Extracting spectral temporal information with Majority rule

result_clip_cluster = clip_sraster(result_clip, mask = cluster_matrix, type ="Majority rule")
plot(funct_plot((result_clip_cluster$data[["2017-10-02"]][,,11])))
#st_write(as.data.frame(result_clip),"output_clip.csv",layer_options = "GEOMETRY=AS_XY")

Extracting spectral temporal information with ndvi rule for woody class

#result_clip_cluster = clip_sraster(result_clip, mask = cluster_matrix, type ="rule_ndvi_wood")
#plot(funct_plot((result_clip_cluster$data[["2017-10-02"]][,,11])))
##st_write(as.data.frame(result_clip),"output_clip_test2.csv",layer_options = "GEOMETRY=AS_XY")

Working with more polygons

Well, this workflow also attempts to construct a database with the spectral-temporal information that must be part of the trainig and validation modelling. So here we want to evaluate haw fast we can retrieve pseudotraining and put it a file that later on we will use for modelling.

n_samples = 150
set.seed(123)   #setting same random selection for testing
index = sample(1:dim(legend_class_buffer_q1)[1], n_samples,replace = FALSE)

#file_index = 'C:\\IPSTERS\\sraster\\ins4\\sampling_4\\output_broadleaf_No rule.csv'
#index_df = read.csv2(file_index)
#index = unique(index_df$Object)
#write.csv(data.frame(unique(result_df_rule$Object),class_analysis),file_index)

The following script contains the workflow that generalize the process done with the polygon of water done above. We require 3.5 seconds per polygon, so working with 20 this process can take one minute and 10 seconds more.

workflow <- function(shape, paths_images, type){
          result = as_sraster(shape,paths_images,names_bands=c("B2","B3","B4","B5","B6","B7","B8","B8A","B11","B12"))
          result_ndvi = as_sraster(shape, paths_images_ndvi, names_bands = c("NDVI"))
          result_NDBI = as_sraster(shape, paths_images_NDBI, names_bands = c("NDBI"))
          result_NDMIR = as_sraster(shape, paths_images_NDMIR, names_bands = c("NDMIR"))
          result_con5 = as_sraster(shape, paths_images_con5 , names_bands = c("CON5"))
          result_stack = stack(list(result, result_ndvi,result_NDBI, result_NDMIR, result_con5))
          #mask HRL
          mask_hrl_sraster = as_sraster(shape, file_mask, names_bands = c("mask"))
          mask_hrl  = mask_hrl_sraster$data[[1]][,,1]

          #checking if the intersection has information, otherwise return NULL
          if(all(is.na( mask_hrl))){
             return(NULL)
          }
          else if(length( which(!is.na(mask_hrl))) < 25){
             return(NULL)
          }
          else{
            result_clip = clip_sraster(result_stack, mask = mask_hrl, type = "No rule")
            #mask cluster
            cluster_matrix = kmeans_sraster(result_clip,pca = TRUE)
            if(all(is.na(cluster_matrix))){
              return(NULL)
            }
            else{
              result_clip_cluster = clip_sraster(result_clip, mask = cluster_matrix, type)
              return(as.data.frame(result_clip_cluster))
            }
          }
}

Cluster analysis

#query 2  eval= FALSE  legend_class_q1$OBJECTI %in% 
legend_class_buffer_q2 = legend_class_buffer_q1[,]
list_shapes = split(legend_class_buffer_q1,legend_class_buffer_q1$OBJECTI)
type_rule ="Majority rule"
result_list_rule = lapply(list_shapes, workflow, paths_images, type_rule)
result_df_rule = do.call("rbind",result_list_rule)

Plot signal

library(reshape2)
library(ggplot2)
index_indvi = grep("NDVI",colnames(result_df_rule))

result_df_rule_ndvi = result_df_rule[,c(1,index_indvi)]

st_geometry(result_df_rule_ndvi) = NULL

result_aggregate_time = aggregate(result_df_rule_ndvi, by = list(result_df_rule_ndvi$Object), FUN = median, na.rm =TRUE)
resultmelt <- melt(result_aggregate_time[,-1], id.vars = "Object")
#removing ndvi text
resultmelt$variable <- as.Date(substr(resultmelt[,c("variable")], 1, 10))
resultmelt$Object <- as.factor(resultmelt$Object)

#ggplot
p1 <- ggplot(resultmelt, aes(variable, value, group = Object)) + 
  geom_line(color="red") + theme(legend.position="top")
print(p1)

Bhattacharyya distance

y = result_df_rule[,c(1,index_indvi)]
st_geometry(y) <- NULL
index_preserve = b_distance(y, prob= 0.80)
result_df_rule2 = result_df_rule[result_df_rule$Object %in% index_preserve, ]

#graphic 2
resultmelt$Type = "Outlier"
resultmelt[resultmelt$Object %in% index_preserve,c("Type")] = "Data"

#ggplot
p1 <- ggplot(resultmelt, aes(variable, value, group = Object)) + 
  geom_line(aes(color=Type)) + theme(legend.position="top") +
  scale_linetype_manual(values=c("twodash", "dotted"))+
  scale_size_manual(values=c(4, 4))

print(p1)

ccccccccc

To save the results

#eval= FALSE
st_crs(result_df_rule)<-st_crs(legend_class)
st_crs(result_df_rule2)<-st_crs(legend_class)
st_write(result_df_rule, paste0("output_",class_analysis,"_",type_rule,".csv"), layer_options = "GEOMETRY=AS_XY")
st_write(result_df_rule2, paste0("output_",class_analysis,"_",type_rule,"_bdist.csv"), layer_options = "GEOMETRY=AS_XY")

#st_write(result_df_ndvirule2, paste0("output_",class_analysis,"_ndvirule.csv"), layer_options = "GEOMETRY=AS_XY")

saving pixel values without processing

workflow2 <- function(shape, paths_images, type){
          result = as_sraster(shape,paths_images,names_bands=c("B2","B3","B4","B5","B6","B7","B8","B8A","B11","B12"))
          result_ndvi = as_sraster(shape, paths_images_ndvi, names_bands = c("NDVI"))
          result_NDBI = as_sraster(shape, paths_images_NDBI, names_bands = c("NDBI"))
          result_NDMIR = as_sraster(shape, paths_images_NDMIR, names_bands = c("NDMIR"))
          result_stack = stack(list(result, result_ndvi,result_NDBI, result_NDMIR))
          return(as.data.frame(result_stack))
          }

indeces_polygons = unique(result_df_rule$Object)
legend_class_q3 = legend_class_q2[legend_class_q2$OBJECTI %in% indeces_polygons,]

list_shapes = split(legend_class_q3,legend_class_q3$OBJECTI)

result_list_nr = lapply(list_shapes, workflow2, paths_images, type ="No rule")
result_df_nr = do.call("rbind",result_list_nr)
st_crs(result_df_nr)<-st_crs(legend_class)
st_write(result_df_nr, paste0("output_",class_analysis,"_norule.csv"), layer_options = "GEOMETRY=AS_XY")