R/validate_alerts.R

Defines functions validate_alerts

Documented in validate_alerts

#' @title Create plots for facilitate validation of SATA alerts
#' @description This function creates from GEE preprocessing outputs, a series of plots which aims to help to validate SATA alerts.
#' @param alerts_shp Shapefile filename. This is the shapefile of alerts generated by SATA. This could be fire or deforestation alerts as always it is a point shapefile with a column indicating alert dates in the format 'YYYY-MM-DD'. It should be projected in UTM WGS84 coordinates system.
#' @param column_dates_name String. The name of the column indicating alerts dates should be specified here. Case-sensitive applies.
#' @param red_channel_GEE_ts GEE time series filename. The GEE time series to assign to the red channel in plots generation. It should be projected in UTM WGS84 coordinates system.
#' @param green_channel_GEE_ts GEE time series filename. The GEE time series to assign to the green channel in plots generation. It should be projected in UTM WGS84 coordinates system.
#' @param blue_channel_GEE_ts GEE time series filename. The GEE time series to assign to the blue channel in plots generation. It should be projected in UTM WGS84 coordinates system.
#' @param observations_evaluate Numeric. A number defining the obsevations to plot (options: 1-5)
#' @param buffer_distance Numeric. A buffer distance from alerts to visualize in plots (in meters)
#' @param stretch_perc Numeric. A value to enhance visualization in plots (range: 0-100)
#' @param size_jpg Numeric. A value which defines the size of plots (in pixels). Plots are based in an square, so one dimension defines its size.
#' @param ncores Numeric. A number defining how many cores will be used in processing.
#' @param output_folder Foldername. A folder to use for store plots.
#' @return Plots outputs are stored in: *output_folder*. Among them: 1) plots foreach SATA alert; and 2) a shapefile of the alerts that intesected GEE time series.
#' @examples
#' #Defining variables for alerts shapefile
#' alerts_shp <- "C:/DATA/GAF/demo/estadisticas/DEFORESTATION_alerts.shp"
#' column_dates_name <- "start" #case sensitive!
#'
#' #Defining variables for GEE ts files (normally as a TIF). Take into account that should exist in the same folder its version as CSV (dates files)
#' red_channel_GEE_ts <- "C:/DATA/GAF/demo/sentinel1/gee_outputs/coca_S1_ts_VV_P50_I14.tif"
#' green_channel_GEE_ts <- "C:/DATA/GAF/demo/sentinel1/gee_outputs/coca_S1_ts_VH_P50_I14.tif"
#' blue_channel_GEE_ts <- "C:/DATA/GAF/demo/sentinel1/gee_outputs/coca_S1_ts_VV_P50_I14.tif"
#'
#' #other variables required
#' observations_evaluate <- 5 #could be less if processing is slow
#' buffer_distance <- 500 #consider that is in meters and measured from the alert point
#' stretch_perc <- 5 #recommendable if plots do not have good contrast
#' size_jpg <- 1000 #in pixels
#' ncores <- 3 #modify if your resources are less than this
#'
#' #output folder to save plots
#' output_folder <- "C:/DATA/GAF/demo/test"
#'
#' #running the function
#' validate_alerts(alerts_shp = alerts_shp,
#'                 column_dates_name = column_dates_name,
#'                 red_channel_GEE_ts = red_channel_GEE_ts,
#'                 green_channel_GEE_ts = green_channel_GEE_ts,
#'                 blue_channel_GEE_ts = blue_channel_GEE_ts,
#'                 observations_evaluate = observations_evaluate,
#'                 buffer_distance = buffer_distance,
#'                 stretch_perc = stretch_perc,
#'                 size_jpg = size_jpg,
#'                 ncores = ncores,
#'                 output_folder = output_folder)
#' @export

##### MAIN FUNCTION #####

validate_alerts <- function(
  alerts_shp,
  column_dates_name,
  red_channel_GEE_ts,
  green_channel_GEE_ts,
  blue_channel_GEE_ts,
  observations_evaluate = 5,
  buffer_distance,
  stretch_perc = 5,
  size_jpg = 1000,
  ncores = 1,
  output_folder
){

  #****************************************************************************************************

  #(0)#### TEST #####

  test <- F
  if(test){
    alerts_shp <- "C:/DATA/GAF/demo/estadisticas/DEFORESTACION_puntos.shp"
    column_dates_name <- "inicio"
    red_channel_GEE_ts <- "C:/DATA/GAF/demo/sentinel1/gee_outputs/coca_S1_ts_VV_P50_I14.tif"
    green_channel_GEE_ts <- "C:/DATA/GAF/demo/sentinel1/gee_outputs/coca_S1_ts_VH_P50_I14.tif"
    blue_channel_GEE_ts <- "C:/DATA/GAF/demo/sentinel1/gee_outputs/coca_S1_ts_VV_P50_I14.tif"
    observations_evaluate <- 5
    buffer_distance <- 500
    stretch_perc <- 5
    size_jpg <- 1000
    ncores <- 3
    output_folder <- "C:/DATA/GAF/demo/test"
  }

  #(1)#### CONFIG #####

  check.libs <- c("rgdal","raster","sp","foreach","doParallel",
                  "lubridate","rgeos","scales")
  get.libraries <- function(libraries){
    for (i in libraries){
      if(!require(i,character.only=T)){
        install.packages(i)
        library(i,character.only=T)}
    }
  }
  get.libraries(check.libs)

  #(2)#### READ DATA #####

  #check alerts
  if(!file.exists(alerts_shp)){
    stop("alerts_shp do not exists")
  }
  ale.shp <- shapefile(alerts_shp)
  ale.shp@data <- cbind(data.frame(id_pos=1:nrow(ale.shp)), ale.shp@data)
  ale.prj <- ale.shp@proj4string@projargs
  ale.nam <- grep(column_dates_name,names(ale.shp))
  if(length(ale.nam)==0){
    stop("column_units_name do not found in spatial_units_shp")
  }
  #check GEE time series
  unique.rgb.files <- unique(c(red_channel_GEE_ts,green_channel_GEE_ts,blue_channel_GEE_ts))
  if(length(unique.rgb.files)!=1){
    gee.files <- c(red_channel_GEE_ts,green_channel_GEE_ts,blue_channel_GEE_ts)
  }else{
    gee.files <- unique.rgb.files
  }
  gee.ts <- list()
  for(i in 1:length(gee.files)){
    ts.file <- gee.files[i]
    dates.file <- gsub(".tif",".csv",basename(ts.file))
    dates.file <- paste0(dirname(gee.files[i]),"/",gsub("ts","dates",dates.file))
    if(file.exists(ts.file) & file.exists(dates.file)){
      gee.ts[[i]] <- c(ts.file,dates.file)
    }else{
      gee.ts[[i]] <- NA
      warning(paste0("This file is missing: ", basename(dates.file), " and the time series is ignored"))
    }
  }
  if(all(is.na(gee.ts))){
    stop('GEE files are missing, check for its csv or if the filename is correct')
  }
  gee.ts <- gee.ts[!is.na(gee.ts)]
  #read dates & read tif
  if(length(gee.ts)==3){
    gee.dates <- sapply(gee.ts,"[[",2)
    gee.dates <- lapply(gee.dates,function(x){
      x <- read.csv(x,header=T,stringsAsFactors=F)$dates
      x <- unlist(strsplit(gsub("\\[|\\]| ","",x),"[,]"))
      x <- sapply(strsplit(x,"_"),"[[",2)
    })
    if(length(unique(sapply(gee.dates,length)))==1){
      #select the first dats list (assumed that all dates are equal in the RGB)
      gee.dates <- gee.dates[[1]]
      gee.ts <- lapply(sapply(gee.ts,"[[",1),raster::stack)
      gee.sta <- list()
      for(i in 1:length(gee.dates)){
        gee.sta[[i]] <- raster::stack(gee.ts[[1]][[i]],
                              gee.ts[[2]][[i]],
                              gee.ts[[3]][[i]])
      }
      rm(gee.ts)
    }else{
      ts.max <- which.max(sapply(gee.dates,length))
      gee.dates <- gee.dates[[ts.max]]
      gee.ts <- gee.ts[[ts.max]][1]
      gee.ts <- raster::stack(gee.ts)
      if(nlayers(gee.ts)!=length(gee.dates)){
        stop("the .csv file is incorrect and do not match the number of observations in the GEE time series")
      }
      gee.sta <- list()
      for(i in 1:length(gee.dates)){
        gee.sta[[i]] <- gee.ts[[i]]
      }
      rm(gee.ts)
      warning("GEE time series are different in extension. Only the largest one is used")
    }
  }else{
    gee.dates <- sapply(gee.ts,"[[",2)
    gee.dates <- lapply(gee.dates,function(x){
      x <- read.csv(x,header=T,stringsAsFactors=F)$dates
      x <- unlist(strsplit(gsub("\\[|\\]| ","",x),"[,]"))
      x <- sapply(strsplit(x,"_"),"[[",2)
    })[[1]]
    gee.ts <- raster::stack(gee.ts[[1]][1])
    if(nlayers(gee.ts)!=length(gee.dates)){
      stop("the .csv file is incorrect and do not match the number of observations in the GEE time series")
    }
    gee.sta <- list()
    for(i in 1:length(gee.dates)){
      gee.sta[[i]] <- gee.ts[[i]]
    }
    rm(gee.ts)
  }
  #test area GEE and samples. First stack is assumed.
  gee.ext <- extent(gee.sta[[1]])
  gee.prj <- gee.sta[[1]]@crs@projargs
  if(gee.prj != ale.prj){
    ale.shp <- spTransform(ale.shp, gee.prj)
  }
  ale.shp <- raster::intersect(ale.shp,gee.ext)
  if(length(ale.shp)==0){
    stop("alerts_shp do not match spatially with GEE ts")
  }
  ale.dates <- ymd(ale.shp@data[,ale.nam])
  ale.shp <- ale.shp[order(ale.dates),]
  ale.dates <- ale.dates[order(ale.dates)]
  #get interval
  gee.dates <- ymd(gee.dates)
  gee.interval <- interval(min(gee.dates),max(gee.dates))
  #get plot config
  if(observations_evaluate==1){
    matrix.plot <- matrix(1:2, 2, 1, byrow = TRUE)
    size.jpg <- c(size_jpg*1,size_jpg*2)
  }else if(observations_evaluate==2){
    matrix.plot <- matrix(1:4, 2, 2, byrow = TRUE)
    size.jpg <- c(size_jpg*2,size_jpg*2)
  }else if(observations_evaluate==3){
    matrix.plot <- matrix(1:6, 2, 3, byrow = TRUE)
    size.jpg <- c(size_jpg*3,size_jpg*2)
  }else if(observations_evaluate==4){
    matrix.plot <- matrix(1:8, 2, 4, byrow = TRUE)
    size.jpg <- c(size_jpg*4,size_jpg*2)
  }else if(observations_evaluate==5){
    matrix.plot <- matrix(1:10, 2, 5, byrow = TRUE)
    size.jpg <- c(size_jpg*5,size_jpg*2)
  }else{
    stop("more than 5 observations to evaluate are not supported")
  }
  #create ouput folder
  dir.create(output_folder,showWarnings = F, recursive = T)

  #(3)#### FUNCTIONS #####

  #for stretch
  rescale.band <- function(band.ras,stretch_perc=1){
    #stretch
    stretch.val <- c(stretch_perc,100-stretch_perc)/100
    #isolate values
    band.val <- raster::values(band.ras)
    #get stretch range & rescale
    band.range <- as.double(quantile(na.omit(band.ras),probs=stretch.val))
    band.val[band.val<=band.range[1]] <- band.range[1]
    band.val[band.val>=band.range[2]] <- band.range[2]
    band.val <- round(scales::rescale(band.val,c(1,255)))
    band.val[is.na(band.val)] <- 0
    values(band.ras) <- band.val
    return(band.ras)
  }

  #chip plot
  plot.chip <- function(x=ale.shp[1,],
                        x.date=ale.dates[1],
                        y.down=down.img,
                        down.dates=down.dates,
                        y.up=up.img,
                        up.dates=up.dates){

    ##### DOWN DATES ####

    if(length(y.down)==0 | all(is.na(y.down))){
      y.down.img <- rep(NULL,observations_evaluate)
    }else{
      x.down.buff <- list()
      x.down.pts <- list()
      y.down.img <- list()
      for(j in 1:length(y.down)){
        #collect data
        y.sta <- y.down[[j]]
        x.down.pts[[j]] <- x
        x.down.buff[[j]] <- gBuffer(x,width=buffer_distance)
        #same study area
        y.sta <- tryCatch(crop(y.sta,extent(x.down.buff[[j]])), error=function(e){return(NULL)})
        #if valid rescale values
        if(is.null(y.sta)){
          y.down.img[[j]] <- NULL
        }else{
          if(nlayers(y.sta)>=3){
            y.sta <- lapply(1:3,function(z){rescale.band(y.sta[[z]],stretch_perc)})
            y.down.img[[j]] <- raster::stack(y.sta)
          }else{
            y.sta <- rescale.band(y.sta,stretch_perc)
            y.down.img[[j]] <- y.sta
          }
        }
      }
      #get dates
      y.down.dates <- down.dates
    }

    ##### UP DATES ####

    if(length(y.up)==0 | all(is.na(y.up))){
      y.up.img <- rep(NULL,observations_evaluate)
    }else{
      x.up.buff <- list()
      x.up.pts <- list()
      y.up.img <- list()
      for(j in 1:length(y.up)){
        #collect data
        y.sta <- y.up[[j]]
        x.up.pts[[j]] <- x
        x.up.buff[[j]] <- gBuffer(x,width=buffer_distance)
        #same study area
        y.sta <- tryCatch(crop(y.sta,extent(x.up.buff[[j]])), error=function(e){return(NULL)})
        #if valid rescale values
        if(is.null(y.sta)){
          y.up.img[[j]] <- NULL
        }else{
          if(nlayers(y.sta)>=3){
            y.sta <- lapply(1:3,function(z){rescale.band(y.sta[[z]],stretch_perc)})
            y.up.img[[j]] <- raster::stack(y.sta)
          }else{
            y.sta <- rescale.band(y.sta,stretch_perc)
            y.up.img[[j]] <- y.sta
          }
        }
      }
      #get dates
      y.up.dates <- up.dates
    }

    ##### PLOT ####

    if(length(y.up.img)==0 & length(y.down.img)==0){
      return(NULL)
    }else{
      #config plot
      out.name <- paste0(output_folder,"/",x@data$id_pos,"_SAMPLE_",x.date,".jpg")
      jpeg(out.name,width=size.jpg[1],height=size.jpg[2],res=300)
      layout(matrix.plot)
      #fix numbers
      if(length(y.down.img)!=observations_evaluate){
        na.obs <- observations_evaluate - length(y.down.img)
        y.down.img <- c(y.down.img,rep(NA,na.obs))
      }
      if(length(y.up.img)!=observations_evaluate){
        na.obs <- observations_evaluate - length(y.up.img)
        y.up.img <- c(y.up.img,rep(NA,na.obs))
      }
      #plot down observations
      for(j in 1:observations_evaluate){
        if(is.na(y.down.img)[j]){
          plot(extent(gBuffer(x,width=buffer_distance)),type='n',xlab="Image not found",ylab='',xaxt='n',yaxt='n')
        }else{
          if(nlayers(y.down.img[[j]])>=3){
            #plot RGB
            plot(extent(x.down.buff[[j]]),type='n',xlab=y.down.dates[j],ylab='',xaxt='n',yaxt='n')
            try(plotRGB(y.down.img[[j]],axes=F,add=T))
            plot(x.down.pts[[j]],lwd=2,cex=5,col="red",add=T)
            plot(x.down.buff[[j]],lwd=2,add=T)
          }else{
            #plot raster
            plot(extent(x.down.buff[[j]]),type='n',xlab=y.down.dates[j],ylab='',xaxt='n',yaxt='n')
            try(plot(y.down.img[[j]],col=gray.colors(255),axes=F,legend=F,add=T))
            plot(x.down.pts[[j]],lwd=2,cex=5,col="red",add=T)
            plot(x.down.buff[[j]],lwd=2,add=T)
          }
        }
      }
      #plot up observations
      for(j in 1:observations_evaluate){
        if(is.na(y.up.img)[j]){
          plot(extent(gBuffer(x,width=buffer_distance)),type='n',xlab="Image not found",ylab='',xaxt='n',yaxt='n')
        }else{
          if(nlayers(y.up.img[[j]])>=3){
            #plot RGB
            plot(extent(x.up.buff[[j]]),type='n',xlab=y.up.dates[j],ylab='',xaxt='n',yaxt='n')
            try(plotRGB(y.up.img[[j]],axes=F,add=T))
            plot(x.up.pts[[j]],lwd=2,cex=5,col="red",add=T)
            plot(x.up.buff[[j]],lwd=2,add=T)
          }else{
            #plot raster
            plot(extent(x.up.buff[[j]]),type='n',xlab=y.up.dates[j],ylab='',xaxt='n',yaxt='n')
            try(plot(y.up.img[[j]],col=gray.colors(255),axes=F,legend=F,add=T))
            plot(x.up.pts[[j]],lwd=2,cex=5,col="red",add=T)
            plot(x.up.buff[[j]],lwd=2,add=T)
          }
        }
      }
      #close plot
      title(paste0("id_pos: ",x@data$id_pos,"    date: ",x.date),outer=T,line=-2)
      dev.off()
      return("plot created...")
    }
  }

  #(4)#### PROCESS #####

  cl<-makeCluster(ncores)
  registerDoParallel(cl)
  #execute
  foreach(i=1:length(ale.shp),.packages=c("raster","rgdal","scales","lubridate","rgeos")) %dopar% {
    #initial variables
    x <- ale.shp[i,]
    x.date <- ale.dates[i]
    #proceed if data is within
    if(x.date %within% gee.interval){
      #down dates
      down.img <- gee.sta[which(gee.dates <= x.date,gee.dates)]
      down.img <- tail(down.img,observations_evaluate)
      down.dates <- gee.dates[which(gee.dates <= x.date,gee.dates)]
      down.dates <- tail(down.dates,observations_evaluate)
      #up dates
      up.img <- gee.sta[which(gee.dates > x.date,gee.dates)]
      up.img <- head(up.img,observations_evaluate)
      up.dates <- gee.dates[which(gee.dates > x.date,gee.dates)]
      up.dates <- head(up.dates,observations_evaluate)
      #make plots
      plot.chip(x,x.date,
                down.img,down.dates,
                up.img,up.dates)
    }else{
      print(paste0("id_pos: ",x@data$id_pos," date: ",x.date, " -> dates of reference images do not overlay"))
    }
  }
  stopCluster(cl)
  #save shapefile
  shapefile(ale.shp,paste0(output_folder,"/intersected_samples.shp"),overwrite=T)

  ###### close ######
  print("****script end*****")

}
FSantosCodes/SATAtools documentation built on May 5, 2019, 11:06 p.m.