AVOCADO: Anomaly Vegetation Change Detection

Documented in PhenRef2d PLUGPhenAnoRFDMapPLUS PLUGPhenAnoRFDPLUS

#' @title Avocado algorithm - calculate likelihood of vegetation-index–time space for reference vegetation 
#' @name PhenRef2d
#' @description Calculate the cumulative density distribution and their likelihood values based on reference vegetation - input for Wall-to-wall map version (i.e. PLUGPhenAnoRFDPLUS function)
#' @author  Roberto O. Chavez, Mathieu Decuyper
#' @param phen Numeric vector with the values of the reference vegetation
#' @param dates A date vector. The number of dates must be equal to the number of values of time series.
#' @param h Numeric. Geographic hemisphere to define the starting date of the growing season. h=1 Northern Hemisphere; h=2 Southern Hemisphere.
#' @param anop Numeric vector with the number of values that are in x. For those values the anomalies and likelihoods will be calculated based on the phen. For example a time series has 450 values, so anop=c(1:450).
#' @param rge A vector containing minimum and maximum values of the response variable used in the analysis. We suggest the use of theoretically based limits. For example in the case of MODIS NDVI or EVI, it ranges from 0 to 10,000, so rge =c(0,10000)
#' @return List which contains cumulative bivariate density distribution (cumDensity), and maximum likelihood of the vegetation-index–time space (MAXY) 
#' @import npphen
#' @import stats
#' @importFrom lubridate yday
#' @seealso \code{\link{PLUGPhenAnoRFDMapPLUS}} \code{\link{PLUGPhenAnoRFDPLUS}}
#' @examples
#' \dontrun{
#' # Loading raster data
#' library(terra)
#' library(npphen)
#' 
#' MDD <- rast(system.file("extdata", "MDD_NDMI_1990_2020.tif", package = "AVOCADO"))
#' # load dates vector
#' load(system.file("extdata", "MDD_dates.RData", package = "AVOCADO"))
#' # load  reference forest shapefile
#' MDDref <- vect(system.file("extdata", "MDDref.gpkg", package = "AVOCADO"))
#' # Create the reference curve
#' ref.ext <- ext(MDDref)
#' ref.brick <- crop(MDD, ref.ext)
#' fin <- nrow(ref.brick) * ncol(ref.brick)
#' phen <- as.numeric(terra::extract(ref.brick, 1))
#' d1 <- MDD_dates
#' for (i in 2:fin) {
#'   pp <- as.numeric(terra::extract(ref.brick, i))
#'   phen <- c(phen, pp)
#'   d1 <- c(d1, MDD_dates)
#' }
#' MDD_fref <- PhenRef2d(phen, d1, h = 1, anop = c(1:1063), rge = c(0, 10000))
#' 
#' # plot reference curve + probabilities (same result as using PhenKplot())
#' image(MDD_fref$x, MDD_fref$y, MDD_fref$cumDensity, xlab = "DOY", ylab = "NMDI", 
#' font.lab = 2, breaks = c(0, 0.5, 0.75, 0.9, 0.95), col = grDevices::heat.colors(n = 4, alpha = 0.6))
#' contour(MDD_fref$x, MDD_fref$y, MDD_fref$cumDensity, levels = c(0, 0.5, 0.75, 0.9, 0.95), add = T, col = grDevices::grey(0.25), labcex = 1)
#' lines(seq(1, 365), MDD_fref$MAXY, lwd = 3, col = "dark red")
#'
#' }
#' 
#' @export
PhenRef2d <-
  function(phen, dates, h, anop, rge) {
    # a.Preparing dataset
    
    if (length(rge) != 2) {
      stop("rge must be a vector of length 2")
    }
    if (rge[1] > rge[2]) {
      stop("rge vector order must be minimum/maximum")
    }
    if (length(dates) != length(phen)) {
      stop("N of dates and files do not match")
    }
    
    ano.min <- min(anop)
    ano.max <- max(anop)
    ano.len <- ano.max - ano.min + 1
    len2 <- 2 * ano.len
    
    if (ano.min >= ano.max) {
      stop("for anop, lower value > upper value")
    }
    
    DOY <- lubridate::yday(dates)
    DOY[which(DOY == 366)] <- 365
    D1 <- cbind(DOY, phen)
    
    if (length(unique(D1[, 2])) < 10 | (nrow(D1) - sum(is.na(D1))) < (0.1 * length(D1))) {
      return(rep(NA, len2))
    }
    
    # b. Kernel calculation using the reference period (D1)
    
    if (h != 1 && h != 2) {
      stop("Invalid h")
    }
    DOGS <- cbind(seq(1, 365), c(seq(185, 365), seq(1, 184)))
    if (h == 2) {
      for (i in 1:nrow(D1)) {
        D1[i, 1] <- DOGS[which(DOGS[, 1] == D1[i, 1], arr.ind = TRUE), 2]
      }
    }
    
    Hmat <- ks::Hpi(na.omit(D1))
    Hmat[1, 2] <- Hmat[2, 1]
    K1 <- ks::kde(na.omit(D1), H = Hmat, xmin = c(1, rge[1]), xmax = c(365, rge[2]), gridsize = c(365, 500))
    K1Con <- K1$estimate
    for (j in 1:365) {
      Kdiv <- sum(K1$estimate[j, ])
      ifelse(Kdiv == 0, K1Con[j, ] <- 0, K1Con[j, ] <- K1$estimate[j, ] / sum(K1$estimate[j, ]))
    }
    
    first.no.NA.DOY <- min(D1[, 1][which(is.na(D1[, 2]) == FALSE)])
    last.no.NA.DOY <- max(D1[, 1][which(is.na(D1[, 2]) == FALSE)])
    
    MAXY <- apply(K1Con, 1, max)
    for (i in 1:365) {
      n.select <- which(K1Con[i, ] == MAXY[i], arr.ind = TRUE)
      if (length(n.select) > 1) {
        n <- n.select[1]
        MAXY[i] <- NA
      }
      if (length(n.select) == 1) {
        n <- n.select
        MAXY[i] <- median(K1$eval.points[[2]][n])
      }
      if (i < first.no.NA.DOY) {
        MAXY[i] <- NA
      }
      if (i > last.no.NA.DOY) {
        MAXY[i] <- NA
      }
    }
    
    # c. Calculating cumulative bivariate density distribution
    
    h2d <- list()
    h2d$x <- seq(1, 365)
    h2d$y <- seq(rge[1], rge[2], len = 500)
    h2d$density <- K1Con / sum(K1Con)
    uniqueVals <- rev(unique(sort(h2d$density)))
    cumRFDs <- cumsum(uniqueVals)
    names(cumRFDs) <- uniqueVals
    h2d$cumDensity <- matrix(nrow = nrow(h2d$density), ncol = ncol(h2d$density))
    h2d$cumDensity[] <- cumRFDs[as.character(h2d$density)]
    na.sta <- first.no.NA.DOY - 1
    na.end <- last.no.NA.DOY + 1
    if (na.sta >= 1) {
      h2d$cumDensity[1:na.sta, ] <- NA
    }
    if (na.end <= 365) {
      h2d$cumDensity[na.end:365, ] <- NA
    }
    
    # d. prepare output: attach MAXY to h2d list
    
    h2d$MAXY <- MAXY
    h2d
    
  }

#' @title Avocado algorithm - Single pixel version
#' @name PLUGPhenAnoRFDPLUS
#' @description Calculate the Anomaly and their likelihood values (based on the difference between the pixel values and reference vegetation) for a numeric vector
#' @author  Roberto O. Chavez, Mathieu Decuyper
#' @param x numeric vector. Time series of vegetation index (e.g. NDVI, EVI, NDMI)
#' @param phenref List which contains cumulative bivariate density distribution and maximum likelihood of the vegetation-index–time space based on reference vegetation obtained from \code{\link{PhenRef2d}}
#' @param dates A date vector. The number of dates must be equal to the number of values of time series.
#' @param h Numeric. Geographic hemisphere to define the starting date of the growing season. h=1 Northern Hemisphere; h=2 Southern Hemisphere.
#' @param anop Numeric vector with the number of values that are in x. For those values the anomalies and likelihoods will be calculated based on the phen. For example a time series has 450 values, so anop=c(1:450).
#' @param rge A vector containing minimum and maximum values of the response variable used in the analysis. We suggest the use of theoretically based limits. For example in the case of MODIS NDVI or EVI, it ranges from 0 to 10,000, so rge =c(0,10000)
#' @return vector of length([x]*2) containing all anomalies, followed by their position within the reference frequency distribution (RFD)
#' @seealso \code{\link{PLUGPhenAnoRFDMapPLUS}}
#' @import npphen
#' @import stats
#' @importFrom lubridate yday
#' @importFrom graphics abline
#' @examples
#' \dontrun{
#' # ===================================================================================================================
#' # Loading raster data
#' library(terra)
#' library(npphen)
#' MDD <- rast(system.file("extdata", "MDD_NDMI_1990_2020.tif", package = "AVOCADO"))
#' # load dates vector
#' load(system.file("extdata", "MDD_dates.RData", package = "AVOCADO"))
#' # load  reference forest data (output from PhenRef2d)
#' load(system.file("extdata", "MDD_forestReference.RData", package = "AVOCADO"))
#' ## time series extraction for a single pixel
#' px <- vect(cbind(-69.265, -12.48))
#' plot(MDD[[1]])
#' plot(px, add = T)
#'
#' # extract series
#' px_series <- as.numeric(terra::extract(MDD, px, ID = F))
#' plot(MDD_dates, px_series, type = "b", xlab = "", ylab = "NDMI")
#' ## Anomaly calculation
#' anom_rfd <- PLUGPhenAnoRFDPLUS(x = px_series, phenref = MDD_fref, dates = MDD_dates, h = 2, anop = c(1:1063), rge = c(1, 10000))
#' }
#'
#' @export
PLUGPhenAnoRFDPLUS <-
  function(x, phenref, dates, h, anop, rge) {
    # a.Preparing dataset

    if (length(rge) != 2) {
      stop("rge must be a vector of length 2")
    }
    if (rge[1] > rge[2]) {
      stop("rge vector order must be minimum/maximum")
    }
    if (length(dates) != length(x)) {
      stop("N of dates and files do not match")
    }

    # ref.min <- min(refp)
    # ref.max <- max(refp)
    ano.min <- min(anop)
    ano.max <- max(anop)
    ano.len <- ano.max - ano.min + 1
    len2 <- 2 * ano.len

    if (ano.min >= ano.max) {
      stop("for anop, lower value > upper value")
    }

    if (all(is.na(x))) {
      return(rep(NA, len2))
    }

    DOY <- lubridate::yday(dates)
    DOY[which(DOY == 366)] <- 365
    D2 <- cbind(DOY[ano.min:ano.max], x[ano.min:ano.max])


    if (all(is.na(D2[, 2]))) {
      return(rep(NA, len2))
    }

    # b. Calculating anomalies AND their RFD based on D2
    if (h != 1 && h != 2) {
      stop("Invalid h")
    }
    DOGS <- cbind(seq(1, 365), c(seq(185, 365), seq(1, 184)))
    if (h == 2) {
      for (i in 1:nrow(D2)) {
        D2[i, 1] <- DOGS[which(DOGS[, 1] == D2[i, 1], arr.ind = TRUE), 2]
      }
    }

    
    # b.1 Anomalies 
    MAXY <- phenref$MAXY
    
    Anoma <- rep(NA, ano.len)
    for (i in 1:nrow(D2)) {
      Anoma[i] <- as.integer(D2[i, 2] - MAXY[D2[i, 1]])
    }
    Anoma[1:ano.len]
    
    # b.2 RFD
    rowAnom <- matrix(NA, nrow = nrow(D2), ncol = 500)
    for (i in 1:nrow(D2)) {
      rowAnom[i, ] <- abs(phenref$y - D2[i, 2])
    }
    rowAnom2 <- unlist(apply(rowAnom, 1, function(x) {
      if (all(is.na(x))) {
        NA
      } else {
        which.min(x)
      }
    }))
    
    
    AnomRFD <- rep(NA, nrow(D2))
    for (i in 1:nrow(D2)) {
      AnomRFD[i] <- phenref$cumDensity[D2[i, 1], rowAnom2[i]]
    }
    AnomRFDPerc <- round(100 * (AnomRFD))
    
    
    plot(AnomRFDPerc[1:ano.len], xlab = "Time", ylab = "RFD (%)", font.lab = 2)
    abline(h = 90, col = "red")
    abline(h = 95, col = "red")
    abline(h = 99, col = "red")
    
    AnomRFDPerc[1:ano.len]
    
    # Two results in a single numeric vector
    AnomPlusRFD <- c(Anoma[1:ano.len], AnomRFDPerc[1:ano.len])
    AnomPlusRFD[1:len2]
  }




#' @title Avocado algorithm - Wall-to-wall map version
#' @name PLUGPhenAnoRFDMapPLUS
#' @description Calculate the Anomaly and their likelihood values (based on the difference between the pixel values and reference vegetation) for a RasterStack
#' @author  Roberto O. Chavez, Mathieu Decuyper
#' @param s SpatRaster time series of vegetation index (e.g. NDVI, EVI, NDMI)
#' @param anop Numeric vector with the number of values that are in x. For those values the anomalies and likelihoods will be calculated based on the phen. For example a time series has 450 values, so anop=c(1:450).
#' @param phenref List which contains cumulative bivariate density distribution and maximum likelihood of the vegetation-index–time space based on reference vegetation obtained from \code{\link{PhenRef2d}}
#' @param dates A date vector. The number of dates must be equal to the number of values of time series.
#' @param h Numeric. Geographic hemisphere to define the starting date of the growing season. h=1 Northern Hemisphere; h=2 Southern Hemisphere.
#' @param nCluster Numeric. Number of CPU's to be used for the job.
#' @param outname Character vector with the output path and filename with extension or only the filename and extension if work directory was set. More information: See writeRaster
#' @param datatype Character vector that determines the interpretation of values written to disk. More information: See \code{\link[terra]{writeRaster}}
#' @return SpatRaster of nlayers([s]stack*2) containing all anomalies, followed by their likelihoods
#' @import npphen
#' @import terra
#' @import stats
#' @importFrom lubridate yday
#' @seealso \code{\link{PLUGPhenAnoRFDPLUS}}
#' @examples
#' \dontrun{
#' # Loading raster data
#' library(terra)
#' library(npphen)
#' 
#' MDD <- rast(system.file("extdata", "MDD_NDMI_1990_2020.tif", package = "AVOCADO"))
#' # load dates vector
#' load(system.file("extdata", "MDD_dates.RData", package = "AVOCADO"))
#' # load  reference forest data (output from PhenRef2d)
#' load(system.file("extdata", "MDD_forestReference.RData", package = "AVOCADO"))
#'
#' ## Anomaly calculation
#' # checking availiable cores and leave one free
#' nc1 <- parallel::detectCores() - 1
#' PLUGPhenAnoRFDMapPLUS(
#'   s = MDD, dates = MDD_dates, h = 1, phenref = MDD_fref, anop = c(1:1063),
#'   nCluster = nc1, outname = "YourDirectory/MDD_AnomalyLikelihood.tif",
#'   datatype = "INT2S")
#' )
#' # The output file contains all the anomalies, followed by all their likelihoods
#' # and thus has twice the number of layers as the input raster stack.
#' }
#' @export
PLUGPhenAnoRFDMapPLUS <-
  function(s, phenref, dates, h, anop, nCluster, outname, datatype) {
    ff <- function(x) {
      # a.Preparing dataset

      if (length(dates) != length(x)) {
        stop("N of dates and files do not match")
      }
      if (length(x) < length(anop)) {
        stop("Inconsistent anop. Argument anop can't be grater than length(x)")
      }

      # ref.min <- min(refp)
      # ref.max <- max(refp)
      ano.min <- min(anop)
      ano.max <- max(anop)
      ano.len <- ano.max - ano.min + 1
      len2 <- 2 * ano.len

      if (ano.min >= ano.max) {
        stop("for anop, lower value > upper value")
      }

      if (all(is.na(x))) {
        return(rep(NA, len2))
      }

      DOY <- lubridate::yday(dates)
      DOY[which(DOY == 366)] <- 365
      
      D2 <- cbind(DOY[ano.min:ano.max], x[ano.min:ano.max])

      if (all(is.na(D2[, 2]))) {
        return(rep(NA, len2))
      }

      if (h != 1 && h != 2) {
        stop("Invalid h")
      }
      DOGS <- cbind(seq(1, 365), c(seq(185, 365), seq(1, 184)))

      # b. Calculating anomalies AND their RFD based on D2
      if (h == 2) {
        for (i in 1:nrow(D2)) {
          D2[i, 1] <- DOGS[which(DOGS[, 1] == D2[i, 1], arr.ind = TRUE), 2]
        }
      }

      # b.1 Anomalies
      MAXY <- phenref$MAXY
      
      Anoma <- rep(NA, ano.len)
      for (i in 1:nrow(D2)) {
        Anoma[i] <- as.integer(D2[i, 2] - MAXY[D2[i, 1]])
      }
      Anoma[1:ano.len]

      # b.2 RFD
      rowAnom <- matrix(NA, nrow = nrow(D2), ncol = 500)
      for (i in 1:nrow(D2)) {
        rowAnom[i, ] <- abs(phenref$y - D2[i, 2])
      }
      rowAnom2 <- unlist(apply(rowAnom, 1, function(x) {
        if (all(is.na(x))) {
          NA
        } else {
          which.min(x)
        }
      }))
      AnomRFD <- rep(NA, nrow(D2))
      for (i in 1:nrow(D2)) {
        AnomRFD[i] <- phenref$cumDensity[D2[i, 1], rowAnom2[i]]
      }
      AnomRFDPerc <- round(100 * (AnomRFD))
      AnomRFDPerc[1:ano.len]
      # Two results in a single numeric vector
      AnomPLUSRFD <- c(Anoma[1:ano.len], AnomRFDPerc[1:ano.len])
      AnomPLUSRFD[1:len2]
    }

    #----------------------------------------------------------------------------------------
    # cluster processing
    app(s, fun = ff, filename = outname, cores = nCluster, overwrite = T, wopt = list(datatype = datatype))
  }