R/6.calculate_doy_flags.R

#' Calculate DOY-Based Flags and Summary Statistics from Raster and Fire Polygons
#' @description
#' This function extracts Day-of-Year (DOY) values from a raster (e.g., from a Landsat composite),
#' masked by burned area polygons, and computes per-polygon statistics: mode, median,
#' and optionally percentiles (e.g., 5th, 25th, 95th). These DOY values are converted to calendar dates,
#' and their components (day, month, year) are also included.
#'
#' Optionally, the function can threshold the DOY band around the median and/or mode of each polygon
#' using user-defined DOY windows (e.g., ±10 days), create binary raster masks, and vectorize them using GDAL.
#'
#' Use this function to assign a representative burning date (DOY) to each fire event and to discard events with unusually high internal variation in DOY among their pixels.
#'
#' The function supports either a single shapefile (original burned areas) or a named list
#' of shapefiles or `sf` objects generated with different Otsu thresholds.
#'
#' @name calculate_doy_flags
#' @rdname calculate_doy_flags
#'
#' @param raster A multi-band `SpatRaster` object. One of the bands must contain DOY values.
#' @param doy_band Integer. The index of the DOY band in the raster (default is 2).
#' @param polygons_sf A single shapefile path, an `sf` object, or a named list of shapefiles or `sf` objects.
#'                    Names are used to label outputs (e.g., `"ge50"`, `"ge100"`).
#' @param output_dir Directory where output raster and shapefile files will be saved.
#' @param year Calendar year (numeric or character) used for DOY-to-date conversion and filenames.
#' @param doy_thresholds Numeric vector of DOY windows (e.g., `c(10, 15)`) to apply around the selected statistic(s).
#' @param stats Statistic(s) to use for thresholding. One of `"median"`, `"mode"`, or `"both"` (default `"both"`).
#' @param calc_percentiles Logical. If `TRUE`, compute additional DOY percentiles. Default is `TRUE`.
#' @param percentiles Numeric vector of percentiles to calculate (default: `c(0.05, 0.25, 0.95)`).
#' @param polygonize Logical. If `TRUE`, vectorize the DOY binary masks using GDAL (`gdal_polygonize.py`). Default is `TRUE`.
#' @param python_exe Path to the Python executable. Required if `polygonize = TRUE`.
#' @param gdal_polygonize_script Path to `gdal_polygonize.py`. Required if `polygonize = TRUE`.
#' @param get_mode Optional custom function to calculate the mode. Default function provided.
#' @param get_median Optional custom function to calculate the median. Default function provided.
#' @param get_quantile Optional custom function to calculate percentiles. Default function provided.
#' @param keep_all_polygons Logical. If `TRUE`, all polygons are kept and a flag column is added indicating which polygons meet the threshold condition.
#'        If `FALSE`, only polygons meeting the condition are saved. Default: `TRUE`.
#'
#' @return A named list with two elements:
#' \describe{
#'   \item{sf_objects}{Named list of `sf` outputs containing DOY statistics and date components for each input polygon set.}
#'   \item{shp_paths}{Named list of output shapefile paths, including DOY summary shapefiles and any polygonized threshold layers.}
#' }
#'
#' @note Examples require large external raster files (hosted on Zenodo)
#' and depend on external software (Python, GDAL). Therefore, they are wrapped
#' in `dontrun{}` to avoid errors during `R CMD check` and to ensure portability.
#'
#' @examples
#' \dontrun{
#' # Case 1: Multiple Otsu-based burned area shapefiles
#' rast <- terra::rast("rbr_doy_stack_1987.tif")
#' polys <- list(
#'   ge50 = "burned_areas_1987_otsu_ge50.shp",
#'   ge100 = "burned_areas_1987_otsu_ge100.shp"
#' )
#' res <- calculate_doy_flags(
#'   raster = rast,
#'   doy_band = 2,
#'   polygons_sf = polys,
#'   output_dir = "output/",
#'   year = 1987,
#'   stats = "both",
#'   doy_thresholds = c(10, 15),
#'   calc_percentiles = TRUE,
#'   percentiles = c(0.05, 0.25, 0.95),
#'   polygonize = TRUE,
#'   python_exe = "/usr/bin/python",
#'   gdal_polygonize_script = "/usr/bin/gdal_polygonize.py"
#' )
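#'
#' # Inspect the returned list (illustrative: element names follow the pattern
#' # "thresh_<window>_<stat>_<label>", so the exact names depend on the inputs)
#' names(res$sf_objects$stats)   # e.g., "thresh_10_median_ge50"
#' res$shp_paths$stats           # paths of the shapefiles written to output_dir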
#'
#' # Case 2: Single shapefile without polygonization
#' calculate_doy_flags(
#'   raster = rast,
#'   polygons_sf = "burned_areas_1987_origval.shp",
#'   output_dir = "output/",
#'   year = 1987,
#'   stats = "median",
#'   doy_thresholds = 10,
#'   calc_percentiles = FALSE,
#'   polygonize = FALSE
#' )
#' }
#'
#' @note The DOY raster band should be derived from a Landsat composite generated in Google Earth Engine (GEE),
#' following the approach described by Quintero et al. (2025).
#'
#' @references
#' Quintero, N., Viedma, O., Veraverbeke, S., & Moreno, J. M. (2025).
#' *Optimising Regional Fire Severity Mapping Using Pixel-Based Image Compositing*. SSRN 4929831.
#'
#' @importFrom sf st_read st_write st_transform st_crs
#' @importFrom terra crop mask extract classify writeRaster rasterize vect ncell
#' @importFrom data.table as.data.table setnames
#' @importFrom stats median quantile
#' @importFrom stats na.omit setNames
#' @importFrom utils glob2rx
#' @importFrom grDevices dev.off png
#' @importFrom graphics abline axis legend mtext par
#' @importFrom dplyr filter first
#' @importFrom data.table :=
#' @importFrom magrittr %>%
#' @importFrom stringr str_detect str_replace
#' @export

utils::globalVariables(c(
  ".", "ID", "P1_id", "abline", "area_ha", "area_m2", "axis",
  "burn_area_ha", "burned_id", "dev.off", "doy", "fid_final",
  "first", "glob2rx", "h_w_ratio", "int_area_ha", "legend", "mnbbx_wd",
  "mtext", "na.omit", "ornt_bbox", "p_w_ratio", "par", "png",
  "regen_area_ha", "regen_area_ha_sum", "setNames", "total_burned_area",
  "year", "%>%", ":="
))

calculate_doy_flags <- function(
    raster,
    doy_band = 2,
    polygons_sf,
    output_dir,
    year,
    doy_thresholds = c(10, 15, 20, 30),
    stats = c("both", "median", "mode"),
    calc_percentiles = TRUE,
    percentiles = c(0.05, 0.25, 0.95),
    polygonize = TRUE,
    keep_all_polygons = TRUE,
    python_exe = NULL,
    gdal_polygonize_script = NULL,
    get_mode = function(x) {
      if (length(x) == 0 || all(is.na(x))) return(NA_real_)
      ux <- unique(na.omit(x))
      ux[which.max(tabulate(match(x, ux)))]
    },
    get_median = function(x) {
      if (length(x) == 0 || all(is.na(x))) return(NA_real_)
      as.numeric(median(x, na.rm = TRUE))
    },
    get_quantile = function(x, prob) {
      if (length(x) == 0 || all(is.na(x))) return(NA_real_)
      as.numeric(quantile(x, probs = prob, na.rm = TRUE, names = FALSE, type = 7))
    }
) {
  stats <- match.arg(stats)
  sf_outputs <- list(stats = list())
  shp_paths  <- list(stats = list())

  # Organize the input into a named list of polygon layers
  if (inherits(polygons_sf, "sf")) {
    polygons_sf <- setNames(list(polygons_sf), "sf_input")
  } else if (is.character(polygons_sf) && length(polygons_sf) > 1) {
    names(polygons_sf) <- tools::file_path_sans_ext(basename(polygons_sf))
    polygons_sf <- as.list(polygons_sf)
  } else if (is.character(polygons_sf)) {
    label <- tools::file_path_sans_ext(basename(polygons_sf))
    polygons_sf <- setNames(list(polygons_sf), label)
  } else if (is.null(names(polygons_sf)) || any(names(polygons_sf) == "")) {
    names(polygons_sf) <- sapply(polygons_sf, function(x) {
      if (is.character(x)) tools::file_path_sans_ext(basename(x)) else "sf_input"
    })
  }

  # Process each polygon layer
  for (label in names(polygons_sf)) {
    shp <- polygons_sf[[label]]
    if (is.character(shp) && file.exists(shp)) {
      shp <- sf::st_read(shp, quiet = TRUE)
    }
    if (!inherits(shp, "sf")) {
      warning("Input invalido para: ", label)
      next
    }

    shp$fire_id <- seq_len(nrow(shp))

    # Reproject the polygons to the raster CRS before cropping and masking
    if (!sf::st_crs(shp) == sf::st_crs(terra::crs(raster))) {
      shp <- sf::st_transform(shp, terra::crs(raster))
    }

    rbr_crop <- terra::mask(terra::crop(raster, shp), terra::vect(shp))
    doy_raster <- rbr_crop[[doy_band]]

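    # Extract per-pixel DOY values; the ID column links each pixel to its polygon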
    vals <- terra::extract(doy_raster, terra::vect(shp), bind = FALSE)
    vals_dt <- data.table::as.data.table(vals)
    names(vals_dt)[2] <- "doy"
    vals_dt <- vals_dt[!is.na(doy)]
    if (nrow(vals_dt) == 0) next

    # Compute per-polygon DOY statistics
    mode_by_fire   <- vals_dt[, .(mode_doy   = get_mode(doy)), by = ID]
    median_by_fire <- vals_dt[, .(median_doy = get_median(doy)), by = ID]
    merge_list <- list(mode_by_fire, median_by_fire)

    if (calc_percentiles && length(percentiles) > 0) {
      for (p in percentiles) {
        pname <- paste0("q", round(p * 100))
        q_by_fire <- vals_dt[, .(val = get_quantile(doy, p)), by = ID]
        data.table::setnames(q_by_fire, "val", paste0(pname, "_doy"))
        merge_list <- c(merge_list, list(q_by_fire))
      }
    }

    doy_stats <- Reduce(function(x, y) merge(x, y, by = "ID", all = TRUE), merge_list)
    shp_stats <- merge(shp, doy_stats, by.x = "fire_id", by.y = "ID", all.x = TRUE)

    if (!"area_m2" %in% names(shp_stats)) {
      shp_stats$area_m2 <- as.numeric(sf::st_area(shp_stats))
    }
    shp_stats$area_m2 <- round(shp_stats$area_m2, 2)

    # Convert DOY values to calendar dates
    date_vars <- c("mode_doy", "median_doy")
    if (calc_percentiles && length(percentiles) > 0) {
      q_names <- paste0("q", round(percentiles * 100), "_doy")
      date_vars <- c(date_vars, q_names)
    }

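    # as.Date(doy - 1, origin = "<year>-01-01") maps DOY 1 to January 1 of that
    # year; e.g., DOY 213 in 1987 corresponds to 1987-08-01.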
    for (var in date_vars) {
      shp_stats[[paste0(var, "_date")]]  <- as.Date(shp_stats[[var]] - 1, origin = paste0(year, "-01-01"))
      shp_stats[[paste0(var, "_day")]]   <- as.integer(format(shp_stats[[paste0(var, "_date")]], "%d"))
      shp_stats[[paste0(var, "_month")]] <- as.integer(format(shp_stats[[paste0(var, "_date")]], "%m"))
      shp_stats[[paste0(var, "_year")]]  <- as.integer(format(shp_stats[[paste0(var, "_date")]], "%Y"))
    }

    # Strip any "otsu_" prefix from the label for output file naming
    label_clean <- sub(".*otsu_", "", label)
    short_label <- label_clean

    # Loop over DOY threshold windows
    for (doy_thresh in doy_thresholds) {

      # --- MEDIAN ---
      if (stats %in% c("median", "both")) {
        stat_name <- "median"
        flag_mask <- doy_raster >= (terra::rasterize(terra::vect(shp_stats), doy_raster, field = "median_doy") - doy_thresh)
        flag_mask1 <- terra::classify(flag_mask, cbind(0, NA))

        out_tif <- file.path(output_dir, sprintf("DOY_%d_%s_%s.tif", doy_thresh, short_label, stat_name))
        if (file.exists(out_tif)) file.remove(out_tif)
        terra::writeRaster(flag_mask1, out_tif, overwrite = TRUE, datatype = "INT1U")

        if (polygonize && !is.null(python_exe) && !is.null(gdal_polygonize_script)) {
          shp_mask <- sub("\\.tif$", ".shp", out_tif)

          # --- Assign flag_doy: 1 if the polygon contains any flagged DOY pixel ---
          vals_shp <- terra::extract(flag_mask1, terra::vect(shp_stats), fun = max, na.rm = TRUE, ID = FALSE)
          shp_stats$flag_doy <- as.integer(!is.na(vals_shp[[1]]) & vals_shp[[1]] == 1)

          # --- Keep all polygons or only those meeting the DOY condition ---
          shp_stats_out <- if (keep_all_polygons) shp_stats else shp_stats[shp_stats$flag_doy == 1, ]

          if (nrow(shp_stats_out) > 0) {
            suppressWarnings(sf::st_write(shp_stats_out, shp_mask, delete_dsn = TRUE, quiet = TRUE))
            sf_outputs$stats[[paste0("thresh_", doy_thresh, "_", stat_name, "_", label)]] <- shp_stats_out
            shp_paths$stats[[paste0("thresh_", doy_thresh, "_", stat_name, "_", label)]] <- shp_mask
          }
        }
      }

      # --- MODE ---
      if (stats %in% c("mode", "both")) {
        stat_name <- "mode"
        flag_mask <- doy_raster >= (terra::rasterize(terra::vect(shp_stats), doy_raster, field = "mode_doy") - doy_thresh)
        flag_mask1 <- terra::classify(flag_mask, cbind(0, NA))

        out_tif <- file.path(output_dir, sprintf("DOY_%d_%s_%s.tif", doy_thresh, short_label, stat_name))
        if (file.exists(out_tif)) file.remove(out_tif)
        terra::writeRaster(flag_mask1, out_tif, overwrite = TRUE, datatype = "INT1U")

        if (polygonize && !is.null(python_exe) && !is.null(gdal_polygonize_script)) {
          shp_mask <- sub("\\.tif$", ".shp", out_tif)

          # --- Assign flag_doy: 1 if the polygon contains any flagged DOY pixel ---
          vals_shp <- terra::extract(flag_mask1, terra::vect(shp_stats), fun = max, na.rm = TRUE, ID = FALSE)
          shp_stats$flag_doy <- as.integer(!is.na(vals_shp[[1]]) & vals_shp[[1]] == 1)

          # --- Keep all polygons or only those meeting the DOY condition ---
          shp_stats_out <- if (keep_all_polygons) shp_stats else shp_stats[shp_stats$flag_doy == 1, ]

          if (nrow(shp_stats_out) > 0) {
            suppressWarnings(sf::st_write(shp_stats_out, shp_mask, delete_dsn = TRUE, quiet = TRUE))
            sf_outputs$stats[[paste0("thresh_", doy_thresh, "_", stat_name, "_", label)]] <- shp_stats_out
            shp_paths$stats[[paste0("thresh_", doy_thresh, "_", stat_name, "_", label)]] <- shp_mask
          }
        }
      }
    }

  }

  return(list(
    sf_objects = sf_outputs,
    shp_paths  = shp_paths
  ))
}
