R/assign_season.R

Defines functions assign_season

Documented in assign_season

#' @title Filter phenological cycles
#' @description Filter detected cycles basing on dates of begin / end / peak,
#' or limit the number of cycles allowed for any specified season.
#' @param pheno Cycle allocation (data table generated by `extract_pheno()`
#'  or `cut_cycles()`).
#' @param seasons Character vector with the season names to be used.
#' @param pop Vector with the expected dates of cycle peaks (one per season,
#'  it must be of the same length of `seasons`), in the form `"mm-dd"` 
#'  (month-day). If provided, it is used to filter cycles among seasons
#'  (see argument `max_n_cycles`).
#' @param pop_win List with the allowed ranges for the dates of
#'  cycle peaks (one per season): each element is a two-length character vector 
#'  with two elements, in the form `"mm-dd"` (month-day), representing the range 
#'  of the temporal window (see default value as example) within the
#'  corresponding season. The list must be of the same length of `seasons`.
#'  If `season` is one-length, `pop_seasons` can be also a two-length vector.
#'  Each element of the list can be left to NA in order not to specify any
#'  filtering condition.
#' @param sos_win List with the allowed ranges for the dates of
#'  start of cycle (see `pop_seasons` for details about the format).
#' @param eos_win Named list with the allowed ranges for the dates of
#'  end of cycle (see `pop_seasons` for details about the format).
#' @param pop_name Character name of the field in `pheno` to be used as metric
#'  for the date of peak (associated with `pop_seasons`).
#' @param sos_name Character name of the field in `pheno` to be used as metric
#'  for the date of start of cycle (associated with `sos_seasons`).
#'  Set to NULL in order not to apply.
#' @param eos_name Character name of the field in `pheno` to be used as metric
#'  for the date of end of cycle (associated with `pop_seasons`).
#'  Set to NULL in order not to apply.
#' @param max_n_cycles (optional) Maximum number of cycles to be detected in one 
#'  season (default: Inf, meaning that all the identified cycles are kept).
#'  If `pop` is provided, the cycles with the corresponding dates of peak closer
#'  to the dates set in argument `pop` (for each seasons) are selected;
#'  otherwise, the field `weight` of the input `pheno` dataset is considered
#'  (cycles with the higher values are selected).
#' @param rm_unassigned (optional) Logical: should cycles which do not match with 
#'  any season (basing on the settings) be dropped from the output? 
#'  (Default: TRUE)
#' @return The input data table, filtered basing on arguments and with the 
#' addition of the field `season`, containing the name of each season 
#' (one among the ones specified in argument `seasons`) associated to each 
#' cycle.
#' If `seasons = NA` this field is not returned.
#' @author Luigi Ranghetti, PhD (2020) \email{luigi@@ranghetti.info}
#' @import data.table
#' @export
#' @examples 
#' # Load input data
#' data("dt_cycles")
#' data("dt_pheno")
#' data("ts_filled") # used for plots
#' 
#' # Filter one cycle per year, standard parameters (keep the most relevant cycle)
#' dt_cycles
#' dt_cycles_seas <- assign_season(dt_cycles, max_n_cycles = 1)
#' dt_cycles_seas
#' plot(ts_filled, pheno = dt_cycles_seas)
#' 
#' # Filter one cycle per year, keep the one with the peak clostest to 1st August
#' dt_pheno_seas1 <- assign_season(dt_pheno, max_n_cycles = 1, pop = "08-01")
#' plot(ts_filled, pheno = dt_pheno_seas1)
#' 
#' # Filter cycles with start of season between 1st February and 30th April
#' dt_pheno_seas2 <- assign_season(
#'   dt_pheno, 
#'   sos_win = c("02-01", "04-30"),
#' )
#' plot(ts_filled, pheno = dt_pheno_seas2)
#' 
#' # Assign season names: "winter" for winter crops, "summer" for summer crops
#' # (defining winter crops as crops seeded between October and March,
#' # summer crops as crops seeded between April and August)
#' dt_pheno_seas3 <- assign_season(
#'   dt_pheno, 
#'   seasons = c("winter", "summer"),
#'   sos_win = list(c("10-01", "03-31"), c("04-01", "08-31")),
#'   sos_name = "begin",
#'   rm_unassigned = FALSE
#' )
#' # notice the new column "season"
#' dt_pheno_seas3


assign_season <- function(
  pheno,
  seasons = NA,
  # seasons = c("winter", "summer"),
  # pop = list("winter" = "04-01", "summer" = "08-01"),
  # pop_win = list("winter" = c("12-01","05-31"), "summer" = c("06-01","11-30")),
  pop = NULL,
  pop_win = NULL,
  sos_win = NULL,
  eos_win = NULL,
  pop_name = "pop",
  sos_name = "sos",
  eos_name = "eos",
  max_n_cycles = 2,
  rm_unassigned = TRUE
) {
  
  # Avoid check notes for data.table related variables
  season <- weight <- id <- pop_diff <- NULL
  
  ## Check arguments
  
  # change seasons if not provided
  if (anyNA(seasons)) {
    seasons <- "noseasons"
  }
  
  # convert argyuments which must be lists
  if (!inherits(pop, "list") & !is.null(pop)) {pop <- as.list(pop)}
  if (!inherits(pop_win, "list") & !is.null(pop_win)) {pop_win <- list(pop_win)}
  if (!inherits(sos_win, "list") & !is.null(sos_win)) {sos_win <- list(sos_win)}
  if (!inherits(eos_win, "list") & !is.null(eos_win)) {eos_win <- list(eos_win)}
  
  # correspondence between "seasons" and associated arguments
  if (is.null(pop)) {} else if (length(pop) == length(seasons)) {
    names(pop) <- seasons # TODO check existing names
  } else {
    print_message(
      type = "error",
      "Arguments 'pop' must be of the same length of 'seasons' ",
      "(see documentation)."
    )
  }
  if (is.null(pop_win)) {} else if (length(pop_win) == length(seasons)) {
    names(pop_win) <- seasons # TODO check existing names
  } else {
    print_message(
      type = "error",
      "Arguments 'pop_win' must be of the same length of 'seasons' ",
      "(see documentation)."
    )
  }
  if (is.null(sos_win)) {} else if (length(sos_win) == length(seasons)) {
    names(sos_win) <- seasons # TODO check existing names
  } else {
    print_message(
      type = "error",
      "Arguments 'sos_win' must be of the same length of 'seasons' ",
      "(see documentation)."
    )
  }
  if (is.null(eos_win)) {} else if (length(eos_win) == length(seasons)) {
    names(eos_win) <- seasons # TODO check existing names
  } else {
    print_message(
      type = "error",
      "Arguments 'eos_win' must be of the same length of 'seasons' ",
      "(see documentation)."
    )
  }
  
  ## 1. Assign season names
  pheno_dt <- copy(pheno)
  for (s in seasons) {
    pheno_dt[
      c(if (is.null(pop_win)) rep(TRUE, .N) else do.call(
        if (package_version(pop_win[[s]][1]) < package_version(pop_win[[s]][2])) `&` else `|`, 
        list(
          package_version(strftime(get(pop_name), "%m.%d"), strict = FALSE) >= package_version(pop_win[[s]][1]),
          package_version(strftime(get(pop_name), "%m.%d"), strict = FALSE) <= package_version(pop_win[[s]][2])
        ))) &
        c(if (is.null(sos_win)) rep(TRUE, .N) else do.call(
          if (package_version(sos_win[[s]][1]) < package_version(sos_win[[s]][2])) `&` else `|`, 
          list(
            package_version(strftime(get(sos_name), "%m.%d"), strict = FALSE) >= package_version(sos_win[[s]][1]),
            package_version(strftime(get(sos_name), "%m.%d"), strict = FALSE) <= package_version(sos_win[[s]][2])
          ))) &
        c(if (is.null(eos_win)) rep(TRUE, .N) else do.call(
          if (package_version(eos_win[[s]][1]) < package_version(eos_win[[s]][2])) `&` else `|`, 
          list(
            package_version(strftime(get(eos_name), "%m.%d"), strict = FALSE) >= package_version(eos_win[[s]][1]),
            package_version(strftime(get(eos_name), "%m.%d"), strict = FALSE) <= package_version(eos_win[[s]][2])
          ))),
      "season" := s
    ]
  }
  
  ## 2. Exclude undetected seasons
  if (rm_unassigned == TRUE) {pheno_dt <- pheno_dt[!is.na(season),]}
  
  ## 3. Filter cycles basing on numbers
  # (field "weight" must be present)
  pop_date <- pop
  if (is.null(pop)) {
    # filter basing on weight
    pheno_dt[,rank:=1+.N-rank(weight),by=list(id,year,season)]
    pheno_dt <- pheno_dt[rank<=max_n_cycles,]
    pheno_dt[,c("rank"):=NULL]
  } else {
    for (sel_season in seasons) {
      pheno_dt[
        season == sel_season, 
        pop_diff := as.integer(pmin(
          abs(get(pop_name) - as.Date(paste0(as.integer(year)-1,"-",pop_date[[sel_season]]))),
          abs(get(pop_name) - as.Date(paste0(year,"-",pop_date[[sel_season]]))),
          abs(get(pop_name) - as.Date(paste0(as.integer(year)+1,"-",pop_date[[sel_season]])))
        ))
      ]
    } 
    # filter basing on this metric
    pheno_dt[,rank:=rank(pop_diff),by=list(id,year,season)]
    pheno_dt <- pheno_dt[rank<=max_n_cycles,]
    pheno_dt[,c("pop_diff","rank"):=NULL]
  }
  
  first_rows <- match(c("id","year","cycle","season"),names(pheno_dt))
  setcolorder(
    pheno_dt,
    c(first_rows,seq_len(ncol(pheno_dt))[!seq_len(ncol(pheno_dt)) %in% first_rows])
  )
  if (all(seasons == "noseasons")) {pheno_dt$season <- NULL}
  attr(pheno_dt, "gen_by") <- "assign_season"
  pheno_dt  
  
}
ranghetti/sen2rts documentation built on March 31, 2024, 1:18 a.m.