TopHRU: Threshold optimization for HRUs in SWAT

Documented in evaluate_hru

#' topHRU - threshold optimization for HRUs in SWAT
#'
#' @param hru_table data.frame with the unaggregated information for the HRUs
#' @param luse_thrs Land use threshold vector with min, max and stepwidth.
#' @param soil_thrs Soil threshold vector with min, max and stepwidth.
#' @param slp_thrs Slope threshold vector with min, max and stepwidth.
#' @param thrs_type Threshold type. All threshold vectors must be given as
#'   percentages when set as 'P' or area in ha when set to 'A'.
#' @param weight Weights for land use, soil and slope for the calculation of
#'   aREA. Any given numbers will be normalized.
#'
#' @importFrom abind abind
#' @importFrom emoa is_dominated
#'
#' @return \code{.$result_all}: data.frame with the complete results of the
#'   analysis.\cr \code{.$result_nondominated}: Results for the non dominated threshold
#'   combinations.
#' @export
#'
#' @examples
#' head(hru_demo)
#' hru_analysis <- evaluate_hru(hru_table = hru_demo)

evaluate_hru <- function(hru_table, luse_thrs = c(0,20,5), soil_thrs = c(0,20,5),
                   slp_thrs = c(0,20,5), thrs_type = c("P", "A"),
                   weight = c(1,1,1)) {


# General queries to do function settings ----------------------------------
  if(any( c(luse_thrs, soil_thrs, slp_thrs) < 0)){
    stop("Any of the input threshold values is negative.
          All values must be positive!")
  }
  thrs_type <- thrs_type[1]
  switch(thrs_type,
         "A" = {thrs_fact <- 1},
         "P" = {thrs_fact <- 1/100},
         stop("Invalid threshold type. Must be either 'A', or 'P'!"))

  luse_multi <- TRUE
  soil_multi <- TRUE
  slp_multi  <- TRUE
  if (length(unique(hru_table$LANDUSE)) == 1) {
    warning("Only one unique land use class found. Input land use thresholds nwill be ignored!")
    luse_thrs <- c(0,0,1)
    luse_multi <- FALSE
  }

  if (length(unique(hru_table$SOIL)) == 1) {
    warning("Only one unique soil class found. Input soil thresholds will be ignored!")
    soil_thrs <- c(0,0,1)
    soil_multi <- FALSE
  }

  if (length(unique(hru_table$SLP)) == 1) {
    warning("Only one unique slope class found. Input slope thresholds will be ignored!")
    slp_thrs <- c(0,0,1)
    slp_multi <- FALSE
  }


# Generate threshold combinations ------------------------------------------
  luse_seq <- seq(thrs_fact*luse_thrs[1],
                  thrs_fact*luse_thrs[2],
                  thrs_fact*luse_thrs[3])
  soil_seq <- seq(thrs_fact*soil_thrs[1],
                  thrs_fact*soil_thrs[2],
                  thrs_fact*soil_thrs[3])
  slp_seq  <- seq(thrs_fact*slp_thrs[1],
                  thrs_fact*slp_thrs[2],
                  thrs_fact*slp_thrs[3])

  thrs_comb <- expand.grid(luse_seq, soil_seq, slp_seq)
  colnames(thrs_comb) <- c("LUSE_thrs", "SOIL_thrs", "SLOPE_thr")


# Read out all existing categories for subbasin, landuse, soil and slope ---
  sub_id  <- sort(unique(hru_table$SUBBASIN))
  luse_id <- sort(unique(hru_table$LANDUSE))
  soil_id <- sort(unique(hru_table$SOIL))
  slp_id  <- sort(unique(hru_table$SLP))


# Generate data structures -------------------------------------------------
  # For the three attributes land use, soil, and slope the following
  # structures are generated for further calculation:
  # - ..._default:
  #   default distribution of the areas without any aggregation applied.
  #   These are calculated for all hirachy levels. NA valuse are set to 0
  # - ..._ref:
  #   Structure for the reference distribution of each hierachy level.
  #   Reference distribution is the distribution without thresholds applied,
  #   aggregated for each hierarchy level per subbasin.
  # - ..._cal:
  #   Structure for the computed area distribution of each hierachy level.
  #   Computed distribution is the distribution with thresholds applied,
  #   aggregated for each hierarchy level per sub
  # - ..._fac:
  #   Structure for factors that are applied according default distribution.
  # Factors are generated by applying thresholds to default distribution
  # if a factor = 0, then the element is erased,
  # if a factor = 1, then all elements remain unchanged,
  # if a factor > 1, then the element is reapportionated by the factor due to
  # loss of other elements
  # - ..._err:
  #   Structure for the residuals between the reference andcomputed
  #   distribution
  if(luse_multi){
    luse_default <- matrix(data = NA, ncol = length(sub_id),
                           nrow = length(luse_id), dimnames = list(luse_id,
                                                                   sub_id))
    luse_default <- with(hru_table, tapply(ARSLP, list(LANDUSE, SUBBASIN),
                                          sum))
    luse_default[is.na(luse_default)] <- 0

    luse_ref <- array(data = NA, dim = c(length(sub_id), length(luse_id),
                                         nrow(thrs_comb)),
                      dimnames = list(sub_id, luse_id, rownames(thrs_comb)))

    luse_cal <- array(data = NA, dim = c(length(sub_id), length(luse_id),
                                         nrow(thrs_comb)),
                      dimnames = list(sub_id, luse_id, rownames(thrs_comb)))

    luse_fac <- array(data = NA, dim = c(length(luse_id), length(sub_id),
                                         length(luse_seq)),
                      dimnames = list(luse_id, sub_id, luse_seq))

    luse_err <- array(data = NA, dim = c(length(sub_id), length(luse_id),
                                         nrow(thrs_comb)),
                      dimnames = list(sub_id, luse_id, rownames(thrs_comb)))
  } else {
    luse_fac <- array(data = 1, dim = c(length(luse_id), length(sub_id),
                                         length(luse_seq)),
                      dimnames = list(luse_id, sub_id, luse_seq))

    luse_err <- array(data = 0, dim = c(length(sub_id), length(luse_id),
                                         nrow(thrs_comb)),
                      dimnames = list(sub_id, luse_id, rownames(thrs_comb)))
  }

  if(soil_multi){
    soil_default <- array(data = NA, dim = c(length(soil_id), length(sub_id),
                                             length(luse_id)),
                          dimnames = list(soil_id, sub_id, luse_id))
    soil_default[,,] <- with(hru_table, tapply(ARSLP, list(SOIL, SUBBASIN,
                                                          LANDUSE), sum))
    soil_default[,,][is.na(soil_default[,,])] <- 0

    soil_ref <- array(data = NA, dim = c(length(sub_id), length(soil_id),
                                         nrow(thrs_comb)),
                      dimnames = list(sub_id, soil_id, rownames(thrs_comb)))

    soil_cal <- array(data = NA, dim = c(length(sub_id), length(soil_id),
                                         nrow(thrs_comb)),
                      dimnames = list(sub_id, soil_id, rownames(thrs_comb)))

    soil_fac <- array(data = NA, dim = c(length(soil_id), length(sub_id),
                                         length(luse_id), length(soil_seq)),
                      dimnames = list(soil_id, sub_id, luse_id, soil_seq))

    soil_err <- array(data = NA, dim = c(length(sub_id), length(soil_id),
                                         nrow(thrs_comb)),
                      dimnames = list(sub_id, soil_id, rownames(thrs_comb)))
  } else {
    soil_fac <- array(data = 1, dim = c(length(soil_id), length(sub_id),
                                         length(luse_id), length(soil_seq)),
                      dimnames = list(soil_id, sub_id, luse_id, soil_seq))

    soil_err <- array(data = 0, dim = c(length(sub_id), length(soil_id),
                                         nrow(thrs_comb)),
                      dimnames = list(sub_id, soil_id, rownames(thrs_comb)))
  }

  if(slp_multi){
    slp_default <- array(data = NA, dim = c(length(slp_id), length(sub_id),
                                            length(luse_id), length(soil_id)),
                         dimnames = list(slp_id, sub_id, luse_id, soil_id))
    slp_default[,,,] <- with(hru_table, tapply(ARSLP, list(SLP, SUBBASIN,
                                                          LANDUSE, SOIL),
                                              sum))
    slp_default[,,,][is.na(slp_default[,,,])] <- 0

    slp_ref <- array(data = NA, dim = c(length(sub_id), length(slp_id),
                                        nrow(thrs_comb)),
                     dimnames = list(sub_id, slp_id, rownames(thrs_comb)))

    slp_cal <- array(data = NA, dim = c(length(sub_id), length(slp_id),
                                        nrow(thrs_comb)),
                     dimnames = list(sub_id, slp_id, rownames(thrs_comb)))

    slp_fac <- array(data = NA, dim = c(length(slp_id), length(sub_id),
                                        length(luse_id),length(soil_id),
                                        length(slp_seq)),
                     dimnames = list(slp_id, sub_id, luse_id, soil_id,
                                     slp_seq))

    slp_err <- array(data = NA, dim = c(length(sub_id), length(slp_id),
                                        nrow(thrs_comb)),
                     dimnames = list(sub_id, slp_id, rownames(thrs_comb)))
  } else {
    slp_fac <- array(data = 1, dim = c(length(slp_id), length(sub_id),
                                        length(luse_id),length(soil_id),
                                        length(slp_seq)),
                     dimnames = list(slp_id, sub_id, luse_id, soil_id,
                                     slp_seq))

    slp_err <- array(data = 0, dim = c(length(sub_id), length(slp_id),
                                        nrow(thrs_comb)),
                     dimnames = list(sub_id, slp_id, rownames(thrs_comb)))
  }

  # Generate data structure for the results of the analysis
  result <- data.frame(matrix(data = NA, ncol = 6, nrow = nrow(thrs_comb),
                              dimnames = list(NULL, c("thrs_comb",
                                                      "n_HRU",
                                                      "luse_res",
                                                      "soil_res",
                                                      "slp_res",
                                                      "aREA"))))


# Computation --------------------------------------------------------------
  # Calculation of factors for each hirarchical level,
  # for LANDUSE the factors depend on landuse, subbasin and threshold
  # (in that order in array dimensions)
  # for SOIL the factors depend on soil, subbasin, landuse and threshold
  # for SLOPE the factors depend on slope, subbasin, landuse, soil and
  # threshold
  if(luse_multi){
    for (i in seq(along = luse_seq)){
      luse_fac[,,i] <- apply(luse_default, 2, reapp_fac,
                             thrs = luse_seq[i], thrs_type = thrs_type)
      }
  }

  if(soil_multi){
    for (i in seq(along = soil_seq)){
      soil_fac[,,,i] <- apply(soil_default, c(2, 3), reapp_fac,
                              thrs = soil_seq[i], thrs_type = thrs_type)
    }
  }

  if(slp_multi){
    for (i in seq(along = slp_seq)){
      slp_fac[,,,,i] <- apply(slp_default, c(2, 3, 4), reapp_fac,
                              thrs = slp_seq[i], thrs_type = thrs_type)
    }
  }

  # Calculation of area distributions for each hierarchy level depending on
  # threshold combination
  # AREA for each default HRU is multiplied by the factors for each level
  # Factors are selected from factor data structure, e.g. SLOPE.fac
  # Using the columns LANDUSE, SOIL, SLOPE in hru_table and the according
  # threshold as indices
  # the so "updated AREA" is then aggregated according to each hierarchy level
  # The number of resulting HRU is calculated by counting all cells of "updated
  # AREA" that are > 0


  #Set up a progress bar that documents the progress of calculation
  print(paste("Calculating aREA for", nrow(thrs_comb),
              "threshold combinations:"))
  prgr_bar <- txtProgressBar(max = nrow(thrs_comb), initial = 0, style = 3)

  for (j in 1:nrow(thrs_comb)){
    area_mod <- with(hru_table, ARSLP *
                            luse_fac[cbind(LANDUSE, SUBBASIN,
                                     which(luse_seq == thrs_comb[j, 1]))]*
                            soil_fac[cbind(SOIL, SUBBASIN, LANDUSE,
                                     which(soil_seq == thrs_comb[j, 2]))]*
                            slp_fac[cbind(SLP, SUBBASIN, LANDUSE, SOIL,
                                    which(slp_seq == thrs_comb[j, 3]))])
    if(luse_multi){
      luse_cal[,,j] <- with(hru_table, tapply(area_mod,
                                             list(SUBBASIN, LANDUSE), sum))
    }
    if(soil_multi){
      soil_cal[,,j] <- with(hru_table, tapply(area_mod,
                                             list(SUBBASIN, SOIL), sum))
    }
    if(slp_multi){
      slp_cal[,,j]  <- with(hru_table, tapply(area_mod,
                                             list(SUBBASIN, SLP), sum))
    }

    result[j,2] <- with(hru_table, sum(area_mod > 0))

    # Update progress bar
    setTxtProgressBar(prgr_bar, j)
  }

  # Set non existing values from NA to 0.
  if(luse_multi){ luse_cal[,,][is.na(luse_cal[,,])] <- 0}
  if(soil_multi){ soil_cal[,,][is.na(soil_cal[,,])] <- 0}
  if(slp_multi) { slp_cal[,,] [is.na(slp_cal[,,])]  <- 0}

  # Calculate reference area distribution of each hierachy level, extend
  # format of matrix by the number of threshold combinations to match the
  # format of data structure for calculated values set non existing values
  # from NA to 0.
  # Calculate residuals between computed and reference values and divide
  # by two.

  if(luse_multi){
    luse_ref <- array(rep(with(hru_table,
                               tapply(ARSLP, list(SUBBASIN, LANDUSE), sum)),
                          times = nrow(thrs_comb)),
                      dim = c(length(sub_id),length(luse_id),nrow(thrs_comb)))
    luse_ref[is.na(luse_ref)] <- 0

    luse_err <- (luse_cal - luse_ref)/2
  }

  if(soil_multi){
    soil_ref <- array(rep(with(hru_table,
                               tapply(ARSLP, list(SUBBASIN, SOIL), sum)),
                          times = nrow(thrs_comb)),
                      dim = c(length(sub_id),length(soil_id),nrow(thrs_comb)))
    soil_ref[is.na(soil_ref)] <- 0

      soil_err <- (soil_cal - soil_ref)/2
  }

  if(slp_multi) {
    slp_ref <- array(rep(with(hru_table,
                              tapply(ARSLP, list(SUBBASIN, SLP), sum)),
                         times = nrow(thrs_comb)),
                       dim = c(length(sub_id),length(slp_id),nrow(thrs_comb)))
    slp_ref[is.na(slp_ref)] <- 0

    slp_err  <- (slp_cal  - slp_ref)/2
  }

# Results ------------------------------------------------------------------

  # Combine single thresholds to single character
  thrs_comb <- thrs_comb / thrs_fact

  result[,1] <- paste(thrs_comb$LUSE_thrs,
                      thrs_comb$SOIL_thrs,
                      thrs_comb$SLOPE_thr,
                      sep = "_")

  # Write single residual contributions of the three attributes
  if(luse_multi){
    result[,3] <- apply(luse_err, 3, aREA) / with(hru_table, sum(ARSLP))
  }
  if(soil_multi){
  result[,4] <- apply(soil_err, 3, aREA) / with(hru_table, sum(ARSLP))
  }
  if(slp_multi){
  result[,5] <- apply(slp_err,  3, aREA) / with(hru_table, sum(ARSLP))
  }

  # Apply error measure
  weight <- 3*weight/sum(weight)
  n_cat  <- 3 - sum(c(!luse_multi, !soil_multi, !slp_multi))

  result[,6] <- apply(abind(weight[1]*luse_err,
                            weight[2]*soil_err,
                            weight[3]*slp_err, along = 2), 3, aREA) /
    (n_cat * with(hru_table, sum(ARSLP)))

  dom_set <- is_dominated(as.matrix(t(result[,c(2,6)])))
  result_nondom <- result[!dom_set,]

  result$Pareto_front <- NA
  result$Pareto_front[!dom_set] <- "non dominated"
  result$Pareto_front[dom_set] <- "dominated"

  out_list <- list(result_all = result,
                   result_nondominated = result_nondom)

  return(out_list)
}