R/scoring.R

Defines functions score_all score_custom score_trapezoid score_optimum score_less score_more

Documented in score_all score_custom score_less score_more score_optimum score_trapezoid

#' Score a Variable Where Higher Values Are Better
#'
#' @description
#' Applies a "more is better" linear scoring function, transforming raw
#' variable values to a 0--1 score.  This is appropriate for soil
#' indicators where higher values improve soil function, such as organic
#' carbon, microbial biomass, or cation exchange capacity
#' (Andrews et al., 2004; Karlen & Stott, 1994).
#'
#' The score is computed as:
#' \deqn{S_i = \frac{x_i - x_{\min}}{x_{\max} - x_{\min}}}
#'
#' where \eqn{x_{\min}} and \eqn{x_{\max}} are taken from the observed
#' data (or from user-supplied bounds).
#'
#' @param x Numeric vector of raw variable values.
#' @param x_min Numeric. Lower bound for scoring. Defaults to
#'   \code{min(x, na.rm = TRUE)}.
#' @param x_max Numeric. Upper bound for scoring. Defaults to
#'   \code{max(x, na.rm = TRUE)}.
#'
#' @return Numeric vector of scores in [0, 1].
#'
#' @references
#' Andrews, S.S., Karlen, D.L., & Cambardella, C.A. (2004). The soil
#' management assessment framework. \emph{Soil Science Society of America
#' Journal}, 68(6), 1945--1962. \doi{10.2136/sssaj2004.1945}
#'
#' Karlen, D.L., & Stott, D.E. (1994). A framework for evaluating physical
#' and chemical indicators of soil quality. In J.W. Doran et al. (Eds.),
#' \emph{Defining Soil Quality for a Sustainable Environment}, pp. 53--72.
#' SSSA Special Publication 35. \doi{10.2136/sssaspecpub35.c4}
#'
#' @examples
#' oc <- c(0.5, 1.2, 2.1, 3.4, 4.5)   # Organic Carbon (%)
#' score_more(oc)
#'
#' # With user-defined bounds (e.g., 0 to 5%)
#' score_more(oc, x_min = 0, x_max = 5)
#'
#' @export
score_more <- function(x, x_min = NULL, x_max = NULL) {
  if (!is.numeric(x)) stop("`x` must be numeric.", call. = FALSE)
  lo <- if (is.null(x_min)) min(x, na.rm = TRUE) else x_min
  hi <- if (is.null(x_max)) max(x, na.rm = TRUE) else x_max
  if (is.na(lo) || is.na(hi)) stop("Cannot compute bounds: all values are NA.", call. = FALSE)
  if (lo >= hi) return(rep(0.5, length(x)))  # constant variable
  pmin(pmax((x - lo) / (hi - lo), 0), 1)
}


#' Score a Variable Where Lower Values Are Better
#'
#' @description
#' Applies a "less is better" linear scoring function, transforming raw
#' variable values to a 0--1 score.  Suitable for soil indicators where
#' lower values denote better soil quality, such as bulk density,
#' electrical conductivity, or heavy metal concentrations
#' (Andrews et al., 2004).
#'
#' The score is computed as:
#' \deqn{S_i = \frac{x_{\max} - x_i}{x_{\max} - x_{\min}}}
#'
#' @param x Numeric vector of raw variable values.
#' @param x_min Numeric. Lower bound. Defaults to \code{min(x)}.
#' @param x_max Numeric. Upper bound. Defaults to \code{max(x)}.
#'
#' @return Numeric vector of scores in [0, 1].
#'
#' @references
#' Andrews, S.S., Karlen, D.L., & Cambardella, C.A. (2004). The soil
#' management assessment framework. \emph{Soil Science Society of America
#' Journal}, 68(6), 1945--1962. \doi{10.2136/sssaj2004.1945}
#'
#' @examples
#' bd <- c(0.9, 1.1, 1.3, 1.5, 1.7)   # Bulk Density (g/cm3)
#' score_less(bd)
#'
#' # With domain bounds
#' score_less(bd, x_min = 0.8, x_max = 2.0)
#'
#' @export
score_less <- function(x, x_min = NULL, x_max = NULL) {
  if (!is.numeric(x)) stop("`x` must be numeric.", call. = FALSE)
  lo <- if (is.null(x_min)) min(x, na.rm = TRUE) else x_min
  hi <- if (is.null(x_max)) max(x, na.rm = TRUE) else x_max
  if (is.na(lo) || is.na(hi)) stop("Cannot compute bounds: all values are NA.", call. = FALSE)
  if (lo >= hi) return(rep(0.5, length(x)))  # constant variable
  pmin(pmax((hi - x) / (hi - lo), 0), 1)
}


#' Score a Variable With an Optimum Value or Range (Bell Curve)
#'
#' @description
#' Applies a bell-shaped (peaked) scoring function appropriate for
#' soil variables that have an optimum range, beyond which both higher
#' and lower values reduce soil quality.  Classic examples include pH
#' (optimal 6.0--7.0 for most crops) and clay content
#' (Liebig et al., 1996; Karlen & Stott, 1994).
#'
#' The scoring rules are:
#' \itemize{
#'   \item \eqn{S = 1} if \eqn{x \in [\code{opt\_low}, \code{opt\_high}]}
#'   \item \eqn{S = (x - x_{\min}) / (\code{opt\_low} - x_{\min})}
#'     if \eqn{x < \code{opt\_low}}
#'   \item \eqn{S = (x_{\max} - x) / (x_{\max} - \code{opt\_high})}
#'     if \eqn{x > \code{opt\_high}}
#' }
#'
#' @param x Numeric vector of raw variable values.
#' @param opt_low Numeric. Lower bound of the optimum range.
#' @param opt_high Numeric. Upper bound of the optimum range.
#' @param x_min Numeric. Absolute minimum (score = 0). Defaults to
#'   \code{min(x)}.
#' @param x_max Numeric. Absolute maximum (score = 0). Defaults to
#'   \code{max(x)}.
#'
#' @return Numeric vector of scores in [0, 1].
#'
#' @references
#' Karlen, D.L., & Stott, D.E. (1994). A framework for evaluating physical
#' and chemical indicators of soil quality. In J.W. Doran et al. (Eds.),
#' \emph{Defining Soil Quality for a Sustainable Environment}, pp. 53--72.
#' SSSA Special Publication 35. \doi{10.2136/sssaspecpub35.c4}
#'
#' Liebig, M.A., Varvel, G., & Doran, J.W. (1996). A simple performance-
#' based index for assessing multiple agroecosystem functions.
#' \emph{Agronomy Journal}, 88, 739--745.
#' \doi{10.2134/agronj1996.00021962008800050011x}
#'
#' @examples
#' ph <- c(4.5, 5.5, 6.2, 6.8, 7.0, 7.5, 8.2)
#' score_optimum(ph, opt_low = 6.0, opt_high = 7.0)
#'
#' clay <- c(10, 18, 25, 32, 45, 60)
#' score_optimum(clay, opt_low = 20, opt_high = 35)
#'
#' @export
score_optimum <- function(x, opt_low, opt_high,
                          x_min = NULL, x_max = NULL) {
  if (!is.numeric(x))       stop("`x` must be numeric.", call. = FALSE)
  if (opt_low >= opt_high)
    stop("`opt_low` must be < `opt_high`.", call. = FALSE)

  lo <- if (is.null(x_min)) min(x, na.rm = TRUE) else x_min
  hi <- if (is.null(x_max)) max(x, na.rm = TRUE) else x_max

  score <- ifelse(
    x >= opt_low & x <= opt_high, 1,
    ifelse(x < opt_low,
           pmax((x - lo) / (opt_low - lo), 0),
           pmax((hi - x) / (hi - opt_high), 0)
    )
  )
  pmin(score, 1)
}


#' Score a Variable With a Trapezoidal Function
#'
#' @description
#' Applies a trapezoidal scoring function where scores are 1 within an
#' ideal plateau [\code{opt_low}, \code{opt_high}], rise linearly from 0
#' at \code{min_val} to 1 at \code{opt_low}, and fall linearly from 1 at
#' \code{opt_high} to 0 at \code{max_val}.  Values outside
#' [\code{min_val}, \code{max_val}] receive a score of 0.
#'
#' This function is more flexible than \code{\link{score_optimum}} because
#' the user explicitly controls the zero-score boundaries, making it
#' suitable for variables with well-established critical thresholds.
#'
#' @param x Numeric vector of raw variable values.
#' @param min_val Numeric. Value at which score becomes 0 on the low side.
#' @param opt_low Numeric. Lower bound of the plateau (score = 1).
#' @param opt_high Numeric. Upper bound of the plateau (score = 1).
#' @param max_val Numeric. Value at which score becomes 0 on the high side.
#'
#' @return Numeric vector of scores in [0, 1].
#'
#' @references
#' Wymore, A.W. (1993). \emph{Model-Based Systems Engineering}.
#' CRC Press, Boca Raton, FL.
#'
#' Buse, R., & Lele, S. (2003). Trapezoidal membership functions in
#' fuzzy soil quality assessment. \emph{Geoderma}, 114, 177--196.
#'
#' @examples
#' ph <- c(3.5, 5.0, 6.5, 7.0, 7.8, 8.5, 9.5)
#' # pH: absolute zero below 4 and above 9; ideal 6.0-7.0
#' score_trapezoid(ph, min_val = 4.0, opt_low = 6.0,
#'                 opt_high = 7.0, max_val = 9.0)
#'
#' @export
score_trapezoid <- function(x, min_val, opt_low, opt_high, max_val) {
  if (!is.numeric(x)) stop("`x` must be numeric.", call. = FALSE)
  if (!(min_val < opt_low && opt_low <= opt_high && opt_high < max_val))
    stop(paste("Parameters must satisfy: min_val < opt_low <= opt_high",
               "< max_val"), call. = FALSE)

  score <- ifelse(x <= min_val | x >= max_val, 0,
           ifelse(x >= opt_low & x <= opt_high, 1,
           ifelse(x < opt_low,
                  (x - min_val) / (opt_low - min_val),
                  (max_val - x) / (max_val - opt_high))))
  pmin(pmax(score, 0), 1)
}


#' Score a Variable With a User-Defined Function
#'
#' @description
#' Applies an arbitrary user-defined scoring function to a numeric vector.
#' The function must accept a numeric vector and return a numeric vector of
#' the same length with values in [0, 1].
#'
#' @param x Numeric vector of raw variable values.
#' @param FUN A function with signature \code{function(x)} that returns
#'   numeric scores in [0, 1].
#' @param ... Additional arguments passed to \code{FUN}.
#'
#' @return Numeric vector of scores in [0, 1].
#'
#' @examples
#' # Log-linear scoring for a skewed variable
#' mbc <- c(30, 80, 200, 400, 600)
#' score_custom(mbc, FUN = function(x) {
#'   s <- (log(x) - log(min(x))) / (log(max(x)) - log(min(x)))
#'   pmin(pmax(s, 0), 1)
#' })
#'
#' @export
score_custom <- function(x, FUN, ...) {
  if (!is.numeric(x))    stop("`x` must be numeric.", call. = FALSE)
  if (!is.function(FUN)) stop("`FUN` must be a function.", call. = FALSE)
  scores <- FUN(x, ...)
  if (!is.numeric(scores) || length(scores) != length(x))
    stop("`FUN` must return a numeric vector of the same length as `x`.",
         call. = FALSE)
  if (any(scores < 0 | scores > 1, na.rm = TRUE))
    warning("Some scores from `FUN` are outside [0, 1]; check your function.")
  scores
}


#' Score All Variables Using a Configuration Table
#'
#' @description
#' Applies the appropriate scoring function to each soil variable according
#' to a configuration table produced by \code{\link{make_config}}.  This
#' is the primary data-preparation step before computing any Soil Quality
#' Index.
#'
#' @param data A data frame containing the soil variables.
#' @param config A \code{sqi_config} data frame (see
#'   \code{\link{make_config}}).
#' @param group_cols Character vector of grouping column names to preserve
#'   unchanged. Default is \code{"LandUse"}.
#' @param custom_fns A named list of functions for variables with
#'   \code{type = "custom"}.  Names must match the \code{variable} column
#'   in \code{config}.
#'
#' @return A data frame with the same structure as \code{data}, but with
#'   each variable column replaced by its 0--1 score.  Group columns are
#'   preserved unchanged.
#'
#' @references
#' Andrews, S.S., Karlen, D.L., & Cambardella, C.A. (2004). The soil
#' management assessment framework. \emph{Soil Science Society of America
#' Journal}, 68(6), 1945--1962. \doi{10.2136/sssaj2004.1945}
#'
#' @examples
#' data(soil_data)
#' cfg <- make_config(
#'   variable = c("pH",  "EC",   "BD",   "OC",   "MBC",  "Clay"),
#'   type     = c("opt", "less", "less", "more", "more", "opt"),
#'   opt_low  = c(6.0,   NA,     NA,     NA,     NA,     20),
#'   opt_high = c(7.0,   NA,     NA,     NA,     NA,     35)
#' )
#' scored <- score_all(soil_data, cfg,
#'                     group_cols = c("LandUse", "Depth"))
#' head(scored)
#'
#' @export
score_all <- function(data, config, group_cols = "LandUse",
                      custom_fns = list()) {
  if (!inherits(config, c("sqi_config", "data.frame")))
    stop("`config` must be a data frame or sqi_config object.", call. = FALSE)

  scored <- data
  for (i in seq_len(nrow(config))) {
    var      <- config$variable[i]
    type     <- config$type[i]
    if (!var %in% names(data)) {
      warning("Variable '", var, "' not found in data; skipping.")
      next
    }
    x <- data[[var]]
    # Return NULL if NA so scoring fns use data-driven range
    na2null <- function(v) if (is.na(v)) NULL else v

    scored[[var]] <- switch(
      type,
      "more" = score_more(x,
                          x_min = na2null(config$min_val[i]),
                          x_max = na2null(config$max_val[i])),
      "less" = score_less(x,
                          x_min = na2null(config$min_val[i]),
                          x_max = na2null(config$max_val[i])),
      "opt"  = score_optimum(x,
                             opt_low  = config$opt_low[i],
                             opt_high = config$opt_high[i],
                             x_min    = na2null(config$min_val[i]),
                             x_max    = na2null(config$max_val[i])),
      "trap" = score_trapezoid(x,
                               min_val  = config$min_val[i],
                               opt_low  = config$opt_low[i],
                               opt_high = config$opt_high[i],
                               max_val  = config$max_val[i]),
      "custom" = {
        if (!var %in% names(custom_fns))
          stop("No custom function supplied for variable '", var, "'.",
               call. = FALSE)
        score_custom(x, FUN = custom_fns[[var]])
      },
      stop("Unknown scoring type '", type, "' for variable '", var, "'.",
           call. = FALSE)
    )
  }
  scored
}

Try the SQIpro package in your browser

Any scripts or data that you put into this service are public.

SQIpro documentation built on April 20, 2026, 5:06 p.m.