R/robustness_score.R
In TrustworthyMLR: Stability and Robustness Evaluation for Machine Learning Models

Documented in robustness_score

#' Robustness Score Under Input Perturbation
#'
#' Evaluates the robustness of a machine learning model by measuring how
#' much its predictions change when small amounts of noise are added to
#' the input data. A robustness score of 1 indicates that predictions are
#' completely unaffected by perturbations, while values near 0 indicate
#' high sensitivity to input noise.
#'
#' Gaussian noise proportional to each feature's standard deviation is
#' added to the input data. The magnitude of the noise is controlled by
#' \code{noise_level}. Predictions on the perturbed data are compared to
#' baseline predictions using normalised mean squared error. The process
#' is repeated \code{n_rep} times and the average score is returned.
#'
#' @param predict_fn A function that accepts a numeric matrix (observations
#'   in rows, features in columns) and returns a numeric vector of
#'   predictions with length equal to \code{nrow(X)}.
#' @param X A numeric matrix or data.frame of input features. Rows are
#'   observations and columns are features. Must contain at least two rows
#'   and no missing values.
#' @param noise_level A positive numeric scalar controlling the magnitude
#'   of Gaussian noise added to each feature, expressed as a fraction of
#'   the feature's standard deviation. Default is \code{0.05} (5 percent).
#' @param n_rep A positive integer specifying the number of perturbation
#'   repetitions. Default is \code{10L}.
#'
#' @return A numeric scalar between 0 and 1, where 1 indicates perfect
#'   robustness and values near 0 indicate high sensitivity to noise.
#'
#' @examples
#' # A simple linear prediction function
#' pred_fn <- function(X) X %*% c(1, 2, 3)
#' set.seed(42)
#' X <- matrix(rnorm(300), ncol = 3)
#' robustness_score(pred_fn, X, noise_level = 0.05, n_rep = 10)
#'
#' # A constant prediction function is perfectly robust
#' const_fn <- function(X) rep(5, nrow(X))
#' robustness_score(const_fn, X)
#'
#' @importFrom stats rnorm sd var
#' @export
robustness_score <- function(predict_fn, X, noise_level = 0.05, n_rep = 10L) {
  if (!is.function(predict_fn)) {
    stop("'predict_fn' must be a function.", call. = FALSE)
  }

  if (!is.matrix(X) && !is.data.frame(X)) {
    stop("'X' must be a matrix or data.frame.", call. = FALSE)
  }

  X <- as.matrix(X)

  if (!is.numeric(X)) {
    stop("'X' must contain numeric values.", call. = FALSE)
  }

  if (nrow(X) < 2L) {
    stop("'X' must have at least 2 rows.", call. = FALSE)
  }

  if (anyNA(X)) {
    stop("'X' must not contain NA values.", call. = FALSE)
  }

  if (!is.numeric(noise_level) || length(noise_level) != 1L ||
      noise_level <= 0) {
    stop("'noise_level' must be a single positive number.", call. = FALSE)
  }

  n_rep <- as.integer(n_rep)
  if (n_rep < 1L) {
    stop("'n_rep' must be a positive integer.", call. = FALSE)
  }

  baseline <- predict_fn(X)

  if (!is.numeric(baseline) || length(baseline) != nrow(X)) {
    stop(
      "'predict_fn' must return a numeric vector with length equal to nrow(X).",
      call. = FALSE
    )
  }

  baseline_var <- var(baseline)

  if (baseline_var == 0) {
    return(1)
  }

  col_sds <- apply(X, 2L, sd)

  scores <- vapply(seq_len(n_rep), function(i) {
    X_noisy <- X
    for (j in seq_len(ncol(X))) {
      if (col_sds[j] > 0) {
        X_noisy[, j] <- X[, j] + rnorm(
          nrow(X), mean = 0, sd = noise_level * col_sds[j]
        )
      }
    }
    perturbed <- predict_fn(X_noisy)
    mse <- mean((baseline - perturbed)^2)
    max(0, min(1, 1 - mse / baseline_var))
  }, numeric(1L))

  return(mean(scores))
}