R/statistics.R
In MetricsWeighted: Weighted Metrics and Performance Measures for Machine Learning

Documented in weighted_cor weighted_mean weighted_median weighted_quantile weighted_var

#' Weighted Mean
#'
#' Returns the weighted mean of a numeric vector.
#' In contrast to [stats::weighted.mean()], `w` does not need to be specified.
#'
#' @param x Numeric vector.
#' @param w Optional vector of non-negative case weights.
#' @param ... Further arguments passed to [mean()] or [stats::weighted.mean()].
#' @returns A length-one numeric vector.
#' @export
#' @examples
#' weighted_mean(1:10)
#' weighted_mean(1:10, w = NULL)
#' weighted_mean(1:10, w = 1:10)
#' @seealso [stats::weighted.mean()]
weighted_mean <- function(x, w = NULL, ...) {
  if (is.null(w)) {
    return(mean(x, ...))
  }
  stopifnot(
    all(w >= 0),
    length(x) == length(w)
  )
  if (all(w == 0)) {
    stop("All weights are zero")
  }
  stats::weighted.mean(x, w = w, ...)
}

#' Weighted Quantiles
#'
#' Calculates weighted quantiles based on the generalized inverse of the weighted ECDF.
#' If no weights are passed, uses [stats::quantile()].
#'
#' @inheritParams weighted_mean
#' @param probs Vector of probabilities.
#' @param na.rm Ignore missing data? Default is `TRUE`.
#' @param names Return names? Default is `TRUE`.
#' @param ... Further arguments passed to [stats::quantile()] in the unweighted case.
#'   Not used in the weighted case.
#' @returns A length-one numeric vector.
#' @export
#' @examples
#' n <- 10
#' x <- seq_len(n)
#' quantile(x)
#' weighted_quantile(x)
#' weighted_quantile(x, w = rep(1, n))
#' quantile(x, type = 1)
#' weighted_quantile(x, w = x) # same as Hmisc::wtd.quantile()
#' weighted_quantile(x, w = x, names = FALSE)
#' weighted_quantile(x, w = x, probs = 0.5, names = FALSE)
#'
#' # Example with integer weights
#' x <- c(1, 1:11, 11, 11)
#' w <- seq_along(x)
#' weighted_quantile(x, w)
#' quantile(rep(x, w)) # same
#' @seealso [weighted_median()]
weighted_quantile <- function(x, w = NULL, probs = seq(0, 1, 0.25),
                              na.rm = TRUE, names = TRUE, ...) {
  # Initial checks and subsetting
  if (is.null(w)) {
    return(stats::quantile(x = x, probs = probs, na.rm = na.rm, names = names, ...))
  }
  stopifnot(
    length(x) == length(w),
    all(probs >= 0 & probs <= 1)
  )

  if (isTRUE(na.rm) && anyNA(x)) {
    ok <- !is.na(x)
    x <- x[ok]
    w <- w[ok]
  }
  stopifnot(
    all(w >= 0),
    length(x) >= 1L
  )
  if (all(w == 0)) {
    stop("All weights are zero")

  }
  if (length(x) == 1L) {
    out <- rep(x, length(probs))
  } else {
    ord <- order(x)
    x <- x[ord]
    w <- w[ord]
    w <- w / sum(w)
    cs <- cumsum(w[-length(w)])
    out <- stats::stepfun(cs, x)(probs)
  }
  if (names) {
    names(out) <- paste0(format(100 * probs, trim = TRUE), "%")
  }
  out
}

#' Weighted Median
#'
#' Calculates weighted median based on [weighted_quantile()].
#'
#' @inheritParams weighted_mean
#' @param ... Further arguments passed to [weighted_quantile()].
#' @returns A length-one numeric vector.
#' @export
#' @examples
#' n <- 21
#' x <- seq_len(n)
#' quantile(x, probs = 0.5)
#' weighted_median(x, w = rep(1, n))
#' weighted_median(x, w = x)
#' quantile(rep(x, x), probs = 0.5)
#' @seealso [weighted_quantile()]
weighted_median <- function(x, w = NULL, ...) {
  weighted_quantile(x = x, w = w, probs = 0.5, names = FALSE, ...)
}

#' Weighted Variance
#'
#' Calculates weighted variance, see [stats::cov.wt()] or
#' \url{https://en.wikipedia.org/wiki/Sample_mean_and_covariance#Weighted_samples}
#' for details.
#'
#' @inheritParams weighted_mean
#' @param method Specifies how the result is scaled. If "unbiased", the denomiator
#'   is reduced by 1, see [stats::cov.wt()] for details.
#' @param na.rm Should missing values in `x` be removed? Default is `FALSE`.
#' @param ... Further arguments passed to [stats::cov.wt()].
#' @returns A length-one numeric vector.
#' @export
#' @examples
#' weighted_var(1:10)
#' weighted_var(1:10, w = NULL)
#' weighted_var(1:10, w = rep(1, 10))
#' weighted_var(1:10, w = 1:10)
#' weighted_var(1:10, w = 1:10, method = "ML")
#' @seealso [stats::cov.wt()]
weighted_var <- function(x, w = NULL, method = c("unbiased", "ML"),
                         na.rm = FALSE, ...) {
  method <- match.arg(method)
  cf <- 1
  if (is.null(w)) {
    if (method == "ML") {
      n <- if (na.rm) sum(!is.na(x)) else length(x)
      cf <- (n - 1) / n
    }
    return(cf * stats::var(x, na.rm = na.rm, ...))
  }
  stopifnot(
    all(w >= 0),
    length(x) == length(w)
  )
  if (all(w == 0)) {
    stop("All weights are zero")
  }
  if (na.rm) {
    ok <- !is.na(x)
    x <- x[ok]
    w <- w[ok]
  } else if (anyNA(x)) {
    return(NA)
  }
  as.numeric(stats::cov.wt(cbind(x), wt = w, method = method, ...)$cov)
}

#' Weighted Pearson Correlation
#'
#' Calculates weighted Pearson correlation coefficient between actual and predicted
#' values by the help of [stats::cov.wt()].
#'
#' @inheritParams regression
#' @param na.rm Should observations with missing values in `actual` or `predicted`
#'   be removed? Default is `FALSE`.
#' @param ... Further arguments passed to [stats::cov.wt()].
#' @returns A length-one numeric vector.
#' @export
#' @examples
#' weighted_cor(1:10, c(1, 1:9))
#' weighted_cor(1:10, c(1, 1:9), w = 1:10)
weighted_cor <- function(actual, predicted, w = NULL, na.rm = FALSE, ...) {
  stopifnot(length(actual) == length(predicted))
  if (is.null(w)) {
    return(stats::cor(actual, predicted, use = if (na.rm) "p" else "e"))
  }
  stopifnot(
    all(w >= 0),
    length(actual) == length(w)
  )
  if (all(w == 0)) {
    stop("All weights are zero")
  }
  if (na.rm) {
    ok <- !is.na(actual) & !is.na(predicted)
    actual <- actual[ok]
    predicted <- predicted[ok]
    w <- w[ok]
  } else if (anyNA(actual) || anyNA(predicted)) {
    return(NA)
  }
  stats::cov.wt(cbind(actual, predicted), wt = w, cor = TRUE, ...)$cor[1L, 2L]
}

Any scripts or data that you put into this service are public.

MetricsWeighted documentation built on Nov. 16, 2023, 5:09 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

MetricsWeighted
Weighted Metrics and Performance Measures for Machine Learning

R/statistics.R
In MetricsWeighted: Weighted Metrics and Performance Measures for Machine Learning

Defines functions weighted_cor weighted_var weighted_median weighted_quantile weighted_mean

Documented in weighted_cor weighted_mean weighted_median weighted_quantile weighted_var

Try the MetricsWeighted package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MetricsWeighted Weighted Metrics and Performance Measures for Machine Learning

R/statistics.R In MetricsWeighted: Weighted Metrics and Performance Measures for Machine Learning

Defines functions weighted_cor weighted_var weighted_median weighted_quantile weighted_mean

Documented in weighted_cor weighted_mean weighted_median weighted_quantile weighted_var

Try the MetricsWeighted package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MetricsWeighted
Weighted Metrics and Performance Measures for Machine Learning

R/statistics.R
In MetricsWeighted: Weighted Metrics and Performance Measures for Machine Learning