R/functions.R

Defines functions spearman_rank_correlation pearson_correlation_coefficient empirical_covariance coefficient_of_contingency linear_interpolation_quantile approx_var approx_mean mids_from_breaks linear_deviation variance entropy harmonic_mean stat_mode

Documented in approx_mean approx_var coefficient_of_contingency empirical_covariance entropy harmonic_mean linear_deviation linear_interpolation_quantile mids_from_breaks pearson_correlation_coefficient spearman_rank_correlation stat_mode variance

#' Statistical Mode
#' @param x vector. A Vector containing any simple datatype.
#' @return A vector element.
#' @export
stat_mode <- function(x) {
  return(which.max(table(x)))
}


# ---------------------------

#' Harmonic Mean
#' @param x vector. A Vector containing numeric data.
#' @return A numeric scalar.
#' @export
harmonic_mean <- function(x) {
  return (length(x) / sum(1 / x))
}


# ---------------------------

#' Entropy
#' @param x vector. A Vector containing numeric data.
#' @return A numeric scalar.
#' @export
entropy <- function(x) {
  props <- prop.table(table(x))
  return (-sum(props * log(props)))
}


# ---------------------------

#' Variance
#' @param x vector. A Vector containing numeric data.
#' @return A numeric scalar.
#' @export
variance <- function(x) {
  return (1/length(x) * sum((x - mean(x)) ** 2))
}


# ---------------------------

#' Linear Deviation
#' @param x vector. A Vector containing numeric data.
#' @return A numeric scalar.
#' @importFrom stats median
#' @export
linear_deviation <- function(x) {
  return (1/length(x) * sum(abs(x - median(x))))
}


# ---------------------------

#' Get Midpoints from breaks
#' @param breaks vector. A Vector containing numeric data.
#' @return A numeric vector.
#' @export
mids_from_breaks <- function(breaks) {
  return (breaks[-length(breaks)] + (breaks[-1] - breaks[-length(breaks)]) / 2)
}


# ---------------------------

#' Approximate Arithmetic Mean
#' @param breaks vector. A Vector containing numeric data.
#' @param freq_table frequency table. A Table created with table(x)
#' @return A numeric scalar.
#' @export
approx_mean <- function(breaks, freq_table) {
  return (sum(freq_table / sum(freq_table) * mids_from_breaks(breaks)))
}


# ---------------------------

#' Approximate Variance
#' @param breaks vector. A Vector containing numeric data.
#' @param freq_table frequency table. A Table created with table(x)
#' @return A numeric scalar.
#' @export
approx_var <- function(breaks, freq_table) {
  mean_approx_kl <- sum(prop.table(freq_table) * mids_from_breaks(breaks))
  return (sum(prop.table(freq_table) * (mids_from_breaks(breaks) - mean_approx_kl)^2))
}


# ---------------------------

#' Quantile by Linear Interpolation
#' @param p percentile. The quantile percentile.
#' @param breaks vector. A Vector containing numeric data.
#' @param freq_table frequency table. A Table created with table(x)
#' @return A numeric scalar.
#' @export
linear_interpolation_quantile <- function(p, breaks, freq_table) {
  cumsum_prop_table <- cumsum(prop.table(freq_table))
  j <- rev(which(cumsum_prop_table <= p))[1]

  xj_1 <- breaks[j + 1]
  xj <- breaks[j + 2]

  fxj_1 <- as.numeric(cumsum_prop_table[j])
  fxj <- as.numeric(cumsum_prop_table[j + 1])

  return(xj_1 + (xj - xj_1) * ((p - fxj_1)/(fxj - fxj_1)))
}


# ---------------------------

#' Coefficient of Contingency
#' @param table table. A Table created with as.table(x) from matrix.
#' @return A list containing K_star, K_max, KK_star, M, Chi sq
#' @importFrom stats addmargins
#' @export
coefficient_of_contingency <- function(table) {
  addmargins(table)

  chisq <- summary(table)$statistic

  KK <- sqrt(chisq/(sum(table) + chisq))
  M <- min(dim(table))
  K_max <- sqrt((M-1)/M)
  K_star <- KK/K_max

  return(list(
    "K_star" = K_star,
    "K_max" = K_max,
    "KK_star" = KK,
    "M" = M,
    "Chisq" = chisq
  ))
}


# ---------------------------

#' Empirical Covariance
#' @param x vector. A Vector containing numeric data.
#' @param y vector. A Vector containing numeric data.
#' @return A numeric scalar.
#' @export
empirical_covariance <- function(x,y) {
  return(sum((x - mean(x)) * (y - mean(y))) * (1 / length(y)))
}


# ---------------------------

#' Pearson Correlation Coefficient
#' @param x vector. A Vector containing numeric data.
#' @param y vector. A Vector containing numeric data.
#' @return A numeric scalar.
#' @importFrom stats cor
#' @export
pearson_correlation_coefficient <- function(x,y) {
  return(cor(x, y, method = "pearson"))
}


# ---------------------------

#' Spearman's Rank Correlation
#' @param sum_rksq numeric scalar. Result of sum (Rk(x_i) - Rk(yi))^2.
#' @param n numeric scalar. Number of samples.
#' @param r numeric scalar. Number of ranks.
#' @return A numeric scalar.
#' @export
spearman_rank_correlation <- function(sum_rksq, n, r) {
  return(1 - (sum_rksq * r) / (n * (n^2 -1)))
}
smnmnkr/eos documentation built on Jan. 30, 2022, 12:36 a.m.