R/interval_uncertainty.R
In dataSDA: Datasets and Basic Statistics for Symbolic Data Analysis

Documented in int_cv int_dispersion int_entropy int_granularity int_imprecision int_information_content int_uniformity

#' @name interval_uncertainty
#' @title Uncertainty and Variability Measures for Interval Data
#' @description Functions to compute uncertainty and variability measures for interval-valued data.
#' @param x interval-valued data with symbolic_tbl class.
#' @param var_name the variable name or the column location (multiple variables are allowed).
#' @param method methods to calculate statistics: CM (default), VM, QM, SE, FV, EJD, GQ, SPT.
#' @param base logarithm base for entropy calculation (default: 2)
#' @param ... additional parameters
#' @return A numeric matrix or value
#' @details
#' These functions measure uncertainty and variability:
#' \itemize{
#'   \item \code{int_entropy}: Shannon entropy (information content)
#'   \item \code{int_cv}: Coefficient of variation (CV = SD / Mean)
#'   \item \code{int_dispersion}: General dispersion index
#'   \item \code{int_imprecision}: Imprecision based on interval width
#'   \item \code{int_granularity}: Variability in interval sizes
#'   \item \code{int_uniformity}: Uniformity of interval widths (inverse of granularity)
#'   \item \code{int_information_content}: Normalized entropy (entropy / log2(n))
#' }
#' @author Han-Ming Wu
#' @seealso int_var int_entropy int_cv
#' @examples
#' data(mushroom.int)
#' 
#' # Calculate entropy
#' int_entropy(mushroom.int, var_name = "Pileus.Cap.Width")
#' 
#' # Coefficient of variation
#' int_cv(mushroom.int, var_name = c("Stipe.Length", "Stipe.Thickness"), method = c("CM", "EJD"))
#' 
#' # Measure imprecision
#' int_imprecision(mushroom.int, var_name = c("Stipe.Length", "Stipe.Thickness"))
#' 
#' # Dispersion index
#' int_dispersion(mushroom.int, var_name = "Pileus.Cap.Width", method = "CM")
#'
#' # Check data granularity
#' int_granularity(mushroom.int, var_name = 2:4)
#'
#' # Check uniformity
#' int_uniformity(mushroom.int, var_name = 2:3)
#'
#' # Information content
#' int_information_content(mushroom.int, var_name = "Stipe.Length", method = "CM")
#' @importFrom graphics hist
#' @importFrom stats sd median
#' @export
int_entropy <- function(x, var_name, method = "CM", base = 2, ...) {
  .check_symbolic_tbl(x, "int_entropy")
  .check_var_name(var_name, x, "int_entropy")
  .check_interval_method(method, "int_entropy")
  
  options <- c("CM", "VM", "QM", "SE", "FV", "EJD", "GQ", "SPT")
  at <- options %in% method
  entropy_tmp <- matrix(0, nrow = length(options), ncol = length(var_name))
  
  idata <- symbolic_tbl_to_idata(x[, var_name])
  
  # Compute Shannon entropy using histogram bins
  compute_entropy <- function(X_tmp, breaks = 30) {
    if (length(var_name) == 1) {
      h <- hist(X_tmp, breaks = breaks, plot = FALSE)
      p <- h$counts / sum(h$counts)
      p <- p[p > 0]  # Remove zero probabilities
      H <- -sum(p * log(p, base = base))
    } else {
      H <- numeric(ncol(X_tmp))
      for (j in 1:ncol(X_tmp)) {
        h <- hist(X_tmp[, j], breaks = breaks, plot = FALSE)
        p <- h$counts / sum(h$counts)
        p <- p[p > 0]
        H[j] <- -sum(p * log(p, base = base))
      }
    }
    H
  }
  
  transforms <- .get_interval_transforms(idata, at)
  for (nm in names(transforms)) {
    idx <- which(options == nm)
    entropy_tmp[idx, ] <- compute_entropy(transforms[[nm]])
  }
  
  if (at[6] | at[7] | at[8]) {  # EJD, GQ, SPT
    X_tmp <- if (!is.null(transforms$CM)) transforms$CM else Interval_to_Center(idata)
    entropy_tmp[6, ] <- entropy_tmp[7, ] <- entropy_tmp[8, ] <- compute_entropy(X_tmp)
  }
  
  entropy_output <- matrix(entropy_tmp[at, ], 
                           nrow = length(method), 
                           ncol = length(var_name))
  
  if (is.numeric(var_name)) {
    colnames(entropy_output) <- colnames(x)[var_name]
  } else {
    colnames(entropy_output) <- var_name
  }
  rownames(entropy_output) <- options[at]
  
  entropy_output
}


#' @rdname interval_uncertainty
#' @export
int_cv <- function(x, var_name, method = "CM", ...) {
  .check_symbolic_tbl(x, "int_cv")
  .check_var_name(var_name, x, "int_cv")
  .check_interval_method(method, "int_cv")
  
  # Calculate CV = SD / Mean
  mean_val <- int_mean(x, var_name, method)
  var_val <- int_var(x, var_name, method)
  sd_val <- sqrt(var_val)
  
  # Handle division by zero
  cv_output <- sd_val / mean_val
  cv_output[is.infinite(cv_output)] <- NA
  cv_output[is.nan(cv_output)] <- NA
  
  cv_output
}


#' @rdname interval_uncertainty
#' @export
int_dispersion <- function(x, var_name, method = "CM", ...) {
  .check_symbolic_tbl(x, "int_dispersion")
  .check_var_name(var_name, x, "int_dispersion")
  .check_interval_method(method, "int_dispersion")
  
  options <- c("CM", "VM", "QM", "SE", "FV", "EJD", "GQ", "SPT")
  at <- options %in% method
  disp_tmp <- matrix(0, nrow = length(options), ncol = length(var_name))
  
  idata <- symbolic_tbl_to_idata(x[, var_name])
  n <- nrow(idata)
  
  # Dispersion index: Average absolute deviation from median
  compute_dispersion <- function(X_tmp) {
    if (length(var_name) == 1) {
      med <- median(X_tmp)
      disp <- mean(abs(X_tmp - med))
    } else {
      disp <- numeric(ncol(X_tmp))
      for (j in 1:ncol(X_tmp)) {
        med <- median(X_tmp[, j])
        disp[j] <- mean(abs(X_tmp[, j] - med))
      }
    }
    disp
  }
  
  transforms <- .get_interval_transforms(idata, at)
  for (nm in names(transforms)) {
    idx <- which(options == nm)
    disp_tmp[idx, ] <- compute_dispersion(transforms[[nm]])
  }
  
  if (at[6] | at[7] | at[8]) {  # EJD, GQ, SPT
    X_tmp <- if (!is.null(transforms$CM)) transforms$CM else Interval_to_Center(idata)
    disp_tmp[6, ] <- disp_tmp[7, ] <- disp_tmp[8, ] <- compute_dispersion(X_tmp)
  }
  
  disp_output <- matrix(disp_tmp[at, ], 
                        nrow = length(method), 
                        ncol = length(var_name))
  
  if (is.numeric(var_name)) {
    colnames(disp_output) <- colnames(x)[var_name]
  } else {
    colnames(disp_output) <- var_name
  }
  rownames(disp_output) <- options[at]
  
  disp_output
}


#' @rdname interval_uncertainty
#' @export
int_imprecision <- function(x, var_name, ...) {
  .check_symbolic_tbl(x, "int_imprecision")
  .check_var_name(var_name, x, "int_imprecision")
  
  idata <- symbolic_tbl_to_idata(x[, var_name])
  
  # Imprecision = Average interval width / Average interval center
  # Normalized measure of interval size
  widths <- idata[, , 2] - idata[, , 1]
  centers <- (idata[, , 1] + idata[, , 2]) / 2
  
  if (length(var_name) == 1) {
    widths <- as.vector(widths)
    centers <- as.vector(centers)
    avg_width <- mean(widths)
    avg_center <- mean(abs(centers))
    if (avg_center == 0) {
      imprecision <- avg_width
    } else {
      imprecision <- avg_width / avg_center
    }
    imprecision <- matrix(imprecision, nrow = 1, ncol = 1)
  } else {
    imprecision <- numeric(length(var_name))
    for (j in seq_along(var_name)) {
      avg_width <- mean(widths[, j])
      avg_center <- mean(abs(centers[, j]))
      if (avg_center == 0) {
        imprecision[j] <- avg_width
      } else {
        imprecision[j] <- avg_width / avg_center
      }
    }
    imprecision <- matrix(imprecision, nrow = 1)
  }
  
  if (is.numeric(var_name)) {
    colnames(imprecision) <- colnames(x)[var_name]
  } else {
    colnames(imprecision) <- var_name
  }
  rownames(imprecision) <- "Imprecision"
  
  imprecision
}


#' @rdname interval_uncertainty
#' @export
int_granularity <- function(x, var_name, ...) {
  .check_symbolic_tbl(x, "int_granularity")
  .check_var_name(var_name, x, "int_granularity")
  
  idata <- symbolic_tbl_to_idata(x[, var_name])
  
  # Granularity = CV of interval widths
  # Measures how variable the interval sizes are
  widths <- idata[, , 2] - idata[, , 1]
  
  if (length(var_name) == 1) {
    widths <- as.vector(widths)
    mean_width <- mean(widths)
    sd_width <- sd(widths)
    if (mean_width == 0) {
      granularity <- 0
    } else {
      granularity <- sd_width / mean_width
    }
    granularity <- matrix(granularity, nrow = 1, ncol = 1)
  } else {
    granularity <- numeric(length(var_name))
    for (j in seq_along(var_name)) {
      mean_width <- mean(widths[, j])
      sd_width <- sd(widths[, j])
      if (mean_width == 0) {
        granularity[j] <- 0
      } else {
        granularity[j] <- sd_width / mean_width
      }
    }
    granularity <- matrix(granularity, nrow = 1)
  }
  
  if (is.numeric(var_name)) {
    colnames(granularity) <- colnames(x)[var_name]
  } else {
    colnames(granularity) <- var_name
  }
  rownames(granularity) <- "Granularity"
  
  granularity
}


#' @rdname interval_uncertainty
#' @export
int_uniformity <- function(x, var_name, ...) {
  .check_symbolic_tbl(x, "int_uniformity")
  .check_var_name(var_name, x, "int_uniformity")
  
  # Uniformity = 1 - Granularity
  # Returns 1 for uniform interval widths, lower for varying widths
  gran <- int_granularity(x, var_name, ...)
  uniformity <- 1 / (1 + gran)  # Bounded between 0 and 1
  
  rownames(uniformity) <- "Uniformity"
  uniformity
}


#' @rdname interval_uncertainty
#' @export
int_information_content <- function(x, var_name, method = "CM", ...) {
  .check_symbolic_tbl(x, "int_information_content")
  .check_var_name(var_name, x, "int_information_content")
  
  # Information content = Entropy / log2(n)
  # Normalized entropy (between 0 and 1)
  n <- nrow(x)
  entropy <- int_entropy(x, var_name, method, base = 2, ...)
  
  max_entropy <- log2(n)
  if (max_entropy == 0) {
    info_content <- entropy * 0
  } else {
    info_content <- entropy / max_entropy
  }
  
  info_content
}

Any scripts or data that you put into this service are public.

dataSDA documentation built on June 12, 2026, 9:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

dataSDA
Datasets and Basic Statistics for Symbolic Data Analysis

R/interval_uncertainty.R
In dataSDA: Datasets and Basic Statistics for Symbolic Data Analysis

Defines functions int_information_content int_uniformity int_granularity int_imprecision int_dispersion int_cv int_entropy

Documented in int_cv int_dispersion int_entropy int_granularity int_imprecision int_information_content int_uniformity

Try the dataSDA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

dataSDA Datasets and Basic Statistics for Symbolic Data Analysis

R/interval_uncertainty.R In dataSDA: Datasets and Basic Statistics for Symbolic Data Analysis

Defines functions int_information_content int_uniformity int_granularity int_imprecision int_dispersion int_cv int_entropy

Documented in int_cv int_dispersion int_entropy int_granularity int_imprecision int_information_content int_uniformity

Try the dataSDA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

dataSDA
Datasets and Basic Statistics for Symbolic Data Analysis

R/interval_uncertainty.R
In dataSDA: Datasets and Basic Statistics for Symbolic Data Analysis