R/computation.R
In thisutils: Collection of Utility Functions for Data Analysis and Computing

Documented in check_sparsity normalization rescale r_square simulate_sparse_matrix

#' @title Generate a simulated sparse matrix
#'
#' @description This function generates a sparse matrix with a specified number of rows and columns,
#' a given sparsity level, and a distribution function for the non-zero values.
#'
#' @md
#' @param nrow Number of rows in the matrix.
#' @param ncol Number of columns in the matrix.
#' @param sparsity Proportion of zero elements (sparsity level).
#' Default is 0.95, meaning 95% of elements are zero (5% are non-zero).
#' @param distribution_fun Function to generate non-zero values.
#' @param decimal Numeric value, default is *`0`*.
#' Controls the number of decimal places in the generated values.
#' If set to *`0`*, values will be integers.
#' When decimal > 0, random decimal parts are uniformly distributed across the full range.
#' @param seed Random seed for reproducibility.
#'
#' @return A sparse matrix of class "dgCMatrix"
#' @export
#'
#' @examples
#' simulate_sparse_matrix(1000, 500) |>
#'   check_sparsity()
#'
#' simulate_sparse_matrix(10, 10, decimal = 1)
#' simulate_sparse_matrix(10, 10, decimal = 5)
simulate_sparse_matrix <- function(
    nrow,
    ncol,
    sparsity = 0.95,
    distribution_fun = function(n) stats::rpois(n, lambda = 0.5) + 1,
    decimal = 0,
    seed = 1) {
  set.seed(seed)

  nnz <- round(nrow * ncol * (1 - sparsity))

  total_positions <- nrow * ncol
  if (nnz > total_positions) {
    nnz <- total_positions
  }

  positions <- sample(total_positions, nnz, replace = FALSE)

  i <- ((positions - 1) %% nrow) + 1
  j <- ((positions - 1) %/% nrow) + 1

  x <- distribution_fun(nnz)

  if (decimal > 0) {
    decimal_part <- stats::runif(nnz, min = 0, max = 1)
    x <- x + decimal_part
  }

  x <- round(x, decimal)

  Matrix::sparseMatrix(
    i = i,
    j = j,
    x = x,
    dims = c(nrow, ncol),
    dimnames = list(
      paste0("row_", 1:nrow),
      paste0("col_", 1:ncol)
    )
  )
}

#' @title Check sparsity of matrix
#'
#' @param x A matrix.
#'
#' @return Sparsity of matrix
#' @export
check_sparsity <- function(x) {
  if (methods::is(x, "sparseMatrix")) {
    non_zero_count <- Matrix::nnzero(x)
    total_counts <- prod(dim(x))
  } else {
    non_zero_count <- sum(x != 0)
    total_counts <- length(x)
  }

  sparsity_ratio <- non_zero_count / total_counts

  sparsity <- 1 - sparsity_ratio

  return(sparsity)
}

.rmse <- function(true, pred) {
  return(
    sqrt(mean((true - pred)^2))
  )
}

.sse <- function(y_true, y_pred) {
  return(
    sum((y_true - y_pred)**2)
  )
}

.rse <- function(y_true, y_pred) {
  return(
    .sse(y_true, y_pred) / .sse(y_true, mean(y_true))
  )
}

#' @title coefficient of determination (\eqn{R^2})
#'
#' @md
#' @param y_true A numeric vector with ground truth values.
#' @param y_pred A numeric vector with predicted values.
#'
#' @return \eqn{R^2} value
#'
#' @export
#'
#' @examples
#' y <- rnorm(100)
#' y_pred <- y + rnorm(100, sd = 0.5)
#' r_square(y, y_pred)
r_square <- function(y_true, y_pred) {
  return(
    1 - .rse(y_true, y_pred)
  )
}

#' @title Normalize numeric vector
#'
#' @param x Input numeric vector.
#' @param method Method used for normalization.
#' @param na_rm Whether to remove `NA` values,
#' and if setting TRUE, using `0` instead.
#' @param ... Parameters for other methods.
#'
#' @md
#' @return Normalized numeric vector
#' @export
#'
#' @examples
#' x <- c(runif(2), NA, -runif(2))
#' x
#' normalization(x, method = "max_min")
#' normalization(x, method = "maximum")
#' normalization(x, method = "sum")
#' normalization(x, method = "softmax")
#' normalization(x, method = "z_score")
#' normalization(x, method = "mad")
#' normalization(x, method = "unit_vector")
#' normalization(x, method = "unit_vector", na_rm = FALSE)
normalization <- function(
    x,
    method = "max_min",
    na_rm = TRUE,
    ...) {
  method <- match.arg(
    method,
    c(
      "max_min",
      "maximum",
      "sum",
      "softmax",
      "z_score",
      "mad",
      "unit_vector",
      "robust_scale"
    )
  )
  na_index <- which(is.na(x))
  x[na_index] <- 0
  x <- switch(
    EXPR = method,
    "max_min" = {
      (x - min(x)) / (max(x) - min(x))
    },
    "maximum" = {
      x / max(abs(x))
    },
    "sum" = {
      x / sum(abs(x))
    },
    "softmax" = {
      # exp(x - max(x)) / sum(exp(x - max(x)))
      temp <- (x - mean(x)) / stats::sd(x)
      exp(temp) / sum(exp(temp))
    },
    "z_score" = {
      (x - mean(x)) / stats::sd(x)
    },
    "mad" = {
      (x - stats::median(x)) / stats::mad(x)
    },
    "unit_vector" = {
      x / sqrt(sum(x^2))
    }
  )

  if (!na_rm) {
    x[na_index] <- NA
  }

  return(x)
}

#' @title Rescale numeric vector
#'
#' @param x A numeric vector.
#' @param from The range of the original data.
#' @param to The range of the rescaled data.
#'
#' @return A numeric vector with rescaled values.
#' @export
#'
#' @examples
#' x <- rnorm(10)
#' rescale(x)
#' rescale(x, from = c(0, 1))
#' rescale(x, to = c(0, 2))
rescale <- function(
    x,
    from = range(x, na.rm = TRUE, finite = TRUE),
    to = c(0, 1)) {
  if (zero_range(from) || zero_range(to)) {
    return(ifelse(is.na(x), NA, mean(to)))
  } else {
    return((x - from[1]) / diff(from) * diff(to) + to[1])
  }
}

zero_range <- function(
    x,
    tol = 1000 * .Machine$double.eps) {
  if (length(x) == 1) {
    return(TRUE)
  }
  if (length(x) != 2) {
    log_message(
      "x must be length 1 or 2",
      message_type = "error"
    )
  }
  if (any(is.na(x))) {
    return(NA)
  }
  if (x[1] == x[2]) {
    return(TRUE)
  }
  if (all(is.infinite(x))) {
    return(FALSE)
  }
  m <- min(abs(x))
  if (m == 0) {
    return(FALSE)
  }
  abs((x[1] - x[2]) / m) < tol
}

Any scripts or data that you put into this service are public.

thisutils documentation built on July 3, 2025, 9:09 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

thisutils
Collection of Utility Functions for Data Analysis and Computing

R/computation.R
In thisutils: Collection of Utility Functions for Data Analysis and Computing

Defines functions zero_range rescale normalization r_square .rse .sse .rmse check_sparsity simulate_sparse_matrix

Documented in check_sparsity normalization rescale r_square simulate_sparse_matrix

Try the thisutils package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

thisutils Collection of Utility Functions for Data Analysis and Computing

R/computation.R In thisutils: Collection of Utility Functions for Data Analysis and Computing

Defines functions zero_range rescale normalization r_square .rse .sse .rmse check_sparsity simulate_sparse_matrix

Documented in check_sparsity normalization rescale r_square simulate_sparse_matrix

Try the thisutils package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

thisutils
Collection of Utility Functions for Data Analysis and Computing

R/computation.R
In thisutils: Collection of Utility Functions for Data Analysis and Computing