Nothing
#' @useDynLib splikit, .registration = TRUE
#' @importFrom Rcpp evalCpp
#' @importFrom methods as
#' @importFrom stats na.omit
NULL
# Declare global variables used in data.table operations to avoid R CMD check NOTEs
utils::globalVariables(c("unique_mapped", "i", "j", "x_1", "x_tot", "x_2", "group_id", "ID"))
#' Compute Pseudo-Correlation Using Beta-Binomial Model
#'
#' This function calculates a pseudo R²-like correlation metric using a beta-binomial model
#' implemented in C++. It takes in a data matrix `ZDB_matrix` and two model matrices
#' for inclusion and exclusion, respectively. The function now supports both sparse and dense
#' matrices for m1 and m2, and allows selection between Cox-Snell and Nagelkerke R² metrics.
#'
#' @param ZDB_matrix A numeric dense matrix of shape (events x samples). Should have rownames representing events.
#' @param m1_inclusion A numeric matrix (dense or sparse) of the same number of rows as `ZDB_matrix`, representing inclusion features.
#' @param m2_exclusion A numeric matrix (dense or sparse) of the same number of rows as `ZDB_matrix`, representing exclusion features.
#' @param metric Character string specifying which R² metric to compute. Options are "CoxSnell" (default) or "Nagelkerke".
#' @param suppress_warnings Logical. If \code{TRUE} (default), suppresses warnings during computation (e.g., due to ill-conditioned inputs).
#' @param verbose Logical. If \code{TRUE}, prints progress and informational messages. Default is \code{FALSE}.
#'
#' @return A `data.table` with the following columns:
#' \describe{
#' \item{event}{The event names from `ZDB_matrix` rownames.}
#' \item{pseudo_correlation}{The computed pseudo R² correlation values using the specified metric.}
#' \item{null_distribution}{Null correlation values from a permuted version of `ZDB_matrix`.}
#' }
#'
#' @examples
#' \donttest{
#' set.seed(42)
#' # get the m1 object
#' junction_abundance_object <- load_toy_SJ_object()
#' m1_obj <- make_m1(junction_ab_object = junction_abundance_object)
#'
#' # obtaining the m1 and eventdata
#' m1_inclusion <- m1_obj$m1_inclusion_matrix
#' eventdata <- m1_obj$event_data
#' m2_exclusion <- make_m2(m1_inclusion, eventdata)
#'
#' # creating a dummy ZDB
#' ZDB_matrix <- matrix(rnorm(n = (nrow(m1_inclusion) * ncol(m1_inclusion)), sd = 7),
#' nrow = nrow(m1_inclusion),
#' ncol = ncol(m1_inclusion))
#' rownames(ZDB_matrix) <- rownames(m1_inclusion)
#'
#' # m1 and m2 can now be either sparse or dense matrices
#' # Example with dense matrices (backward compatible)
#' m1_dense <- as.matrix(m1_inclusion)
#' m2_dense <- as.matrix(m2_exclusion)
#' pseudo_r_square_cox <- get_pseudo_correlation(ZDB_matrix, m1_dense, m2_dense)
#' print(pseudo_r_square_cox)
#'
#' # Example with sparse matrices (more memory efficient)
#' pseudo_r_square_sparse <- get_pseudo_correlation(ZDB_matrix, m1_inclusion, m2_exclusion)
#'
#' # Example using Nagelkerke R-squared instead of Cox-Snell
#' pseudo_r_square_nagel <- get_pseudo_correlation(ZDB_matrix, m1_inclusion, m2_exclusion,
#' metric = "Nagelkerke")
#' print(pseudo_r_square_nagel)
#' }
#'
#' @export
get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion = NULL, m2_exclusion = NULL, metric = "CoxSnell", suppress_warnings = TRUE, verbose = FALSE) {
# Handle SplikitObject input
if (inherits(ZDB_matrix, "SplikitObject")) {
# First arg is SplikitObject, second should be ZDB_matrix
obj <- ZDB_matrix
if (is.null(m1_inclusion) || !is.matrix(m1_inclusion)) {
stop("When using SplikitObject, provide ZDB_matrix as second argument.", call. = FALSE)
}
ZDB_matrix <- m1_inclusion
m1_inclusion <- obj$m1
m2_exclusion <- obj$m2
if (is.null(m2_exclusion)) {
stop("SplikitObject has no M2 matrix. Call obj$makeM2() first.", call. = FALSE)
}
}
# Check m1 and m2 are provided
if (is.null(m1_inclusion) || is.null(m2_exclusion)) {
stop("m1_inclusion and m2_exclusion are required.", call. = FALSE)
}
# Validate metric parameter
metric <- match.arg(metric, choices = c("CoxSnell", "Nagelkerke"))
# Check ZDB_matrix (must be dense)
if (!is.matrix(ZDB_matrix)) stop("ZDB_matrix must be a dense matrix.")
# Check m1 and m2 (can be sparse or dense)
if (!is.matrix(m1_inclusion) && !inherits(m1_inclusion, "Matrix")) {
stop("m1_inclusion must be either a dense matrix or a sparse Matrix.")
}
if (!is.matrix(m2_exclusion) && !inherits(m2_exclusion, "Matrix")) {
stop("m2_exclusion must be either a dense matrix or a sparse Matrix.")
}
if (nrow(m1_inclusion) != nrow(ZDB_matrix)) {
stop("m1_inclusion must have the same number of rows as ZDB_matrix.")
}
if (nrow(m2_exclusion) != nrow(ZDB_matrix)) {
stop("m2_exclusion must have the same number of rows as ZDB_matrix.")
}
if (verbose) message("Input dimension check passed. Proceeding with computation.")
# Define the computation function
run_computation <- function() {
# Determine which C++ function to call based on matrix types
is_m1_sparse <- inherits(m1_inclusion, "Matrix")
is_m2_sparse <- inherits(m2_exclusion, "Matrix")
if (!is_m1_sparse && !is_m2_sparse) {
# Both dense - ensure they are matrices
correls <- cppBetabinPseudoR2(Z = ZDB_matrix,
m1 = as.matrix(m1_inclusion),
m2 = as.matrix(m2_exclusion),
metric = metric)
} else if (is_m1_sparse && is_m2_sparse) {
# Both sparse
correls <- cppBetabinPseudoR2_sparse(Z = ZDB_matrix,
m1 = m1_inclusion,
m2 = m2_exclusion,
metric = metric)
} else if (is_m1_sparse && !is_m2_sparse) {
# m1 sparse, m2 dense
correls <- cppBetabinPseudoR2_mixed1(Z = ZDB_matrix,
m1 = m1_inclusion,
m2 = as.matrix(m2_exclusion),
metric = metric)
} else {
# m1 dense, m2 sparse
correls <- cppBetabinPseudoR2_mixed2(Z = ZDB_matrix,
m1 = as.matrix(m1_inclusion),
m2 = m2_exclusion,
metric = metric)
}
if (is.null(rownames(ZDB_matrix))) {
warning("ZDB_matrix has no row names; assigning default event names.")
events <- paste0("Event_", seq_len(nrow(ZDB_matrix)))
} else {
events <- rownames(ZDB_matrix)
}
# Calculate null distribution with the same metric and matrix types
if (!is_m1_sparse && !is_m2_sparse) {
null_dist <- cppBetabinPseudoR2(Z = ZDB_matrix[, sample.int(ncol(ZDB_matrix))],
m1 = as.matrix(m1_inclusion),
m2 = as.matrix(m2_exclusion),
metric = metric)
} else if (is_m1_sparse && is_m2_sparse) {
null_dist <- cppBetabinPseudoR2_sparse(Z = ZDB_matrix[, sample.int(ncol(ZDB_matrix))],
m1 = m1_inclusion,
m2 = m2_exclusion,
metric = metric)
} else if (is_m1_sparse && !is_m2_sparse) {
null_dist <- cppBetabinPseudoR2_mixed1(Z = ZDB_matrix[, sample.int(ncol(ZDB_matrix))],
m1 = m1_inclusion,
m2 = as.matrix(m2_exclusion),
metric = metric)
} else {
null_dist <- cppBetabinPseudoR2_mixed2(Z = ZDB_matrix[, sample.int(ncol(ZDB_matrix))],
m1 = as.matrix(m1_inclusion),
m2 = m2_exclusion,
metric = metric)
}
data.table::data.table(
event = events,
pseudo_correlation = correls,
null_distribution = null_dist
)
}
# Run computation with or without warning suppression
if (suppress_warnings) {
results <- suppressWarnings(run_computation())
} else {
results <- run_computation()
}
results <- na.omit(results)
if (verbose) message("Computation completed successfully.")
return(results)
}
#' Calculate Row-wise Variance for Dense or Sparse Matrices
#'
#' @description
#' Efficiently computes the variance of each row for either a base R dense matrix
#' or a sparse dgCMatrix, via a single Rcpp entry point. Logs progress messages
#' to the R console.
#' @param M A numeric matrix (base R matrix) or a sparse matrix of class \code{"dgCMatrix"}.
#' @param verbose Logical. If \code{TRUE}, prints progress and informational messages. Default is \code{FALSE}.
#' @return A numeric vector of length \code{nrow(M)} containing the variance of each row.
#' @details
#' Dispatches in C++ between dense and sparse implementations to avoid unnecessary
#' overhead or external dependencies. Uses compressed-column traversal for sparse inputs.
#' @examples
#' library(Matrix)
#' # Dense example
#' dm <- matrix(rnorm(1000), nrow = 100)
#' get_rowVar(dm)
#' # Sparse example
#' sm <- rsparsematrix(100, 10, density = 0.1)
#' get_rowVar(sm)
#' @note
#' Only 32-bit integer indices are supported, due to limitations in R's internal matrix representations.
#' This function will not work with matrices that exceed the 32-bit integer indexing range.
#' @export
get_rowVar <- function(M, verbose = FALSE) {
if (!(is.matrix(M) || inherits(M, "dgCMatrix"))) {
stop("`M` must be either a dense numeric matrix or a dgCMatrix.")
}
if (verbose) message("[get_rowVar] Starting computation")
result <- rowVariance_cpp(M)
if (verbose) message("[get_rowVar] Computation finished")
result
}
#' Compute Average Silhouette Width with Logging
#'
#' Computes the average silhouette width for a clustering solution using Euclidean distance.
#'
#' @param X A numeric matrix where rows are observations and columns are features.
#' @param cluster_assignments An integer vector of cluster assignments, which must be the same length as the number of rows in \code{X}.
#' @param n_threads Number of threads to use for parallel processing.
#' @param verbose Logical. If \code{TRUE}, prints progress and informational messages. Default is \code{FALSE}.
#'
#' @note This process can be very slow for large matrices if single-threaded. Use multiple threads to take advantage of parallel computation for significantly faster results.
#'
#' @return A single numeric value: the average silhouette score.
#'
#' @examples
#' # Preparing the inputs
#' set.seed(42)
#' pc_matrix <- matrix(data = rnorm(n = 10000 * 15, sd = 2), nrow = 10000, ncol = 15)
#' cluster_numbers <- as.integer(runif(n = 10000, min = 1, max = 10))
#'
#' # Getting the mean silhouette score
#' n_threads <- parallel::detectCores()
#' score <- get_silhouette_mean(pc_matrix, cluster_numbers, n_threads)
#' print(score)
#'
#' @export
get_silhouette_mean <- function(X, cluster_assignments, n_threads = 1, verbose = FALSE) {
stopifnot(is.matrix(X), is.numeric(X))
stopifnot(is.integer(cluster_assignments) || is.numeric(cluster_assignments))
stopifnot(nrow(X) == length(cluster_assignments))
if (verbose) message("[silhouette_avg] Starting computation...")
if (verbose) message("[silhouette_avg] Using ", n_threads, " threads...")
score <- silhouette_avg(X, as.integer(cluster_assignments), as.integer(n_threads))
return(score)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.