R/RcppExports.R

Defines functions bd_wproduct bdtCrossprod bdScalarwproduct bdCrossprod bdCorr_matrix bdblockSum bdblockSubstract bdblockMult bdWrite_hdf5_dimnames system_info get_cpu_cores can_allocate get_available_ram get_total_ram bdReduce_hdf5_dataset rcpp_hdf5_create_matrix rcpp_hdf5dataset_write_all rcpp_hdf5dataset_write_block rcpp_hdf5dataset_svd rcpp_hdf5dataset_read_all rcpp_hdf5dataset_subset rcpp_hdf5dataset_split rcpp_hdf5dataset_multiply_sparse rcpp_hdf5dataset_apply_function rcpp_hdf5dataset_reduce rcpp_hdf5dataset_qr rcpp_hdf5dataset_pseudoinv rcpp_hdf5dataset_pca rcpp_hdf5dataset_filter_maf rcpp_hdf5dataset_filter_low_coverage rcpp_hdf5dataset_impute_snps rcpp_hdf5dataset_normalize rcpp_hdf5dataset_tcrossprod rcpp_hdf5dataset_crossprod rcpp_hdf5dataset_multiply rcpp_hdf5_close_all_file_handles rcpp_hdf5_close_file_handles_safe rcpp_hdf5_close_file_handles rcpp_hdf5_close_at_paths rcpp_hdf5dataset_close rcpp_hdf5dataset_write_dimnames rcpp_hdf5dataset_read_dimnames rcpp_hdf5dataset_is_valid rcpp_hdf5dataset_info rcpp_hdf5dataset_dim rcpp_hdf5dataset_open rcpp_hdf5_close_all_registry rcpp_hdf5dataset_diag_scale rcpp_hdf5dataset_diag_op rcpp_hdf5dataset_diag_set rcpp_hdf5dataset_diag_get rcpp_hdf5dataset_sweep rcpp_hdf5dataset_eigen rcpp_hdf5dataset_cor rcpp_hdf5dataset_solve rcpp_hdf5dataset_chol rcpp_hdf5dataset_bind rcpp_hdf5dataset_div_ew rcpp_hdf5dataset_mul_ew rcpp_hdf5dataset_subtract rcpp_hdf5dataset_add rcpp_hdf5dataset_scalar_sd rcpp_hdf5dataset_scalar_var rcpp_hdf5dataset_scalar_max rcpp_hdf5dataset_scalar_min rcpp_hdf5dataset_scalar_mean rcpp_hdf5dataset_scalar_sum rcpp_hdf5dataset_rowSds rcpp_hdf5dataset_rowVars rcpp_hdf5dataset_rowMaxs rcpp_hdf5dataset_rowMins rcpp_hdf5dataset_rowMeans rcpp_hdf5dataset_rowSums rcpp_hdf5dataset_colSds rcpp_hdf5dataset_colVars rcpp_hdf5dataset_colMaxs rcpp_hdf5dataset_colMins rcpp_hdf5dataset_colMeans rcpp_hdf5dataset_colSums bdpseudoinv_hdf5 bdpseudoinv bdmove_hdf5_dataset bdImportTextFile_hdf5 bdgetDatasetsList_hdf5 
bdCreate_hdf5_matrix bdCreate_hdf5_group bdapply_Function_hdf5

Documented in bdapply_Function_hdf5 bdblockMult bdblockSubstract bdblockSum bdCorr_matrix bdCreate_hdf5_group bdCreate_hdf5_matrix bdCrossprod bdgetDatasetsList_hdf5 bdImportTextFile_hdf5 bdmove_hdf5_dataset bdpseudoinv bdpseudoinv_hdf5 bdReduce_hdf5_dataset bdScalarwproduct bdtCrossprod bd_wproduct bdWrite_hdf5_dimnames can_allocate get_available_ram get_cpu_cores get_total_ram rcpp_hdf5_close_all_file_handles rcpp_hdf5_close_all_registry rcpp_hdf5_close_at_paths rcpp_hdf5_close_file_handles rcpp_hdf5_close_file_handles_safe rcpp_hdf5_create_matrix rcpp_hdf5dataset_add rcpp_hdf5dataset_close rcpp_hdf5dataset_colMaxs rcpp_hdf5dataset_colMeans rcpp_hdf5dataset_colMins rcpp_hdf5dataset_colSds rcpp_hdf5dataset_colSums rcpp_hdf5dataset_colVars rcpp_hdf5dataset_crossprod rcpp_hdf5dataset_dim rcpp_hdf5dataset_div_ew rcpp_hdf5dataset_info rcpp_hdf5dataset_is_valid rcpp_hdf5dataset_mul_ew rcpp_hdf5dataset_multiply rcpp_hdf5dataset_open rcpp_hdf5dataset_read_all rcpp_hdf5dataset_read_dimnames rcpp_hdf5dataset_rowMaxs rcpp_hdf5dataset_rowMeans rcpp_hdf5dataset_rowMins rcpp_hdf5dataset_rowSds rcpp_hdf5dataset_rowSums rcpp_hdf5dataset_rowVars rcpp_hdf5dataset_scalar_max rcpp_hdf5dataset_scalar_mean rcpp_hdf5dataset_scalar_min rcpp_hdf5dataset_scalar_sd rcpp_hdf5dataset_scalar_sum rcpp_hdf5dataset_scalar_var rcpp_hdf5dataset_subset rcpp_hdf5dataset_subtract rcpp_hdf5dataset_tcrossprod rcpp_hdf5dataset_write_all rcpp_hdf5dataset_write_block rcpp_hdf5dataset_write_dimnames system_info

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Apply function to different datasets inside a group
#'
#' This function provides a unified interface for applying various mathematical
#' operations to HDF5 datasets. It supports both single-dataset operations and
#' operations between multiple datasets.
#' 
#' @param filename Character array, indicating the name of the file to create
#' @param group Character array, indicating the input group that contains
#'        the datasets to be processed
#' @param datasets Character array, indicating the input datasets to be used
#' @param outgroup Character array, indicating the group where the results
#'        will be saved after applying the function. If NULL, the output
#'        dataset is stored in the same input group
#' @param func Character array, function to be applied:
#'        - "QR": QR decomposition via bdQR()
#'        - "CrossProd": Cross product via bdCrossprod()
#'        - "tCrossProd": Transposed cross product via bdtCrossprod()
#'        - "invChol": Inverse via Cholesky decomposition
#'        - "blockmult": Matrix multiplication 
#'        - "CrossProd_double": Cross product with two matrices
#'        - "tCrossProd_double": Transposed cross product with two matrices
#'        - "solve": Matrix equation solving
#'        - "sdmean": Standard deviation and mean computation
#' @param b_group Optional character array indicating the input group for
#'        secondary datasets (used in two-matrix operations)
#' @param b_datasets Optional character array indicating the secondary datasets
#'        for two-matrix operations
#' @param overwrite Optional boolean. If true, overwrites existing results
#' @param transp_dataset Optional boolean. If true, transposes first dataset
#' @param transp_bdataset Optional boolean. If true, transposes second dataset
#' @param fullMatrix Optional boolean for Cholesky operations. If true, stores
#'        complete matrix; if false, stores only lower triangular
#' @param byrows Optional boolean for statistical operations. If true, computes
#'        by rows; if false, by columns
#' @param threads Optional integer specifying number of threads for parallel processing
#' 
#' @return Modifies the HDF5 file in place, adding computed results
#' 
#' @details
#' For matrix multiplication operations (`blockmult`, `CrossProd_double`, `tCrossProd_double`),
#' the `datasets` and `b_datasets` vectors must have the same length. Each operation is performed
#' element-wise between the corresponding pairs of datasets. Specifically, the `b_datasets` vector
#' defines the second operand for each matrix multiplication. For example, if
#' `datasets = {"A1", "A2", "A3"}` and `b_datasets = {"B1", "B2", "B3"}`, the operations
#' executed are: `A1 %*% B1`, `A2 %*% B2`, and `A3 %*% B3`.
#' 
#' Example: If `datasets = {"A1", "A2", "A3"}` and `b_datasets = {"B1", "B2", "B3"}`,
#' the function computes: `A1 %*% B1`, `A2 %*% B2`, and `A3 %*% B3`
#' 
#' @examples
#' \donttest{
#' fn <- tempfile(fileext = ".h5")
#' Y <- matrix(rnorm(100), 10, 10)
#' X <- matrix(rnorm(100), 10, 10)
#' Z <- matrix(rnorm(100), 10, 10)
#' 
#' hdf5_create_matrix(fn, "data/Y", data = Y)
#' hdf5_create_matrix(fn, "data/X", data = X)
#' hdf5_create_matrix(fn, "data/Z", data = Z)
#' 
#' dsets <- list_datasets(fn, group = "data")
#' 
#' bdapply_Function_hdf5(filename = fn,
#'                       group = "data", datasets = dsets,
#'                       outgroup = "QR", func = "QR",
#'                       overwrite = TRUE)
#' hdf5_close_all()
#' unlink(fn)
#' }
#' 
#' @note Performance is optimized through:
#'       - Block-wise processing for large datasets
#'       - Parallel computation where applicable
#'       - Memory-efficient matrix operations
#' 
#' @export
bdapply_Function_hdf5 <- function(filename, group, datasets, outgroup, func,
                                  b_group = NULL, b_datasets = NULL,
                                  overwrite = FALSE, transp_dataset = FALSE,
                                  transp_bdataset = FALSE, fullMatrix = FALSE,
                                  byrows = FALSE, threads = 2L) {
  # Forward every argument, in declaration order, to the compiled routine and
  # return its result invisibly.
  invisible(
    .Call(
      "_BigDataStatMeth_bdapply_Function_hdf5",
      PACKAGE = "BigDataStatMeth",
      filename, group, datasets, outgroup, func,
      b_group, b_datasets,
      overwrite, transp_dataset, transp_bdataset,
      fullMatrix, byrows, threads
    )
  )
}

#' Create Group in an HDF5 File
#'
#' @description
#' Create a (nested) group inside an HDF5 file. The operation is
#' idempotent: if the group already exists, no error is raised.
#'
#' @details
#' Intermediate groups are created when needed. The HDF5 file must
#' exist prior to the call (create it with a writer function).
#'
#' @param filename Character string. Path to the HDF5 file.
#' @param group Character string. Group path to create
#'   (e.g., `"MGCCA_OUT/scores"`).
#'
#' @return List with components:
#' \describe{
#'   \item{fn}{Character string with the HDF5 filename}
#'   \item{gr}{Character string with the full group path created within the 
#'   HDF5 file}
#' }
#'
#' @examples
#' \donttest{
#' fn <- tempfile(fileext = ".h5")
#' hdf5_create_matrix(fn, "tmp/seed", data = matrix(0, 1, 1))
#' bdCreate_hdf5_group(fn, "MGCCA_OUT/scores")
#' hdf5_close_all()
#' unlink(fn)
#' }
#'
#' @references
#' The HDF Group. HDF5 User's Guide.
#'
#' @seealso
#' \code{\link{hdf5_create_matrix}}.
#'
#' @export
bdCreate_hdf5_group <- function(filename, group) {
  # Delegate to the compiled routine looked up in the BigDataStatMeth library.
  .Call(
    "_BigDataStatMeth_bdCreate_hdf5_group",
    PACKAGE = "BigDataStatMeth",
    filename, group
  )
}

#' Create HDF5 data file and write data to it
#'
#' Creates an HDF5 file containing a numerical data matrix.
#' 
#' @param filename character array indicating the name of the file to create
#' @param object numerical data matrix
#' @param group character array indicating folder name to put the matrix in HDF5 file
#' @param dataset character array indicating the dataset name to store the matrix data
#' @param transp boolean, if transp = true the matrix is stored transposed in the HDF5 file
#' @param overwriteFile optional boolean, by default overwriteFile = false; if 
#' true and the file exists, removes the old file and creates a new file with the 
#' dataset data.
#' @param overwriteDataset optional boolean, by default overwriteDataset = false;
#' if true and the dataset exists, removes the old dataset and creates a new dataset.
#' @param unlimited optional boolean, by default unlimited = false; if true 
#' creates a dataset that can grow.
#' @return List with components:
#' \describe{
#'   \item{fn}{Character string with the HDF5 filename}
#'   \item{ds}{Character string with the full dataset path to the created matrix (group/dataset)}
#' }
#' 
#' @examples
#' \donttest{
#'     fn <- tempfile(fileext = ".h5")
#'     matA <- matrix(c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15), nrow = 3, byrow = TRUE)
#'     bdCreate_hdf5_matrix(filename = fn,
#'                          object = matA, group = "datasets",
#'                          dataset = "datasetA", transp = FALSE,
#'                          overwriteFile = TRUE,
#'                          overwriteDataset = TRUE,
#'                          unlimited = FALSE)
#'     hdf5_close_all()
#'     unlink(fn)
#' }
#' 
#' @export
bdCreate_hdf5_matrix <- function(filename, object, group = NULL,
                                 dataset = NULL, transp = NULL,
                                 overwriteFile = NULL, overwriteDataset = NULL,
                                 unlimited = NULL) {
  # Optional arguments left as NULL are passed through untouched; the compiled
  # routine receives them exactly as given.
  .Call(
    "_BigDataStatMeth_bdCreate_hdf5_matrix",
    PACKAGE = "BigDataStatMeth",
    filename, object, group, dataset,
    transp, overwriteFile, overwriteDataset, unlimited
  )
}

#' List Datasets in HDF5 Group
#'
#' @description
#' Retrieves a list of all datasets within a specified HDF5 group, with optional
#' filtering by prefix or suffix.
#'
#' @details
#' This function provides flexible dataset listing capabilities for HDF5 files.
#' Key features:
#' 
#' * Listing options:
#'   - All datasets in a group
#'   - Datasets matching a prefix
#'   - Datasets matching a suffix
#' 
#' * Implementation features:
#'   - Safe HDF5 file operations
#'   - Memory-efficient implementation
#'   - Comprehensive error handling
#'   - Read-only access to files
#'
#' The function opens the HDF5 file in read-only mode to ensure data safety.
#'
#' @param filename  Character string. Path to the HDF5 file.
#' @param group     Character string or \code{NULL}. Group path within the
#'   HDF5 file. If \code{NULL} (default), the entire file is traversed
#'   recursively and dataset paths are returned relative to the root
#'   (e.g. \code{"INPUT/A"}, \code{"RESULTS/SVD/d"}).
#' @param prefix    Optional character string. Only return datasets whose
#'   name starts with this prefix.
#' @param recursive Logical. If \code{TRUE}, recurse into subgroups and
#'   return full relative paths. Ignored when \code{group = NULL} (always
#'   recursive). Default \code{FALSE}.
#'
#' @return Character vector containing dataset names.
#'
#' @examples
#' \donttest{
#' fn <- tempfile(fileext = ".h5")
#' X  <- hdf5_create_matrix(fn, "INPUT/A",  data = matrix(rnorm(100), 10, 10))
#' Y  <- hdf5_create_matrix(fn, "INPUT/B",  data = matrix(rnorm(100), 10, 10))
#' Z  <- hdf5_create_matrix(fn, "RESULTS/C",data = matrix(rnorm(100), 10, 10))
#'
#' # All datasets in the file (recursive from root)
#' bdgetDatasetsList_hdf5(fn)
#'
#' # Only datasets in INPUT group
#' bdgetDatasetsList_hdf5(fn, group = "INPUT")
#'
#' # INPUT group, recursive (same result here, no subgroups)
#' bdgetDatasetsList_hdf5(fn, group = "INPUT", recursive = TRUE)
#'
#' # Filter by prefix
#' bdgetDatasetsList_hdf5(fn, group = "INPUT", prefix = "A")
#'
#' hdf5_close_all()
#' unlink(fn)
#' }
#'
#' @references
#' * The HDF Group. (2000-2010). HDF5 User's Guide.
#'
#' @export
bdgetDatasetsList_hdf5 <- function(filename, group = NULL, prefix = NULL,
                                   recursive = FALSE) {
  # Thin wrapper: hand the arguments to the compiled routine in order.
  .Call(
    "_BigDataStatMeth_bdgetDatasetsList_hdf5",
    PACKAGE = "BigDataStatMeth",
    filename, group, prefix, recursive
  )
}

#' Import Text File to HDF5
#'
#' @description
#' Converts a text file (e.g., CSV, TSV) to HDF5 format, providing efficient
#' storage and access capabilities.
#'
#' @details
#' This function provides flexible text file import capabilities with support for:
#' 
#' * Input format options:
#'   - Custom field separators
#'   - Header row handling
#'   - Row names handling
#' 
#' * Processing options:
#'   - Parallel processing
#'   - Memory-efficient import
#'   - Configurable thread count
#' 
#' * File handling:
#'   - Safe file operations
#'   - Overwrite protection
#'   - Comprehensive error handling
#'
#' The function supports parallel processing for large files and provides
#' memory-efficient import capabilities.
#'
#' @param filename Character string. Path to the input text file.
#' @param outputfile Character string. Path to the output HDF5 file.
#' @param outGroup Character string. Name of the group to create in HDF5 file.
#' @param outDataset Character string. Name of the dataset to create.
#' @param sep Character string (optional). Field separator, default is "\\t".
#' @param header Logical (optional). Whether first row contains column names.
#' @param rownames Logical (optional). Whether first column contains row names.
#' @param overwrite Logical (optional). Whether to overwrite existing dataset.
#' @param paral Logical (optional). Whether to use parallel processing.
#' @param threads Integer (optional). Number of threads for parallel processing.
#' @param overwriteFile Logical (optional). Whether to overwrite existing HDF5 file.
#'
#' @return List with components:
#' \describe{
#'   \item{fn}{Character string with the HDF5 filename}
#'   \item{ds}{Character string with the full dataset path to the imported data (group/dataset)}
#'   \item{ds_rows}{Character string with the full dataset path to the row names}
#'   \item{ds_cols}{Character string with the full dataset path to the column names}
#' }
#'
#' @examples
#' \donttest{
#' 
#' hdf5_file <- tempfile(fileext = ".h5")
#' csv_file <- tempfile(fileext = ".csv")
#' 
#' # Create a test CSV file
#' data <- matrix(rnorm(100), 10, 10)
#' write.csv(data, csv_file, row.names = FALSE)
#' 
#' # Import to HDF5
#' bdImportTextFile_hdf5(
#'   filename = csv_file,
#'   outputfile = hdf5_file,
#'   outGroup = "data",
#'   outDataset = "matrix1",
#'   sep = ",",
#'   header = TRUE,
#'   overwriteFile = TRUE
#' )
#' 
#' # Cleanup
#' unlink(c(csv_file, hdf5_file))
#' }
#'
#' @references
#' * The HDF Group. (2000-2010). HDF5 User's Guide.
#'
#' @seealso
#' * \code{hdf5_create_matrix} for creating HDF5 matrices directly
#'
#' @export
bdImportTextFile_hdf5 <- function(filename, outputfile, outGroup, outDataset,
                                  sep = NULL, header = FALSE, rownames = FALSE,
                                  overwrite = FALSE, paral = NULL,
                                  threads = NULL, overwriteFile = NULL) {
  # Forward all import options unchanged to the compiled routine.
  .Call(
    "_BigDataStatMeth_bdImportTextFile_hdf5",
    PACKAGE = "BigDataStatMeth",
    filename, outputfile, outGroup, outDataset,
    sep, header, rownames, overwrite,
    paral, threads, overwriteFile
  )
}

#' Move HDF5 Dataset
#'
#' @description
#' Moves an HDF5 dataset from one location to another within the same HDF5 file.
#' This function automatically handles moving associated rownames and colnames 
#' datasets, creates parent groups if needed, and updates all internal references.
#'
#' @param filename Character string. Path to the HDF5 file
#' @param source_path Character string. Current path to the dataset (e.g., "/group1/dataset1")
#' @param dest_path Character string. New path for the dataset (e.g., "/group2/new_name")
#' @param overwrite Logical. Whether to overwrite destination if it exists (default: FALSE)
#'
#' @return List with components. If an error occurs, all string values are 
#' returned as empty strings (""):
#' \describe{
#'   \item{fn}{Character string with the HDF5 filename}
#'   \item{ds}{Character string with the full dataset path to the moved dataset 
#'   in its new location (group/dataset)}
#' }
#'
#' @details
#' This function provides a high-level interface for moving datasets within HDF5 files.
#' The operation is efficient as it uses HDF5's native linking mechanism without 
#' copying actual data.
#'
#' Key features:
#' \itemize{
#'   \item Moves main dataset and associated rownames/colnames datasets
#'   \item Creates parent directory structure automatically
#'   \item Preserves all dataset attributes and properties
#'   \item Updates internal dataset references
#'   \item Efficient metadata-only operation
#'   \item Comprehensive error handling
#' }
#'
#' @section Behavior:
#' \itemize{
#'   \item If the destination parent groups don't exist, they will be created automatically
#'   \item Associated rownames and colnames datasets are moved to the same new group
#'   \item All dataset attributes and properties are preserved during the move
#'   \item The operation is atomic - either all elements move successfully or none do
#' }
#'
#' @section Requirements:
#' \itemize{
#'   \item The HDF5 file must exist and be accessible
#'   \item The source dataset must exist
#'   \item The file must not be locked by another process
#'   \item User must have read-write permissions on the file
#' }
#'
#' @examples
#' \donttest{
#' fn <- tempfile(fileext = ".h5")
#' 
#' # Create a dataset to move
#' hdf5_create_matrix(fn, "old_group/my_dataset",
#'                    data = matrix(rnorm(100), 10, 10))
#' 
#' # Move dataset to a different group
#' res <- bdmove_hdf5_dataset(fn,
#'                            source_path = "old_group/my_dataset",
#'                            dest_path   = "new_group/my_dataset")
#' 
#' # Rename dataset within the same group
#' hdf5_create_matrix(fn, "data/old_name",
#'                    data = matrix(rnorm(100), 10, 10))
#' res <- bdmove_hdf5_dataset(fn,
#'                            source_path = "data/old_name",
#'                            dest_path   = "data/new_name")
#' 
#' hdf5_close_all()
#' unlink(fn)
#' }
#'
#' @family BigDataStatMeth HDF5 utilities
#' @author BigDataStatMeth package authors
#' @export
bdmove_hdf5_dataset <- function(filename, source_path, dest_path,
                                overwrite = FALSE) {
  # Delegate the move to the compiled routine; arguments go through as given.
  .Call(
    "_BigDataStatMeth_bdmove_hdf5_dataset",
    PACKAGE = "BigDataStatMeth",
    filename, source_path, dest_path, overwrite
  )
}

#' Compute Matrix Pseudoinverse (In-Memory)
#'
#' @description
#' Computes the Moore-Penrose pseudoinverse of a matrix using SVD decomposition.
#' This implementation handles both square and rectangular matrices, and provides
#' numerically stable results even for singular or near-singular matrices.
#'
#' @details
#' The Moore-Penrose pseudoinverse (denoted A+) of a matrix A is computed using 
#' Singular Value Decomposition (SVD). 
#'
#' For a matrix A = U*Sigma*V^T (where ^T denotes transpose), the pseudoinverse is 
#' computed as:
#'
#' \deqn{A^+ = V \Sigma^+ U^T}
#'
#' where Sigma+ is obtained by taking the reciprocal of non-zero singular values.
#'
#' @section Mathematical Details:
#' \itemize{
#'   \item SVD decomposition: \eqn{A = U \Sigma V^T}
#'   \item Pseudoinverse: \eqn{A^+ = V \Sigma^+ U^T}
#'   \item \eqn{\Sigma^+_{ii} = 1/\Sigma_{ii}} if \eqn{\Sigma_{ii} > \text{tolerance}}
#'   \item \eqn{\Sigma^+_{ii} = 0} otherwise
#' }
#' 
#' Key features:
#' * Robust computation:
#'   - Handles singular and near-singular matrices
#'   - Automatic threshold for small singular values
#'   - Numerically stable implementation
#' 
#' * Implementation details:
#'   - Uses efficient SVD algorithms
#'   - Parallel processing support
#'   - Memory-efficient computation
#'   - Handles both dense and sparse inputs
#'
#' The pseudoinverse satisfies the Moore-Penrose conditions:
#' * \eqn{AA^+A = A}
#' * \eqn{A^+AA^+ = A^+}
#' * \eqn{(AA^+)^* = AA^+}
#' * \eqn{(A^+A)^* = A^+A}
#' 
#' @param X Numeric matrix or vector to be pseudoinverted.
#' @param threads Optional integer. Number of threads for parallel computation.
#'   If NULL, uses maximum available threads.
#'
#' @return The pseudoinverse matrix of X.
#'
#' @examples
#' 
#' # Create a singular matrix
#' X <- matrix(c(1,2,3,2,4,6), 2, 3)  # rank-deficient matrix
#' 
#' # Compute pseudoinverse
#' X_pinv <- bdpseudoinv(X)
#' 
#' # Verify Moore-Penrose conditions
#' # 1. X %*% X_pinv %*% X = X
#' all.equal(X %*% X_pinv %*% X, X)
#' 
#' # 2. X_pinv %*% X %*% X_pinv = X_pinv
#' all.equal(X_pinv %*% X %*% X_pinv, X_pinv)
#'
#' @references
#' * Golub, G. H., & Van Loan, C. F. (2013). Matrix Computations, 4th Edition.
#'   Johns Hopkins University Press.
#' * Ben-Israel, A., & Greville, T. N. E. (2003). Generalized Inverses:
#'   Theory and Applications, 2nd Edition. Springer.
#'
#'
#' @export
bdpseudoinv <- function(X, threads = NULL) {
  # Pass the matrix and the optional thread count to the compiled routine.
  .Call(
    "_BigDataStatMeth_bdpseudoinv",
    PACKAGE = "BigDataStatMeth",
    X, threads
  )
}

#' Compute Matrix Pseudoinverse (HDF5-Stored)
#'
#' @description
#' Computes the Moore-Penrose pseudoinverse of a matrix stored in HDF5 format.
#' The implementation is designed for large matrices, using block-based processing
#' and efficient I/O operations.
#'
#' @details
#' This function provides an HDF5-based implementation for computing pseudoinverses
#' of large matrices. Key features:
#' 
#' * HDF5 Integration:
#'   - Efficient reading of input matrix
#'   - Block-based processing for large matrices
#'   - Memory-efficient computation
#'   - Direct output to HDF5 format
#' 
#' * Implementation Features:
#'   - SVD-based computation
#'   - Parallel processing support
#'   - Automatic memory management
#'   - Flexible output options
#'
#' The function handles:
#' * Data validation
#' * Memory management
#' * Error handling
#' * HDF5 file operations
#'
#' @param filename String. Path to the HDF5 file.
#' @param group String. Group containing the input matrix.
#' @param dataset String. Dataset name for the input matrix.
#' @param outgroup Optional string. Output group name (defaults to "PseudoInverse").
#' @param outdataset Optional string. Output dataset name (defaults to input dataset name).
#' @param overwrite Logical. Whether to overwrite existing results.
#' @param threads Optional integer. Number of threads for parallel computation.
#'
#' @return List with components. If an error occurs, all string values are returned as empty strings (""):
#' \describe{
#'   \item{fn}{Character string with the HDF5 filename}
#'   \item{ds}{Character string with the full dataset path to the pseudoinverse matrix (group/dataset)}
#' }
#'
#' @examples
#' \donttest{
#'     fn <- tempfile(fileext = ".h5")
#'     X <- matrix(c(1,2,3,2,4,6), 2, 3)
#'     hdf5_create_matrix(fn, "data/X", data = X)
#' 
#'     bdpseudoinv_hdf5(filename = fn,
#'                      group = "data",
#'                      dataset = "X",
#'                      outgroup = "results",
#'                      outdataset = "X_pinv",
#'                      overwrite = TRUE)
#'     hdf5_close_all()
#'     unlink(fn)
#' }
#'
#' @references
#' * Golub, G. H., & Van Loan, C. F. (2013). Matrix Computations, 4th Edition.
#'   Johns Hopkins University Press.
#' * The HDF Group. (2000-2010). HDF5 User's Guide.
#'
#' @seealso
#' * \code{\link{bdpseudoinv}} for in-memory computation
#' * \code{\link{bdCreate_hdf5_matrix}} for creating HDF5 matrices
#'
#' @export
bdpseudoinv_hdf5 <- function(filename, group, dataset, outgroup = NULL,
                             outdataset = NULL, overwrite = NULL,
                             threads = NULL) {
  # Forward the dataset location and output options to the compiled routine.
  .Call(
    "_BigDataStatMeth_bdpseudoinv_hdf5",
    PACKAGE = "BigDataStatMeth",
    filename, group, dataset,
    outgroup, outdataset, overwrite, threads
  )
}

#' Column sums of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{colSums(X)} for an HDF5
#' matrix referenced by an external pointer.
#'
#' @param ptr     External pointer (SEXP) to an open hdf5Dataset.
#' @param paral   Logical or NULL; enable OpenMP parallelisation.
#' @param wsize   Integer or NULL; block size (NULL = auto).
#' @param threads Integer or NULL; thread count (NULL = auto).
#'
#' @return Numeric vector of length ncols_R.
#'
#' @keywords internal
rcpp_hdf5dataset_colSums <- function(ptr, paral = NULL, wsize = NULL,
                                     threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_colSums",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Column means of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{colMeans(X)}.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Numeric vector of length ncols_R.
#' @keywords internal
rcpp_hdf5dataset_colMeans <- function(ptr, paral = NULL, wsize = NULL,
                                      threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_colMeans",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Column minimums of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{apply(X, 2, min)}.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Numeric vector of length ncols_R.
#' @keywords internal
rcpp_hdf5dataset_colMins <- function(ptr, paral = NULL, wsize = NULL,
                                     threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_colMins",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Column maximums of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{apply(X, 2, max)}.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Numeric vector of length ncols_R.
#' @keywords internal
rcpp_hdf5dataset_colMaxs <- function(ptr, paral = NULL, wsize = NULL,
                                     threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_colMaxs",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Column variances of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{apply(X, 2, var)}.
#' Uses Bessel's correction (n-1).
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Numeric vector of length ncols_R.
#' @keywords internal
rcpp_hdf5dataset_colVars <- function(ptr, paral = NULL, wsize = NULL,
                                     threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_colVars",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Column standard deviations of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{apply(X, 2, sd)}.
#' Uses Bessel's correction (n-1).
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Numeric vector of length ncols_R.
#' @keywords internal
rcpp_hdf5dataset_colSds <- function(ptr, paral = NULL, wsize = NULL,
                                    threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_colSds",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Row sums of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{rowSums(X)}.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Numeric vector of length nrows_R.
#' @keywords internal
rcpp_hdf5dataset_rowSums <- function(ptr, paral = NULL, wsize = NULL,
                                     threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_rowSums",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Row means of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{rowMeans(X)}.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Numeric vector of length nrows_R.
#' @keywords internal
rcpp_hdf5dataset_rowMeans <- function(ptr, paral = NULL, wsize = NULL,
                                      threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_rowMeans",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Row minimums of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{apply(X, 1, min)}.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Numeric vector of length nrows_R.
#' @keywords internal
rcpp_hdf5dataset_rowMins <- function(ptr, paral = NULL, wsize = NULL,
                                     threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_rowMins",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Row maximums of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{apply(X, 1, max)}.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Numeric vector of length nrows_R.
#' @keywords internal
rcpp_hdf5dataset_rowMaxs <- function(ptr, paral = NULL, wsize = NULL,
                                     threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_rowMaxs",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Row variances of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{apply(X, 1, var)}.
#' Uses Bessel's correction (n-1).
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Numeric vector of length nrows_R.
#' @keywords internal
rcpp_hdf5dataset_rowVars <- function(ptr, paral = NULL, wsize = NULL,
                                     threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_rowVars",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Row standard deviations of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise, OpenMP-parallel computation of \code{apply(X, 1, sd)}.
#' Uses Bessel's correction (n-1).
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Numeric vector of length nrows_R.
#' @keywords internal
rcpp_hdf5dataset_rowSds <- function(ptr, paral = NULL, wsize = NULL,
                                    threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_rowSds",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Sum of all elements of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise computation of \code{sum(X)}.  Equivalent to
#' \code{sum(as.matrix(X))} but without loading the full matrix into RAM.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Scalar numeric.
#' @keywords internal
rcpp_hdf5dataset_scalar_sum <- function(ptr, paral = NULL, wsize = NULL,
                                        threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_scalar_sum",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Mean of all elements of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise computation of \code{mean(X)}.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Scalar numeric.
#' @keywords internal
rcpp_hdf5dataset_scalar_mean <- function(ptr, paral = NULL, wsize = NULL,
                                         threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_scalar_mean",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Minimum of all elements of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Block-wise computation of \code{min(X)}.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Scalar numeric.
#' @keywords internal
rcpp_hdf5dataset_scalar_min <- function(ptr, paral = NULL, wsize = NULL,
                                        threads = NULL) {
  # Forward the dataset pointer and tuning options to the compiled routine.
  .Call(
    "_BigDataStatMeth_rcpp_hdf5dataset_scalar_min",
    PACKAGE = "BigDataStatMeth",
    ptr, paral, wsize, threads
  )
}

#' Largest element of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Computes \code{max(X)} block by block.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Scalar numeric.
#' @keywords internal
rcpp_hdf5dataset_scalar_max <- function(
    ptr, paral = NULL, wsize = NULL, threads = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_scalar_max",
        PACKAGE = "BigDataStatMeth",
        ptr, paral, wsize, threads
    )
}

#' Overall variance of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Computes \code{var(as.vector(X))} block by block. Bessel's correction is
#' applied: the denominator is N-1, with N the total number of elements.
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Scalar numeric.
#' @keywords internal
rcpp_hdf5dataset_scalar_var <- function(
    ptr, paral = NULL, wsize = NULL, threads = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_scalar_var",
        PACKAGE = "BigDataStatMeth",
        ptr, paral, wsize, threads
    )
}

#' Overall standard deviation of an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Computes \code{sd(as.vector(X))} block by block, with Bessel's
#' correction (N-1).
#'
#' @inheritParams rcpp_hdf5dataset_colSums
#' @return Scalar numeric.
#' @keywords internal
rcpp_hdf5dataset_scalar_sd <- function(
    ptr, paral = NULL, wsize = NULL, threads = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_scalar_sd",
        PACKAGE = "BigDataStatMeth",
        ptr, paral, wsize, threads
    )
}

#' Add two HDF5 datasets element by element (R6 wrapper)
#'
#' @description
#' Evaluates \code{A + B} element-wise with a block-wise algorithm. Both
#' operands are HDF5 datasets identified by external pointers.
#'
#' @param ptr_a External pointer (SEXP) for matrix A
#' @param ptr_b External pointer (SEXP) for matrix B
#' @param paral Logical or NULL; enable OpenMP parallelisation
#' @param block_size Integer or NULL; block size (NULL = auto)
#' @param threads Integer or NULL; thread count
#' @param compression Optional (default NULL); forwarded unchanged to the
#'   native routine. Presumably the compression level used for the result
#'   dataset - TODO confirm against the C++ implementation.
#'
#' @return Named list with \code{filename} and \code{path} of the result.
#'   The result is stored in group \code{"OUTPUT"} with dataset name
#'   \code{"A_plus_B"}, where A and B are the input dataset names.
#'
#' @keywords internal
rcpp_hdf5dataset_add <- function(
    ptr_a, ptr_b,
    paral = NULL, block_size = NULL, threads = NULL, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_add",
        PACKAGE = "BigDataStatMeth",
        ptr_a, ptr_b, paral, block_size, threads, compression
    )
}

#' Subtract two HDF5 datasets element by element (R6 wrapper)
#'
#' @description
#' Evaluates \code{A - B} element-wise with a block-wise algorithm. Both
#' operands are HDF5 datasets identified by external pointers.
#'
#' @param ptr_a External pointer (SEXP) for matrix A
#' @param ptr_b External pointer (SEXP) for matrix B
#' @param paral Logical or NULL; enable OpenMP parallelisation
#' @param block_size Integer or NULL; block size (NULL = auto)
#' @param threads Integer or NULL; thread count
#' @param compression Optional (default NULL); forwarded unchanged to the
#'   native routine. Presumably the compression level used for the result
#'   dataset - TODO confirm against the C++ implementation.
#'
#' @return Named list with \code{filename} and \code{path} of the result.
#'   The result is stored in group \code{"OUTPUT"} with dataset name
#'   \code{"A_minus_B"}, where A and B are the input dataset names.
#'
#' @keywords internal
rcpp_hdf5dataset_subtract <- function(
    ptr_a, ptr_b,
    paral = NULL, block_size = NULL, threads = NULL, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_subtract",
        PACKAGE = "BigDataStatMeth",
        ptr_a, ptr_b, paral, block_size, threads, compression
    )
}

#' Hadamard product of two HDF5 datasets (R6 wrapper)
#'
#' @description
#' Evaluates the element-wise (Hadamard) product \code{A * B} with a
#' block-wise algorithm. Both operands are HDF5 datasets identified by
#' external pointers.
#'
#' @param ptr_a External pointer (SEXP) for matrix A
#' @param ptr_b External pointer (SEXP) for matrix B
#' @param paral Logical or NULL; enable OpenMP parallelisation
#' @param block_size Integer or NULL; block size (NULL = auto)
#' @param threads Integer or NULL; thread count
#' @param compression Optional (default NULL); forwarded unchanged to the
#'   native routine. Presumably the compression level used for the result
#'   dataset - TODO confirm against the C++ implementation.
#'
#' @return Named list with \code{filename} and \code{path} of the result.
#'   The result is stored in group \code{"OUTPUT"} with dataset name
#'   \code{"A_times_B"}, where A and B are the input dataset names.
#'
#' @keywords internal
rcpp_hdf5dataset_mul_ew <- function(
    ptr_a, ptr_b,
    paral = NULL, block_size = NULL, threads = NULL, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_mul_ew",
        PACKAGE = "BigDataStatMeth",
        ptr_a, ptr_b, paral, block_size, threads, compression
    )
}

#' Divide two HDF5 datasets element by element (R6 wrapper)
#'
#' @description
#' Evaluates \code{A / B} element-wise with a block-wise algorithm. Both
#' operands are HDF5 datasets identified by external pointers. As in base
#' R, dividing by zero yields \code{NaN} or \code{Inf}.
#'
#' @param ptr_a External pointer (SEXP) for matrix A
#' @param ptr_b External pointer (SEXP) for matrix B
#' @param paral Logical or NULL; enable OpenMP parallelisation
#' @param block_size Integer or NULL; block size (NULL = auto)
#' @param threads Integer or NULL; thread count
#' @param compression Optional (default NULL); forwarded unchanged to the
#'   native routine. Presumably the compression level used for the result
#'   dataset - TODO confirm against the C++ implementation.
#'
#' @return Named list with \code{filename} and \code{path} of the result.
#'   The result is stored in group \code{"OUTPUT"} with dataset name
#'   \code{"A_div_B"}, where A and B are the input dataset names.
#'
#' @keywords internal
rcpp_hdf5dataset_div_ew <- function(
    ptr_a, ptr_b,
    paral = NULL, block_size = NULL, threads = NULL, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_div_ew",
        PACKAGE = "BigDataStatMeth",
        ptr_a, ptr_b, paral, block_size, threads, compression
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_bind`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_bind <- function(
    file_a, group_a, dataset_a,
    file_b, group_b, dataset_b,
    out_file, out_group, out_dataset,
    func,
    overwrite = FALSE, block_rows = 1000L, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_bind",
        PACKAGE = "BigDataStatMeth",
        file_a, group_a, dataset_a,
        file_b, group_b, dataset_b,
        out_file, out_group, out_dataset,
        func, overwrite, block_rows, compression
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_chol`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_chol <- function(
    filename, group, dataset,
    full_matrix = FALSE, overwrite = FALSE,
    threads = -1L, block_size = -1L, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_chol",
        PACKAGE = "BigDataStatMeth",
        filename, group, dataset,
        full_matrix, overwrite, threads, block_size, compression
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_solve`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_solve <- function(
    filename, group, dataset,
    full_matrix = TRUE, overwrite = FALSE,
    threads = -1L, block_size = -1L, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_solve",
        PACKAGE = "BigDataStatMeth",
        filename, group, dataset,
        full_matrix, overwrite, threads, block_size, compression
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_cor`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_cor <- function(
    in_file_x, in_group_x, in_dataset_x,
    in_file_y = "", in_group_y = "", in_dataset_y = "",
    out_file = "", out_group = "",
    trans_x = FALSE, trans_y = FALSE,
    method = "pearson",
    use_complete_obs = FALSE, compute_pvalues = TRUE,
    block_size = 1000L, copy_blockrows = 500L,
    threads = NULL, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_cor",
        PACKAGE = "BigDataStatMeth",
        in_file_x, in_group_x, in_dataset_x,
        in_file_y, in_group_y, in_dataset_y,
        out_file, out_group,
        trans_x, trans_y,
        method,
        use_complete_obs, compute_pvalues,
        block_size, copy_blockrows,
        threads, compression
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_eigen`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_eigen <- function(
    filename, group, dataset,
    k = 0L, which = "LM", ncv = 0L,
    bcenter = FALSE, bscale = FALSE,
    tolerance = 1e-10, max_iter = 1000L,
    compute_vectors = TRUE, overwrite = FALSE, threads = -1L
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_eigen",
        PACKAGE = "BigDataStatMeth",
        filename, group, dataset,
        k, which, ncv,
        bcenter, bscale,
        tolerance, max_iter,
        compute_vectors, overwrite, threads
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_sweep`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_sweep <- function(
    ptr_mat, ptr_vec,
    func = "*", byrows = FALSE,
    paral = NULL, threads = NULL, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_sweep",
        PACKAGE = "BigDataStatMeth",
        ptr_mat, ptr_vec, func, byrows, paral, threads, compression
    )
}

# Internal binding: passes `ptr_mat` unchanged to the native routine
# `_BigDataStatMeth_rcpp_hdf5dataset_diag_get` looked up in the
# BigDataStatMeth package.
rcpp_hdf5dataset_diag_get <- function(ptr_mat) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_diag_get",
        PACKAGE = "BigDataStatMeth",
        ptr_mat
    )
}

# Internal binding: passes `ptr_mat` and `values` unchanged to the native
# routine `_BigDataStatMeth_rcpp_hdf5dataset_diag_set` looked up in the
# BigDataStatMeth package.
rcpp_hdf5dataset_diag_set <- function(ptr_mat, values) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_diag_set",
        PACKAGE = "BigDataStatMeth",
        ptr_mat, values
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_diag_op`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_diag_op <- function(
    ptr_a, ptr_b,
    op = "+",
    paral = NULL, threads = NULL, compression = NULL,
    outgroup = NULL, outdataset = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_diag_op",
        PACKAGE = "BigDataStatMeth",
        ptr_a, ptr_b, op,
        paral, threads, compression,
        outgroup, outdataset
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine
# `_BigDataStatMeth_rcpp_hdf5dataset_diag_scale` looked up in the
# BigDataStatMeth package.
rcpp_hdf5dataset_diag_scale <- function(
    ptr_mat, scalar,
    op_code = 2L,
    paral = NULL, threads = NULL, compression = NULL,
    outgroup = NULL, outdataset = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_diag_scale",
        PACKAGE = "BigDataStatMeth",
        ptr_mat, scalar, op_code,
        paral, threads, compression,
        outgroup, outdataset
    )
}

#' Close every open HDF5Dataset object and remaining HDF5 handle
#'
#' @description
#' First closes all C++ \code{hdf5Dataset} objects tracked in the
#' live-pointer registry, then calls
#' \code{BigDataStatMeth::closeAllHDF5Handles()} to close any HDF5 handles
#' left at the C library level (files, datasets, groups, datatypes,
#' attributes) that the registry did not track. Equivalent in effect to
#' \code{rhdf5::h5closeAll()}.
#'
#' Invoked automatically from \code{.onUnload()} when the package is
#' unloaded. It can also be run manually for diagnostic purposes via
#' \code{BigDataStatMeth:::rcpp_hdf5_close_all_registry()}.
#'
#' @return \code{NULL} invisibly.
#'
#' @keywords internal
rcpp_hdf5_close_all_registry <- function() {
    # invisible() keeps the result from auto-printing, as @return promises;
    # the value handed back by the native routine is passed through as-is.
    # NOTE(review): if this file is regenerated by Rcpp::compileAttributes(),
    # the durable fix is to declare the C++ function as returning void.
    invisible(.Call(
        "_BigDataStatMeth_rcpp_hdf5_close_all_registry",
        PACKAGE = "BigDataStatMeth"
    ))
}

#' Open an HDF5 dataset and hand back an external pointer (R6 wrapper)
#'
#' @param filename Path to HDF5 file
#' @param group Group path (e.g., "data" or "/data")
#' @param dataset Dataset name within the group (e.g., "matrix")
#'
#' @return External pointer to hdf5Dataset object
#'
#' @keywords internal
rcpp_hdf5dataset_open <- function(filename, group, dataset) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_open",
        PACKAGE = "BigDataStatMeth",
        filename, group, dataset
    )
}

#' Dimensions of an HDF5 dataset (R6 wrapper)
#'
#' @param ptr_sexp External pointer to hdf5Dataset
#'
#' @return Integer vector c(nrows, ncols)
#'
#' @keywords internal
rcpp_hdf5dataset_dim <- function(ptr_sexp) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_dim",
        PACKAGE = "BigDataStatMeth",
        ptr_sexp
    )
}

#' Descriptive information about an HDF5 dataset (R6 wrapper)
#'
#' @param ptr_sexp External pointer to hdf5Dataset
#'
#' @return Named list with filename, group, dataset, datatype
#'
#' @keywords internal
rcpp_hdf5dataset_info <- function(ptr_sexp) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_info",
        PACKAGE = "BigDataStatMeth",
        ptr_sexp
    )
}

#' Test whether a dataset handle is valid and open (R6 wrapper)
#'
#' @param ptr_sexp External pointer to hdf5Dataset
#'
#' @return Logical: TRUE if valid and open, FALSE otherwise
#'
#' @keywords internal
rcpp_hdf5dataset_is_valid <- function(ptr_sexp) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_is_valid",
        PACKAGE = "BigDataStatMeth",
        ptr_sexp
    )
}

#' Read the dimension names (rownames / colnames) of an HDF5 dataset
#'
#' @description
#' Retrieves the row and column names stored next to an HDF5 dataset
#' according to the BigDataStatMeth convention:
#' \itemize{
#'   \item rownames stored at \code{group/.<dataset>_dimnames/1}
#'   \item colnames stored at \code{group/.<dataset>_dimnames/2}
#' }
#' A component that has never been written comes back as an empty
#' \code{character(0)}. The function uses \code{BigDataStatMeth::hdf5Dims}
#' in read mode (\code{bWrite = false}), so no data on disk is modified.
#'
#' @param ptr_sexp External pointer (SEXP) to an open \code{hdf5Dataset}
#'   object managed by the R6 class.
#'
#' @return Named list with two \code{character} elements:
#' \describe{
#'   \item{rownames}{Row names, or \code{character(0)} if absent}
#'   \item{colnames}{Column names, or \code{character(0)} if absent}
#' }
#'
#' @keywords internal
rcpp_hdf5dataset_read_dimnames <- function(ptr_sexp) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_read_dimnames",
        PACKAGE = "BigDataStatMeth",
        ptr_sexp
    )
}

#' Write dimension names through the R6 dataset handle
#'
#' @description
#' Stores row and/or column names for an HDF5 dataset through the open file
#' handle already managed by the R6 object. In contrast to
#' \code{bdWrite_hdf5_dimnames()}, this function goes through
#' \code{hdf5Dataset::writeDimnames()}, so the long-lived R6 handle sees the
#' changes immediately - no metadata cache staleness.
#'
#' @param ptr_sexp External pointer (SEXP) to an open \code{hdf5Dataset}.
#' @param rownames Character vector of row names. Use \code{character(0)}
#'   to skip writing row names.
#' @param colnames Character vector of column names. Use \code{character(0)}
#'   to skip writing column names.
#'
#' @return \code{NULL} invisibly.
#'
#' @keywords internal
rcpp_hdf5dataset_write_dimnames <- function(ptr_sexp, rownames, colnames) {
    # invisible() keeps the result from auto-printing, as @return promises;
    # the value handed back by the native routine is passed through as-is.
    # NOTE(review): if this file is regenerated by Rcpp::compileAttributes(),
    # the durable fix is to declare the C++ function as returning void.
    invisible(.Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_write_dimnames",
        PACKAGE = "BigDataStatMeth",
        ptr_sexp, rownames, colnames
    ))
}

#' Immediately close and destroy an HDF5 dataset handle.
#'
#' The live-pointer registry guards against double-free: when the pointer
#' is no longer registered (already closed by close() or GC), the call is
#' a safe no-op.  The external pointer is cleared so that the GC finalizer
#' becomes a no-op as well.
#'
#' @param ptr_sexp External pointer to hdf5Dataset
#' @keywords internal
rcpp_hdf5dataset_close <- function(ptr_sexp) {
    native_result <- .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_close",
        PACKAGE = "BigDataStatMeth",
        ptr_sexp
    )
    invisible(native_result)
}

#' Close all live HDF5Matrix handles pointing to specific dataset paths.
#'
#' @description
#' Searches the live-pointer registry for open \code{hdf5Dataset} objects
#' that match the given \code{filename} and any of the \code{paths}.
#' Every match is closed and its external pointer cleared, so any R6
#' \code{HDF5Matrix} object holding one of those pointers will return
#' \code{FALSE} from \code{is_valid()} immediately.
#'
#' R6 methods that use \code{overwrite = TRUE} (e.g. \code{$eigen()},
#' \code{$svd()}, \code{$qr()}, \code{$chol()}, \code{$prcomp()}) call this
#' automatically to ensure that previous result objects are safely
#' invalidated before the HDF5 datasets they reference are deleted and
#' recreated.
#'
#' @param filename  Canonical filesystem path to the HDF5 file.
#' @param paths     Character vector of HDF5-internal paths
#'   (e.g. \code{c("EIGEN/sym/values", "EIGEN/sym/vectors")}).
#'
#' @return \code{NULL} invisibly.
#'
#' @keywords internal
rcpp_hdf5_close_at_paths <- function(filename, paths) {
    # invisible() keeps the result from auto-printing, as @return promises;
    # the value handed back by the native routine is passed through as-is.
    # NOTE(review): if this file is regenerated by Rcpp::compileAttributes(),
    # the durable fix is to declare the C++ function as returning void.
    invisible(.Call(
        "_BigDataStatMeth_rcpp_hdf5_close_at_paths",
        PACKAGE = "BigDataStatMeth",
        filename, paths
    ))
}

#' Close every HDF5 handle belonging to one file (R6 wrapper)
#'
#' @description
#' First closes all C++ objects in the live-pointer registry that belong
#' to \code{filename}, then closes whatever HDF5 handles remain for that
#' file at the HDF5 C library level.
#'
#' @param filename Absolute path to the HDF5 file (use
#'   \code{normalizePath()} in R before calling).
#'
#' @keywords internal
rcpp_hdf5_close_file_handles <- function(filename) {
    native_result <- .Call(
        "_BigDataStatMeth_rcpp_hdf5_close_file_handles",
        PACKAGE = "BigDataStatMeth",
        filename
    )
    invisible(native_result)
}

#' Close all remaining HDF5 file handles safely (mid-session safe)
#' @keywords internal
rcpp_hdf5_close_file_handles_safe <- function() {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5_close_file_handles_safe",
        PACKAGE = "BigDataStatMeth"
    )
}

#' Close all open HDF5 file handles mid-session (safe)
#'
#' @description
#' Walks over every currently open HDF5 file handle and calls
#' closeHDF5HandlesForFile() on each one, which closes the
#' datasets/groups/attrs belonging to that file before the file handle
#' itself is closed. Pre-defined HDF5 library types are never touched.
#'
#' @return NULL invisibly.
#' @keywords internal
rcpp_hdf5_close_all_file_handles <- function() {
    # invisible() keeps the result from auto-printing, as @return promises;
    # the value handed back by the native routine is passed through as-is.
    # NOTE(review): if this file is regenerated by Rcpp::compileAttributes(),
    # the durable fix is to declare the C++ function as returning void.
    invisible(.Call(
        "_BigDataStatMeth_rcpp_hdf5_close_all_file_handles",
        PACKAGE = "BigDataStatMeth"
    ))
}

#' Matrix product of two HDF5 datasets (R6 wrapper)
#'
#' @description
#' Evaluates \code{A \%*\% B} (or its transposed variants) with the
#' BigDataStatMeth block-wise multiplication algorithm. Both operands are
#' HDF5 datasets identified by external pointers.
#'
#' @param ptr_a External pointer (SEXP) for matrix A
#' @param ptr_b External pointer (SEXP) for matrix B
#' @param transpose_a Logical; transpose A before multiplying
#' @param transpose_b Logical; transpose B before multiplying
#' @param paral Logical or NULL; enable OpenMP parallelisation
#' @param block_size Integer or NULL; block size (NULL = auto)
#' @param threads Integer or NULL; thread count when \code{paral = TRUE}
#' @param compression Optional (default NULL); forwarded unchanged to the
#'   native routine. Presumably the compression level used for the result
#'   dataset - TODO confirm against the C++ implementation.
#' @param outgroup   Character or NULL. Output group in the HDF5 file.
#'   Default \code{"OUTPUT"}.
#' @param outdataset Character or NULL. Output dataset name.
#'   Default \code{"A_x_B"} where A and B are the input dataset names.
#'
#' @return Named list with \code{filename} (character) and \code{path}
#'   (character) locating the result dataset within the HDF5 file.
#'
#' @keywords internal
rcpp_hdf5dataset_multiply <- function(
    ptr_a, ptr_b,
    transpose_a = FALSE, transpose_b = FALSE,
    paral = NULL, block_size = NULL, threads = NULL, compression = NULL,
    outgroup = NULL, outdataset = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_multiply",
        PACKAGE = "BigDataStatMeth",
        ptr_a, ptr_b,
        transpose_a, transpose_b,
        paral, block_size, threads, compression,
        outgroup, outdataset
    )
}

#' Cross product of HDF5 datasets (R6 wrapper)
#'
#' @description
#' Evaluates \code{t(A) \%*\% B} with the dedicated BigDataStatMeth
#' block-wise cross-product algorithm. If A and B refer to the same
#' dataset, the symmetric optimisation (\code{bisSymetric = TRUE}) is
#' applied automatically.
#'
#' @param ptr_a External pointer (SEXP) for matrix A
#' @param ptr_b External pointer (SEXP) for matrix B
#' @param paral Logical or NULL; enable OpenMP parallelisation
#' @param block_size Integer or NULL; block size (NULL = auto)
#' @param threads Integer or NULL; thread count when \code{paral = TRUE}
#' @param compression Optional (default NULL); forwarded unchanged to the
#'   native routine. Presumably the compression level used for the result
#'   dataset - TODO confirm against the C++ implementation.
#' @param outgroup   Character or NULL. Output group in the HDF5 file.
#'   Default \code{"OUTPUT"}.
#' @param outdataset Character or NULL. Output dataset name.
#'   Default \code{"CrossProd_A"} (single matrix) or
#'   \code{"CrossProd_A_x_B"} (two matrices).
#'
#' @return Named list with \code{filename} and \code{path} of the result.
#'
#' @keywords internal
rcpp_hdf5dataset_crossprod <- function(
    ptr_a, ptr_b,
    paral = NULL, block_size = NULL, threads = NULL, compression = NULL,
    outgroup = NULL, outdataset = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_crossprod",
        PACKAGE = "BigDataStatMeth",
        ptr_a, ptr_b,
        paral, block_size, threads, compression,
        outgroup, outdataset
    )
}

#' Transposed cross product of HDF5 datasets (R6 wrapper)
#'
#' @description
#' Evaluates \code{A \%*\% t(B)} with the dedicated BigDataStatMeth
#' block-wise transposed cross-product algorithm. If A and B refer to the
#' same dataset, the symmetric optimisation is applied automatically.
#'
#' @param ptr_a External pointer (SEXP) for matrix A
#' @param ptr_b External pointer (SEXP) for matrix B
#' @param paral Logical or NULL; enable OpenMP parallelisation
#' @param block_size Integer or NULL; block size (NULL = auto)
#' @param threads Integer or NULL; thread count when \code{paral = TRUE}
#' @param compression Optional (default NULL); forwarded unchanged to the
#'   native routine. Presumably the compression level used for the result
#'   dataset - TODO confirm against the C++ implementation.
#' @param outgroup   Character or NULL. Output group in the HDF5 file.
#'   Default \code{"OUTPUT"}.
#' @param outdataset Character or NULL. Output dataset name.
#'   Default \code{"tCrossProd_A"} (single matrix) or
#'   \code{"tCrossProd_A_x_B"} (two matrices).
#'
#' @return Named list with \code{filename} and \code{path} of the result.
#'
#' @keywords internal
rcpp_hdf5dataset_tcrossprod <- function(
    ptr_a, ptr_b,
    paral = NULL, block_size = NULL, threads = NULL, compression = NULL,
    outgroup = NULL, outdataset = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_tcrossprod",
        PACKAGE = "BigDataStatMeth",
        ptr_a, ptr_b,
        paral, block_size, threads, compression,
        outgroup, outdataset
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine
# `_BigDataStatMeth_rcpp_hdf5dataset_normalize` looked up in the
# BigDataStatMeth package.
rcpp_hdf5dataset_normalize <- function(
    in_file, in_group, in_dataset,
    out_file, out_group, out_dataset,
    center = TRUE, scale = TRUE, byrows = FALSE,
    wsize = NULL, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_normalize",
        PACKAGE = "BigDataStatMeth",
        in_file, in_group, in_dataset,
        out_file, out_group, out_dataset,
        center, scale, byrows,
        wsize, compression
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine
# `_BigDataStatMeth_rcpp_hdf5dataset_impute_snps` looked up in the
# BigDataStatMeth package.
rcpp_hdf5dataset_impute_snps <- function(
    in_file, in_group, in_dataset,
    out_group, out_dataset,
    by_cols = TRUE, threads = -1L, overwrite = FALSE, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_impute_snps",
        PACKAGE = "BigDataStatMeth",
        in_file, in_group, in_dataset,
        out_group, out_dataset,
        by_cols, threads, overwrite, compression
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine
# `_BigDataStatMeth_rcpp_hdf5dataset_filter_low_coverage` looked up in the
# BigDataStatMeth package.
rcpp_hdf5dataset_filter_low_coverage <- function(
    in_file, in_group, in_dataset,
    out_group, out_dataset,
    pcent = 0.05, by_cols = TRUE, overwrite = FALSE, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_filter_low_coverage",
        PACKAGE = "BigDataStatMeth",
        in_file, in_group, in_dataset,
        out_group, out_dataset,
        pcent, by_cols, overwrite, compression
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine
# `_BigDataStatMeth_rcpp_hdf5dataset_filter_maf` looked up in the
# BigDataStatMeth package.
rcpp_hdf5dataset_filter_maf <- function(
    in_file, in_group, in_dataset,
    out_group, out_dataset,
    maf_threshold = 0.05, by_cols = FALSE, block_size = 100L,
    overwrite = FALSE, compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_filter_maf",
        PACKAGE = "BigDataStatMeth",
        in_file, in_group, in_dataset,
        out_group, out_dataset,
        maf_threshold, by_cols, block_size,
        overwrite, compression
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_pca`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_pca <- function(
    filename, group, dataset,
    ncomponents = 0L,
    bcenter = FALSE, bscale = FALSE,
    k = 2L, q = 1L, rankthreshold = 0.0,
    svdgroup = "SVD/",
    overwrite = FALSE, method = "auto", threads = -1L
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_pca",
        PACKAGE = "BigDataStatMeth",
        filename, group, dataset,
        ncomponents,
        bcenter, bscale,
        k, q, rankthreshold,
        svdgroup,
        overwrite, method, threads
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine
# `_BigDataStatMeth_rcpp_hdf5dataset_pseudoinv` looked up in the
# BigDataStatMeth package.
# NOTE(review): `compression` defaults to -1L here while sibling wrappers
# default it to NULL; left as-is because the native side defines it.
rcpp_hdf5dataset_pseudoinv <- function(
    filename, group, dataset,
    out_group = "PseudoInverse", out_dataset = "",
    overwrite = FALSE, threads = -1L, compression = -1L
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_pseudoinv",
        PACKAGE = "BigDataStatMeth",
        filename, group, dataset,
        out_group, out_dataset,
        overwrite, threads, compression
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_qr`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_qr <- function(
    filename, group, dataset,
    thin = FALSE, block_size = -1L,
    overwrite = FALSE, threads = -1L,
    method = "auto", compression = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_qr",
        PACKAGE = "BigDataStatMeth",
        filename, group, dataset,
        thin, block_size,
        overwrite, threads,
        method, compression
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_reduce`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_reduce <- function(
    filename, group,
    out_group = "REDUCED", out_dataset = "reduced",
    func = "+",
    overwrite = FALSE, remove_input = FALSE
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_reduce",
        PACKAGE = "BigDataStatMeth",
        filename, group,
        out_group, out_dataset,
        func,
        overwrite, remove_input
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine
# `_BigDataStatMeth_rcpp_hdf5dataset_apply_function` looked up in the
# BigDataStatMeth package.
rcpp_hdf5dataset_apply_function <- function(
    filename, group, datasets,
    out_group = "APPLIED", func = "QR",
    b_group = "", b_datasets = NULL,
    overwrite = FALSE,
    transp_a = FALSE, transp_b = FALSE,
    full_matrix = FALSE, byrows = FALSE,
    threads = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_apply_function",
        PACKAGE = "BigDataStatMeth",
        filename, group, datasets,
        out_group, func,
        b_group, b_datasets,
        overwrite,
        transp_a, transp_b,
        full_matrix, byrows,
        threads
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine
# `_BigDataStatMeth_rcpp_hdf5dataset_multiply_sparse` looked up in the
# BigDataStatMeth package.
rcpp_hdf5dataset_multiply_sparse <- function(
    ptr_a, ptr_b,
    block_size = -1L, mix_block = -1L,
    paral = NULL, threads = NULL, compression = NULL,
    outgroup = NULL, outdataset = NULL
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_multiply_sparse",
        PACKAGE = "BigDataStatMeth",
        ptr_a, ptr_b,
        block_size, mix_block,
        paral, threads, compression,
        outgroup, outdataset
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_split`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_split <- function(
    ptr,
    bycols = FALSE, n_blocks = -1L, block_size = -1L,
    out_group = "SPLIT", out_dataset = "",
    overwrite = FALSE
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_split",
        PACKAGE = "BigDataStatMeth",
        ptr,
        bycols, n_blocks, block_size,
        out_group, out_dataset,
        overwrite
    )
}

#' Read a block (subset) of an HDF5 dataset
#'
#' @param ptr_sexp External pointer to hdf5Dataset
#' @param rows Integer vector with row indices (1-based, as in R)
#' @param cols Integer vector with column indices (1-based, as in R)
#' @return Numeric matrix with requested data
#'
#' @details
#' Reads a subset of the data held in an HDF5 dataset. Indices follow the
#' R convention (1-based) and are converted internally to the C++
#' convention (0-based).
#'
#' The function handles:
#' - Contiguous blocks (e.g., rows 1:10)
#' - Non-contiguous indices (e.g., rows c(1,3,5,7))
#' - Full dimensions (e.g., all rows, specific columns)
#'
#' @keywords internal
rcpp_hdf5dataset_subset <- function(ptr_sexp, rows, cols) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_subset",
        PACKAGE = "BigDataStatMeth",
        ptr_sexp, rows, cols
    )
}

#' Read the whole dataset as a matrix (convenience function)
#'
#' @param ptr_sexp External pointer to hdf5Dataset
#' @return Numeric matrix with all data
#'
#' @keywords internal
rcpp_hdf5dataset_read_all <- function(ptr_sexp) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_read_all",
        PACKAGE = "BigDataStatMeth",
        ptr_sexp
    )
}

# Internal binding: forwards its arguments, unchanged and in declaration
# order, to the native routine `_BigDataStatMeth_rcpp_hdf5dataset_svd`
# looked up in the BigDataStatMeth package.
rcpp_hdf5dataset_svd <- function(
    filename, group, dataset,
    k = 2L, q = 1L, nev = 0L,
    bcenter = TRUE, bscale = TRUE,
    rankthreshold = 0.0,
    overwrite = FALSE, method = "auto", threads = -1L
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_svd",
        PACKAGE = "BigDataStatMeth",
        filename, group, dataset,
        k, q, nev,
        bcenter, bscale,
        rankthreshold,
        overwrite, method, threads
    )
}

#' Write a block of data into an HDF5 dataset (R6 wrapper)
#'
#' @description
#' Stores a block of data in an HDF5 dataset at the given offset.
#' Scalars, vectors, and matrices are all supported.
#'
#' @param ptr_sexp External pointer (SEXP) to hdf5Dataset
#' @param value Data to write (numeric scalar, vector, or matrix)
#' @param row_offset Starting row (0-based in C++, but receives 1-based from R)
#' @param col_offset Starting column (0-based in C++, but receives 1-based from R)
#' @param nrows Number of rows to write
#' @param ncols Number of columns to write
#'
#' @return NULL (invisible)
#'
#' @keywords internal
rcpp_hdf5dataset_write_block <- function(
    ptr_sexp, value, row_offset, col_offset, nrows, ncols
) {
    native_result <- .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_write_block",
        PACKAGE = "BigDataStatMeth",
        ptr_sexp, value, row_offset, col_offset, nrows, ncols
    )
    invisible(native_result)
}

#' Overwrite a whole dataset (R6 wrapper)
#'
#' @description
#' Replaces the complete contents of an HDF5 dataset with new data.
#'
#' @param ptr_sexp External pointer (SEXP) to hdf5Dataset
#' @param value Data to write (numeric matrix)
#'
#' @return NULL (invisible)
#'
#' @keywords internal
rcpp_hdf5dataset_write_all <- function(ptr_sexp, value) {
    native_result <- .Call(
        "_BigDataStatMeth_rcpp_hdf5dataset_write_all",
        PACKAGE = "BigDataStatMeth",
        ptr_sexp, value
    )
    invisible(native_result)
}

#' Create an HDF5 dataset with a configurable compression level (R6 wrapper)
#'
#' @description
#' Creates an HDF5 dataset of size \code{nrows x ncols} and, optionally,
#' writes data into it. In the R6+S3 interface it replaces
#' \code{bdCreate_hdf5_matrix()} / \code{bdCreate_hdf5_emptyDataset()} so
#' that compression can be controlled from R.
#'
#' @param filename Character. Path to the HDF5 file.
#' @param group Character. Group path inside the file.
#' @param dataset Character. Dataset name.
#' @param nrows Integer. Number of rows (>= 1).
#' @param ncols Integer. Number of columns (>= 1).
#' @param data Optional numeric/integer matrix or data.frame; NULL creates
#'   an empty (zero-filled) dataset.
#' @param dtype Character. Element type: "real" (default), "int", "logical".
#' @param overwrite_file Logical. Recreate file if it already exists.
#' @param overwrite_dataset Logical. Replace dataset if it already exists.
#' @param compression Integer 0-9. gzip compression level (0 = no compression,
#'   6 = balanced default). Applied to the new dataset only.
#'
#' @return Named list with \code{filename} and \code{path} of the created dataset.
#'
#' @keywords internal
rcpp_hdf5_create_matrix <- function(
    filename, group, dataset,
    nrows, ncols,
    data = NULL, dtype = "real",
    overwrite_file = FALSE, overwrite_dataset = FALSE,
    compression = 6L
) {
    .Call(
        "_BigDataStatMeth_rcpp_hdf5_create_matrix",
        PACKAGE = "BigDataStatMeth",
        filename, group, dataset,
        nrows, ncols,
        data, dtype,
        overwrite_file, overwrite_dataset,
        compression
    )
}

#' Reduce Multiple HDF5 Datasets
#'
#' @description
#' Combines the datasets found in an HDF5 group into a single dataset by
#' applying an arithmetic operation (addition or subtraction).
#'
#' @details
#' The function offers efficient dataset reduction with:
#'
#' * Operation options:
#'   - Addition of datasets
#'   - Subtraction of datasets
#'
#' * Output options:
#'   - Custom output location
#'   - Configurable dataset name
#'   - Overwrite protection
#'
#' * Implementation features:
#'   - Memory-efficient processing
#'   - Safe file operations
#'   - Optional source cleanup
#'   - Comprehensive error handling
#'
#' Datasets are processed efficiently while data integrity is maintained.
#'
#' @param filename Character string. Path to the HDF5 file.
#' @param group Character string. Path to the group containing datasets.
#' @param reducefunction Character. Operation to apply, either "+" or "-".
#' @param outgroup Character string (optional). Output group path. If NULL,
#'   uses input group.
#' @param outdataset Character string (optional). Output dataset name. If NULL,
#'   uses input group name.
#' @param overwrite Logical (optional). Whether to overwrite existing dataset.
#'   Default is FALSE.
#' @param remove Logical (optional). Whether to remove source datasets after
#'   reduction. Default is FALSE.
#'
#' @return List with components. If an error occurs, all string values are returned as empty strings (""):
#' \describe{
#'   \item{fn}{Character string with the HDF5 filename}
#'   \item{ds}{Character string with the full dataset path to the reduced dataset (group/dataset)}
#'   \item{func}{Character string with the reduction function applied}
#' }
#'
#' @examples
#' \donttest{
#' fn <- tempfile(fileext = ".h5")
#' hdf5_create_matrix(fn, "data/matrix1", data = matrix(1:100, 10, 10))
#' hdf5_create_matrix(fn, "data/matrix2", data = matrix(101:200, 10, 10))
#' hdf5_create_matrix(fn, "data/matrix3", data = matrix(201:300, 10, 10))
#' 
#' bdReduce_hdf5_dataset(
#'   filename   = fn,
#'   group      = "data",
#'   reducefunction = "+",
#'   outgroup   = "results",
#'   outdataset = "sum_matrix",
#'   overwrite  = TRUE
#' )
#' hdf5_close_all()
#' unlink(fn)
#' }
#'
#' @references
#' * The HDF Group. (2000-2010). HDF5 User's Guide.
#'
#' @export
bdReduce_hdf5_dataset <- function(
    filename, group, reducefunction,
    outgroup = NULL, outdataset = NULL,
    overwrite = FALSE, remove = FALSE
) {
    .Call(
        "_BigDataStatMeth_bdReduce_hdf5_dataset",
        PACKAGE = "BigDataStatMeth",
        filename, group, reducefunction,
        outgroup, outdataset,
        overwrite, remove
    )
}

#' Get total system RAM
#'
#' @description
#' Returns the total physical RAM installed in the system.
#'
#' @return Numeric value with total RAM in gigabytes (GB)
#'
#' @details
#' This function queries the operating system to determine total RAM.
#' Works on Windows, Linux, and macOS.
#'
#' The value returned is the physical RAM available to the system:
#' - On physical machines: actual installed RAM
#' - On virtual machines: RAM allocated to the VM
#' - On containers: RAM limit set for the container
#'
#' @examples
#' \donttest{
#' # Check total RAM
#' total <- get_total_ram()
#' cat("System has", total, "GB of RAM\n")
#' 
#' # Returns 16.0 on a 16GB system
#' }
#'
#' @seealso \code{\link{get_available_ram}}, \code{\link{get_cpu_cores}}
#'
#' @export
get_total_ram <- function() {
    # No R-side computation: the value comes straight from the compiled
    # routine in the BigDataStatMeth shared library.
    total_ram <- .Call(
        '_BigDataStatMeth_get_total_ram',
        PACKAGE = 'BigDataStatMeth'
    )
    total_ram
}

#' Get available (free) system RAM
#'
#' @description
#' Returns the amount of RAM currently available for allocation.
#'
#' @return Numeric value with available RAM in gigabytes (GB)
#'
#' @details
#' This function returns the RAM that can be allocated without swapping.
#' The value changes dynamically as processes allocate and free memory.
#'
#' **Important notes:**
#' - Value can change rapidly; don't cache it
#' - On Linux, uses MemAvailable (more accurate than MemFree)
#' - Includes memory that can be reclaimed from caches
#' - Actual allocatable memory may be slightly less
#'
#' **Use case:**
#' Check available RAM before loading large datasets into memory.
#'
#' @examples
#' \donttest{
#' available <- get_available_ram()
#' cat("Available RAM:", round(available, 2), "GB\n")
#' 
#' # Use it to decide how much data to load
#' fn <- tempfile(fileext = ".h5")
#' X  <- hdf5_create_matrix(fn, "data/M",
#'                           data = matrix(rnorm(1000), 100, 10))
#' 
#' size_gb <- prod(dim(X)) * 8 / 1e9
#' if (get_available_ram() > size_gb * 1.2) {
#'   mat <- as.matrix(X)
#' } else {
#'   mat <- X[1:50, ]
#' }
#' 
#' hdf5_close_all()
#' unlink(fn)
#' }
#'
#' @seealso \code{\link{get_total_ram}}, \code{\link{can_allocate}}
#'
#' @export
get_available_ram <- function() {
    # Each call re-enters the compiled routine; nothing is stored or cached
    # by this wrapper.
    available_ram <- .Call(
        '_BigDataStatMeth_get_available_ram',
        PACKAGE = 'BigDataStatMeth'
    )
    available_ram
}

#' Check if memory allocation is safe
#'
#' @description
#' Checks whether a given amount of memory can be safely allocated
#' while maintaining a safety margin.
#'
#' @param size_gb Size in gigabytes (GB) to check
#' @param safety_margin_pct Percentage of available RAM to keep free
#'   (default 20 percent)
#'
#' @return Logical. TRUE if allocation is likely safe, FALSE otherwise
#'
#' @details
#' This function checks if the requested memory can be allocated while
#' keeping a safety margin of free RAM. This helps prevent:
#' - System instability from memory exhaustion
#' - Swapping (which degrades performance)
#' - Out-of-memory errors from other processes
#'
#' **Formula:**
#' \code{can_allocate = (size_gb < available_ram * (1 - safety_margin_pct / 100))}
#'
#' **Safety margin guidelines:**
#' \itemize{
#'   \item 20 percent (default): Conservative, recommended for most cases
#'   \item 10 percent: Moderate, for controlled environments
#'   \item 5 percent: Aggressive, only if you know what you're doing
#'   \item 0 percent: Maximum risk, not recommended
#' }
#'
#' @note
#' This is a heuristic check, not a guarantee. Allocation can still fail
#' due to memory fragmentation or competing processes.
#'
#' @examples
#' \donttest{
#' # Check if 1 GB can be safely allocated
#' if (can_allocate(1)) {
#'   message("1 GB allocation is safe")
#' } else {
#'   message("Not enough RAM for 1 GB allocation")
#' }
#' 
#' # Use it to decide how much data to load
#' fn <- tempfile(fileext = ".h5")
#' X  <- hdf5_create_matrix(fn, "data/M",
#'                           data = matrix(rnorm(1000), 100, 10))
#' 
#' size_gb <- prod(dim(X)) * 8 / 1e9   # estimate in GB
#' if (can_allocate(size_gb)) {
#'   mat <- as.matrix(X)
#' } else {
#'   mat <- X[1:50, ]   # load subset
#' }
#' 
#' hdf5_close_all()
#' unlink(fn)
#' }
#'
#' @seealso \code{\link{get_available_ram}}
#'
#' @export
can_allocate <- function(size_gb, safety_margin_pct = 20.0) {
    # The wrapper holds no logic of its own: the requested size and the
    # safety margin are passed through to the compiled routine, whose answer
    # is returned as is.
    answer <- .Call(
        '_BigDataStatMeth_can_allocate',
        PACKAGE = 'BigDataStatMeth',
        size_gb, safety_margin_pct
    )
    answer
}

#' Get number of CPU cores
#'
#' @description
#' Returns the number of logical CPU cores (processors) available.
#'
#' @return Integer with number of CPU cores
#'
#' @details
#' This function returns the number of logical processors, which includes
#' cores from hyperthreading/SMT. Useful for configuring parallel processing.
#'
#' **Typical values:**
#' - 4-core CPU without hyperthreading: 4
#' - 4-core CPU with hyperthreading: 8
#' - 8-core CPU with hyperthreading: 16
#'
#' **Usage for parallelization:**
#' Don't blindly use all cores. A common practice is to use 80-90 percent of
#' available cores to leave room for the OS and other processes.
#'
#' @note
#' - Returns logical cores (with hyperthreading), not physical cores
#' - On systems with CPU pinning, may return fewer cores
#' - Value reflects cores available to the process
#'
#' @examples
#' \donttest{
#' # Get CPU cores
#' cores <- get_cpu_cores()
#' cat("System has", cores, "CPU cores\n")
#'
#' # Configure parallel processing (use 80 percent of cores)
#' threads <- max(1, floor(cores * 0.8))
#' options(BigDataStatMeth.threads = threads)
#' }
#'
#' @seealso \code{\link{get_total_ram}}
#'
#' @export
get_cpu_cores <- function() {
    # Delegates entirely to the compiled routine in the BigDataStatMeth
    # shared library and returns its result untouched.
    n_cores <- .Call(
        '_BigDataStatMeth_get_cpu_cores',
        PACKAGE = 'BigDataStatMeth'
    )
    n_cores
}

#' Get system information summary
#'
#' @description
#' Returns a comprehensive summary of system resources.
#'
#' @return Named list with system information:
#' \describe{
#'   \item{os}{Operating system name}
#'   \item{total_ram_gb}{Total RAM in GB}
#'   \item{available_ram_gb}{Available RAM in GB}
#'   \item{ram_used_pct}{Percentage of RAM currently used}
#'   \item{cpu_cores}{Number of CPU cores}
#' }
#'
#' @details
#' Convenience function that calls all system info methods and
#' returns a summary. Useful for debugging and logging.
#'
#' @examples
#' \donttest{
#' # Get full system info
#' info <- system_info()
#' print(info)
#' }
#'
#' @export
system_info <- function() {
    # The summary is assembled by the compiled routine; this wrapper only
    # invokes it and hands the result back.
    info <- .Call(
        '_BigDataStatMeth_system_info',
        PACKAGE = 'BigDataStatMeth'
    )
    info
}

#' Write dimnames to an HDF5 dataset
#'
#' @description
#' Write row and/or column names metadata for an existing dataset in an
#' HDF5 file. Empty vectors skip the corresponding dimnames.
#'
#' @param filename Character string. Path to the HDF5 file.
#' @param group Character string. Group containing the dataset.
#' @param dataset Character string. Dataset name inside \code{group}.
#' @param rownames Character vector of row names. Use \code{character(0)}
#'   to skip writing row names. If provided, length must equal nrow.
#' @param colnames Character vector of column names. Use
#'   \code{character(0)} to skip writing column names. If provided,
#'   length must equal ncol.
#'
#' @details
#' The dataset \code{group/dataset} must already exist. When non-empty,
#' \code{rownames} and \code{colnames} lengths are validated against the
#' dataset dimensions.
#'
#' @return List with components. If an error occurs, all string values are returned as empty strings (""):
#' \describe{
#'   \item{fn}{Character string with the HDF5 filename}
#'   \item{dsrows}{Character string with the full dataset path to the row names,
#'    stored as ".\code{dataset}_dimnames/1" within the specified group}
#'   \item{dscols}{Character string with the full dataset path to the column 
#'   names, stored as ".\code{dataset}_dimnames/2" within the specified group}
#' }
#'
#' @examples
#' \donttest{
#' fn <- tempfile(fileext = ".h5")
#' hdf5_create_matrix(fn, "MGCCA_IN/X",
#'                    data = matrix(rnorm(5000), 100, 50))
#' 
#' bdWrite_hdf5_dimnames(
#'   filename = fn,
#'   group    = "MGCCA_IN",
#'   dataset  = "X",
#'   rownames = paste0("r", seq_len(100)),
#'   colnames = paste0("c", seq_len(50))
#' )
#' hdf5_close_all()
#' unlink(fn)
#' }
#'
#' @export
bdWrite_hdf5_dimnames <- function(filename, group, dataset,
                                  rownames, colnames) {
    # All five arguments are required (no defaults) and are forwarded
    # positionally, in declaration order, to the compiled routine.
    written <- .Call(
        '_BigDataStatMeth_bdWrite_hdf5_dimnames',
        PACKAGE = 'BigDataStatMeth',
        filename, group, dataset,
        rownames, colnames
    )
    written
}

#' Block-Based Matrix Multiplication
#'
#' @description
#' Performs efficient matrix multiplication using block-based algorithms. The function
#' supports various input combinations (matrix-matrix, matrix-vector, vector-vector)
#' and provides options for parallel processing and block-based computation.
#'
#' @details
#' This function implements block-based matrix multiplication algorithms optimized
#' for cache efficiency and memory usage. Key features:
#' 
#' * Input combinations supported:
#'   - Matrix-matrix multiplication
#'   - Matrix-vector multiplication (both left and right)
#'   - Vector-vector multiplication
#' 
#' * Performance optimizations:
#'   - Block-based computation for cache efficiency
#'   - Parallel processing for large matrices
#'   - Automatic block size selection
#'   - Memory-efficient implementation
#'
#' The function automatically selects the appropriate multiplication method based
#' on input types and sizes. For large matrices (>2.25e+08 elements), block-based
#' computation is used by default.
#'
#' @param A Matrix or vector. First input operand.
#' @param B Matrix or vector. Second input operand.
#' @param block_size Integer. Block size for computation. If NULL, uses maximum
#'   allowed block size.
#' @param paral Logical. If TRUE, enables parallel computation. Default is NULL,
#'   which is treated as FALSE.
#' @param byBlocks Logical. If TRUE (default), forces block-based computation for
#'   large matrices. Can be set to FALSE to disable blocking.
#' @param threads Integer. Number of threads for parallel computation. If NULL,
#'   uses half of available threads or maximum allowed threads.
#'
#' @return Matrix or vector containing the matrix product of A and B
#'   (\code{A \%*\% B}).
#'
#' @examples
#' \donttest{
#' 
#' # Matrix-matrix multiplication
#' N <- 2500
#' M <- 400
#' nc <- 4
#' 
#' set.seed(555)
#' mat <- matrix(rnorm(N*M, mean=0, sd=10), N, M)
#' 
#' # Parallel block multiplication
#' result <- bdblockMult(mat, mat,
#'                       paral = TRUE,
#'                       threads = nc)
#' 
#' # Matrix-vector multiplication
#' vec <- rnorm(M)
#' result_mv <- bdblockMult(mat, vec,
#'                          paral = TRUE,
#'                          threads = nc)
#' }
#'
#' @references
#' * Golub, G. H., & Van Loan, C. F. (2013). Matrix Computations, 4th Edition.
#'   Johns Hopkins University Press.
#' * Kumar, V. et al. (1994). Introduction to Parallel Computing: Design and
#'   Analysis of Algorithms. Benjamin/Cummings Publishing Company.
#'
#' @seealso
#' * \code{\link{bdblockSum}} for block-based matrix addition
#' * \code{\link{bdblockSubstract}} for block-based matrix subtraction
#'
#' @export
bdblockMult <- function(A, B, block_size = NULL, paral = NULL,
                        byBlocks = TRUE, threads = NULL) {
    # Operands and tuning options go to the compiled routine exactly as
    # received; NULL options are passed on as NULL rather than resolved here.
    product <- .Call(
        '_BigDataStatMeth_bdblockMult',
        PACKAGE = 'BigDataStatMeth',
        A, B, block_size, paral, byBlocks, threads
    )
    product
}

#' Block-Based Matrix Subtraction
#'
#' @description
#' Performs efficient matrix subtraction using block-based algorithms. The function
#' supports various input combinations (matrix-matrix, matrix-vector, vector-vector)
#' and provides options for parallel processing and block-based computation.
#'
#' @details
#' This function implements block-based matrix subtraction algorithms optimized
#' for cache efficiency and memory usage. Key features:
#' 
#' * Input combinations supported:
#'   - Matrix-matrix subtraction
#'   - Matrix-vector subtraction (both left and right)
#'   - Vector-vector subtraction
#' 
#' * Performance optimizations:
#'   - Block-based computation for cache efficiency
#'   - Parallel processing for large matrices
#'   - Automatic method selection based on input size
#'   - Memory-efficient implementation
#'
#' The function automatically selects the appropriate subtraction method based
#' on input types and sizes. For large matrices (>2.25e+08 elements), block-based
#' computation is used by default.
#'
#' @param A Matrix or vector. First input operand.
#' @param B Matrix or vector. Second input operand.
#' @param block_size Integer. Block size for computation. If NULL, uses maximum
#'   allowed block size.
#' @param paral Logical. If TRUE, enables parallel computation. Default is NULL,
#'   which is treated as FALSE.
#' @param byBlocks Logical. If TRUE (default), forces block-based computation for
#'   large matrices. Can be set to FALSE to disable blocking.
#' @param threads Integer. Number of threads for parallel computation. If NULL,
#'   uses half of available threads.
#'
#' @return Matrix or vector containing the result of A - B.
#'
#' @examples
#' \donttest{
#' 
#' # Matrix-matrix subtraction
#' N <- 2500
#' M <- 400
#' nc <- 4
#' 
#' set.seed(555)
#' mat1 <- matrix(rnorm(N*M, mean=0, sd=10), N, M)
#' mat2 <- matrix(rnorm(N*M, mean=0, sd=10), N, M)
#' 
#' # Parallel block subtraction
#' result <- bdblockSubstract(mat1, mat2,
#'                           paral = TRUE,
#'                           threads = nc)
#' 
#' # Matrix-vector subtraction
#' vec <- rnorm(M)
#' result_mv <- bdblockSubstract(mat1, vec,
#'                              paral = TRUE,
#'                              threads = nc)
#' }
#'
#' @references
#' * Golub, G. H., & Van Loan, C. F. (2013). Matrix Computations, 4th Edition.
#'   Johns Hopkins University Press.
#' * Kumar, V. et al. (1994). Introduction to Parallel Computing: Design and
#'   Analysis of Algorithms. Benjamin/Cummings Publishing Company.
#'
#' @seealso
#' * \code{\link{bdblockSum}} for block-based matrix addition
#' * \code{\link{bdblockMult}} for block-based matrix multiplication
#'
#' @export
bdblockSubstract <- function(A, B, block_size = NULL, paral = NULL,
                             byBlocks = TRUE, threads = NULL) {
    # Operands and tuning options go to the compiled routine exactly as
    # received; NULL options are passed on as NULL rather than resolved here.
    difference <- .Call(
        '_BigDataStatMeth_bdblockSubstract',
        PACKAGE = 'BigDataStatMeth',
        A, B, block_size, paral, byBlocks, threads
    )
    difference
}

#' Block-Based Matrix Addition
#'
#' @description
#' Performs efficient matrix addition using block-based algorithms. The function
#' supports various input combinations (matrix-matrix, matrix-vector, vector-vector)
#' and provides options for parallel processing and block-based computation.
#'
#' @details
#' This function implements block-based matrix addition algorithms optimized
#' for cache efficiency and memory usage. Key features:
#' 
#' * Input combinations supported:
#'   - Matrix-matrix addition
#'   - Matrix-vector addition (both left and right)
#'   - Vector-vector addition
#' 
#' * Performance optimizations:
#'   - Block-based computation for cache efficiency
#'   - Parallel processing for large matrices
#'   - Automatic method selection based on input size
#'   - Memory-efficient implementation
#'
#' The function automatically selects the appropriate addition method based
#' on input types and sizes. For large matrices (>2.25e+08 elements), block-based
#' computation is used by default.
#'
#' @param A Matrix or vector. First input operand.
#' @param B Matrix or vector. Second input operand.
#' @param block_size Integer. Block size for computation. If NULL, uses maximum
#'   allowed block size.
#' @param paral Logical. If TRUE, enables parallel computation. Default is NULL,
#'   which is treated as FALSE.
#' @param byBlocks Logical. If TRUE (default), forces block-based computation for
#'   large matrices. Can be set to FALSE to disable blocking.
#' @param threads Integer. Number of threads for parallel computation. If NULL,
#'   uses half of available threads.
#'
#' @return Matrix or vector containing the result of A + B.
#'
#' @examples
#' \donttest{
#' 
#' # Matrix-matrix addition
#' N <- 2500
#' M <- 400
#' nc <- 4
#' 
#' set.seed(555)
#' mat1 <- matrix(rnorm(N*M, mean=0, sd=10), N, M)
#' mat2 <- matrix(rnorm(N*M, mean=0, sd=10), N, M)
#' 
#' # Parallel block addition
#' result <- bdblockSum(mat1, mat2,
#'                      paral = TRUE,
#'                      threads = nc)
#' 
#' # Matrix-vector addition
#' vec <- rnorm(M)
#' result_mv <- bdblockSum(mat1, vec,
#'                         paral = TRUE,
#'                         threads = nc)
#' }
#'
#' @references
#' * Golub, G. H., & Van Loan, C. F. (2013). Matrix Computations, 4th Edition.
#'   Johns Hopkins University Press.
#' * Kumar, V. et al. (1994). Introduction to Parallel Computing: Design and
#'   Analysis of Algorithms. Benjamin/Cummings Publishing Company.
#'
#' @seealso
#' * \code{\link{bdblockSubstract}} for block-based matrix subtraction
#' * \code{\link{bdblockMult}} for block-based matrix multiplication
#'
#' @export
bdblockSum <- function(A, B, block_size = NULL, paral = NULL,
                       byBlocks = TRUE, threads = NULL) {
    # Operands and tuning options go to the compiled routine exactly as
    # received; NULL options are passed on as NULL rather than resolved here.
    total <- .Call(
        '_BigDataStatMeth_bdblockSum',
        PACKAGE = 'BigDataStatMeth',
        A, B, block_size, paral, byBlocks, threads
    )
    total
}

#' @title Compute correlation matrix for in-memory matrices (unified function)
#' @description Compute Pearson or Spearman correlation matrix for matrices that fit in memory.
#' This function automatically detects whether to compute:
#' \itemize{
#'   \item Single matrix correlation cor(X) - when only matrix X is provided
#'   \item Cross-correlation cor(X,Y) - when both matrices X and Y are provided
#' }
#' 
#' @param X First numeric matrix (observations in rows, variables in columns)
#' @param Y Second numeric matrix (optional, observations in rows, variables in columns)
#' @param trans_x Logical, whether to transpose matrix X (default: FALSE) 
#' @param trans_y Logical, whether to transpose matrix Y (default: FALSE, ignored if Y not provided)
#' @param method Character string indicating correlation method ("pearson" or "spearman", default: "pearson")
#' @param use_complete_obs Logical, whether to use only complete observations (default: TRUE)
#' @param compute_pvalues Logical, whether to compute p-values for correlations (default: TRUE)
#' @param threads Integer, number of threads for parallel computation (optional, default: -1 for auto)
#' 
#' @return A list containing correlation results
#' 
#' @examples
#' \donttest{
#' set.seed(123)
#' X <- matrix(rnorm(1000), nrow = 100, ncol = 10)
#' 
#' # Single matrix correlation
#' res <- bdCorr_matrix(X)
#' 
#' # Transposed (sample-sample correlations)
#' res_t <- bdCorr_matrix(X, trans_x = TRUE)
#' 
#' # Cross-correlation with a second matrix
#' Y <- matrix(rnorm(400), nrow = 100, ncol = 4)
#' res_xy <- bdCorr_matrix(X, Y)
#' }
#' 
#' @export
bdCorr_matrix <- function(X, Y = NULL,
                          trans_x = NULL, trans_y = NULL,
                          method = NULL,
                          use_complete_obs = NULL, compute_pvalues = NULL,
                          threads = NULL) {
    # Every optional argument defaults to NULL at the R level and is passed
    # through unchanged; this wrapper substitutes no values of its own.
    correlation <- .Call(
        '_BigDataStatMeth_bdCorr_matrix',
        PACKAGE = 'BigDataStatMeth',
        X, Y, trans_x, trans_y, method,
        use_complete_obs, compute_pvalues, threads
    )
    correlation
}

#' Efficient Matrix Cross-Product Computation
#'
#' @description
#' Computes matrix cross-products efficiently using block-based algorithms and
#' optional parallel processing. Supports both single-matrix (X'X) and two-matrix
#' (X'Y) cross-products.
#'
#' @details
#' This function implements efficient cross-product computation using block-based
#' algorithms optimized for cache efficiency and memory usage. Key features:
#' 
#' * Operation modes:
#'   - Single matrix: Computes X'X
#'   - Two matrices: Computes X'Y
#' 
#' * Performance optimizations:
#'   - Block-based computation for cache efficiency
#'   - Parallel processing for large matrices
#'   - Automatic block size selection
#'   - Memory-efficient implementation
#'
#' The function automatically selects optimal computation strategies based on
#' input size and available resources. For large matrices, block-based computation
#' is used to improve cache utilization.
#'
#' @param A Numeric matrix. First input matrix.
#' @param B Optional numeric matrix. If provided, computes A'B instead of A'A.
#' @param transposed Logical. If TRUE, uses transposed input matrix.
#' @param block_size Integer. Block size for computation. If NULL, uses optimal
#'   block size based on matrix dimensions and cache size.
#' @param paral Logical. If TRUE, enables parallel computation.
#' @param threads Integer. Number of threads for parallel computation. If NULL,
#'   uses all available threads.
#'
#' @return Numeric matrix containing the cross-product result.
#'
#' @examples
#' 
#' # Single matrix cross-product
#' n <- 100
#' p <- 60
#' X <- matrix(rnorm(n*p), nrow=n, ncol=p)
#' res <- bdCrossprod(X)
#' 
#' # Verify against base R
#' all.equal(crossprod(X), res)
#' 
#' # Two-matrix cross-product
#' n <- 100
#' p <- 100
#' Y <- matrix(rnorm(n*p), nrow=n)
#' res <- bdCrossprod(X, Y)
#' all.equal(crossprod(X, Y), res)
#' 
#' # Parallel computation
#' res_par <- bdCrossprod(X, paral = TRUE, threads = 2)
#'
#' @references
#' * Golub, G. H., & Van Loan, C. F. (2013). Matrix Computations, 4th Edition.
#'   Johns Hopkins University Press.
#' * Kumar, V. et al. (1994). Introduction to Parallel Computing: Design and
#'   Analysis of Algorithms. Benjamin/Cummings Publishing Company.
#'
#' @seealso
#' * \code{\link{bdtCrossprod}} for transposed cross-product
#' * \code{\link{bdblockMult}} for block-based matrix multiplication
#'
#' @export
bdCrossprod <- function(A, B = NULL, transposed = NULL,
                        block_size = NULL, paral = NULL, threads = NULL) {
    # B and all options are forwarded as given (NULL when omitted); the
    # wrapper itself does not choose between the one- and two-matrix modes.
    cross <- .Call(
        '_BigDataStatMeth_bdCrossprod',
        PACKAGE = 'BigDataStatMeth',
        A, B, transposed, block_size, paral, threads
    )
    cross
}

#' Matrix–scalar weighted product
#'
#' @description
#' Multiplies a numeric matrix \code{A} by a scalar weight \code{w},
#' returning \eqn{w * A}. The input must be a base R numeric matrix (or
#' convertible to one). 
#'
#' @param A Numeric matrix (or object convertible to a dense numeric matrix).
#' @param w Numeric scalar weight.
#'
#' @return A numeric matrix with the same dimensions as \code{A}.
#'
#' @examples
#' set.seed(1234)
#' n <- 5; p <- 3
#' X <- matrix(rnorm(n * p), n, p)
#' w <- 0.75
#' bdScalarwproduct(X, w)
#'
#' @export
bdScalarwproduct <- function(A, w) {
    # Both arguments are passed positionally to the compiled routine; no
    # coercion or checking is performed in R.
    scaled <- .Call(
        '_BigDataStatMeth_bdScalarwproduct',
        PACKAGE = 'BigDataStatMeth',
        A, w
    )
    scaled
}

#' Efficient Matrix Transposed Cross-Product Computation
#'
#' @description
#' Computes matrix transposed cross-products efficiently using block-based
#' algorithms and optional parallel processing. Supports both single-matrix (XX')
#' and two-matrix (XY') transposed cross-products.
#'
#' @details
#' This function implements efficient transposed cross-product computation using
#' block-based algorithms optimized for cache efficiency and memory usage.
#' Key features:
#' 
#' * Operation modes:
#'   - Single matrix: Computes XX'
#'   - Two matrices: Computes XY'
#' 
#' * Performance optimizations:
#'   - Block-based computation for cache efficiency
#'   - Parallel processing for large matrices
#'   - Automatic block size selection
#'   - Memory-efficient implementation
#'
#' The function automatically selects optimal computation strategies based on
#' input size and available resources. For large matrices, block-based computation
#' is used to improve cache utilization.
#'
#' @param A Numeric matrix. First input matrix.
#' @param B Optional numeric matrix. If provided, computes AB' instead of AA'.
#' @param transposed Logical. If TRUE, uses transposed input matrix.
#' @param block_size Integer. Block size for computation. If NULL, uses optimal
#'   block size based on matrix dimensions and cache size.
#' @param paral Logical. If TRUE, enables parallel computation.
#' @param threads Integer. Number of threads for parallel computation. If NULL,
#'   uses all available threads.
#'
#' @return Numeric matrix containing the transposed cross-product result.
#'
#' @examples
#' 
#' # Single matrix transposed cross-product
#' n <- 100
#' p <- 60
#' X <- matrix(rnorm(n*p), nrow=n, ncol=p)
#' res <- bdtCrossprod(X)
#' all.equal(tcrossprod(X), res)
#' 
#' # Two-matrix transposed cross-product
#' # Both matrices must have the same number of columns
#' n <- 100
#' p <- 60
#' Y <- matrix(rnorm(n*p), nrow=n, ncol=p)
#' res <- bdtCrossprod(X, Y)
#' all.equal(tcrossprod(X, Y), res)
#' 
#' # Parallel computation
#' res_par <- bdtCrossprod(X, paral = TRUE, threads = 2)
#'
#' @references
#' * Golub, G. H., & Van Loan, C. F. (2013). Matrix Computations, 4th Edition.
#'   Johns Hopkins University Press.
#' * Kumar, V. et al. (1994). Introduction to Parallel Computing: Design and
#'   Analysis of Algorithms. Benjamin/Cummings Publishing Company.
#'
#' @seealso
#' * \code{\link{bdCrossprod}} for standard cross-product
#' * \code{\link{bdblockMult}} for block-based matrix multiplication
#'
#' @export
bdtCrossprod <- function(A, B = NULL, transposed = NULL,
                         block_size = NULL, paral = NULL, threads = NULL) {
    # B and all options are forwarded as given (NULL when omitted); the
    # wrapper itself does not choose between the one- and two-matrix modes.
    tcross <- .Call(
        '_BigDataStatMeth_bdtCrossprod',
        PACKAGE = 'BigDataStatMeth',
        A, B, transposed, block_size, paral, threads
    )
    tcross
}

#' Weighted matrix–vector products and cross-products
#'
#' @description
#' Compute weighted operations using a diagonal weight from \code{w}:
#' \itemize{
#' \item \code{"xtwx"}: \eqn{X' diag(w) X}  (row weights; \code{length(w) = nrow(X)})
#' \item \code{"xwxt"}: \eqn{X diag(w) X'}  (column weights; \code{length(w) = ncol(X)})
#' \item \code{"xw"}  : \eqn{X diag(w)}     (column scaling; \code{length(w) = ncol(X)})
#' \item \code{"wx"}  : \eqn{diag(w) X}     (row scaling;    \code{length(w) = nrow(X)})
#' }
#' Inputs may be base numeric matrices.
#'
#' @param X Numeric matrix (n x p).
#' @param w Numeric weight vector (length \code{n} or \code{p}), or a 1D matrix coerced to a vector.
#' @param op Character string (case-insensitive): one of
#'   \code{"XtwX"/"xtwx"}, \code{"XwXt"/"xwxt"}, \code{"Xw"/"xw"}, \code{"wX"/"wx"}.
#'
#' @return Numeric matrix with dimensions depending on \code{op}:
#' \code{p x p} for \code{"xtwx"}, \code{n x n} for \code{"xwxt"}, and \code{n x p} for \code{"xw"}/\code{"wx"}.
#'
#' @details
#' \code{w} is interpreted as the diagonal of a weight matrix; its required length depends on the operation:
#' rows for \code{"xtwx"} and \code{"wx"}, columns for \code{"xwxt"} and \code{"xw"}.
#'
#' @examples
#' set.seed(1)
#' n <- 10; p <- 5
#' X <- matrix(rnorm(n * p), n, p)
#' u <- runif(n); w <- u * (1 - u)
#' bd_wproduct(X, w, "xtwx")  # p x p
#' bd_wproduct(X, w, "wx")    # n x p (row scaling)
#'
#' v <- runif(p)
#' bd_wproduct(X, v, "xw")    # n x p (col scaling)
#' bd_wproduct(X, v, "xwxt")  # n x n
#'
#' @export
bd_wproduct <- function(X, w, op) {
    # The operation selector `op` is handed to the compiled routine together
    # with the matrix and the weights; it is not interpreted on the R side.
    weighted <- .Call(
        '_BigDataStatMeth_bd_wproduct',
        PACKAGE = 'BigDataStatMeth',
        X, w, op
    )
    weighted
}

# Try the BigDataStatMeth package in your browser
#
# Any scripts or data that you put into this service are public.
#
# BigDataStatMeth documentation built on May 15, 2026, 1:07 a.m.