R/matrices.R

Defines functions randn rand randi template_summary sparsify vec vec_no_diag unique_values unique_indices is_binary split_size

Documented in is_binary rand randi randn sparsify split_size template_summary unique_indices unique_values vec vec_no_diag

# random matrices =========================================

#' Create a matrix with random N(0,1) entries
#' @param ... dimensions of the matrix: sz1, sz2, ...
#' @return matrix with random N(0,1) entries
#' @export
randn = function(...) {
    size = c(...)
    data = stats::rnorm(prod(size))
    array(data = data, dim = size)
}


#' Create a matrix with random U(0,1) entries
#' @param ... dimensions of the matrix: sz1, sz2, ...
#' @return matrix with random U(0,1) entries
#' @export
rand = function(...) {
    size = c(...)
    data = stats::runif(prod(size))
    array(data = data, dim = size)
}


#' Create a matrix with random integers between imin and imax
#' @param imax scalar [then imin = 1] or 2-elements vector c(imin, imax)
#' @param ... dimensions of the matrix: sz1, sz2, ...
#' @return matrix with random integers between imin and imax
#' @export
randi = function(imax, ...) {
    
    if (length(imax) == 1) {
        imin = 1
    } else if (length(imax) == 2) {
        imin = imax[1]
        imax = imax[2]
    } else {
        stop("Argument 'imax' must be a scalar or have two elements.")
    }
    
    if (imin > imax) {
        stop("Argument 'imax' must be greater than or equal to 'imin'.")
    }
    
    size = c(...)
    data = sample_random(imin:imax, prod(size), replace = TRUE)
    array(data = data, dim = size)
}



# matrix summaries =========================================

#' Template for matrix summaries along a given dimension
#' @param fcn_all function to apply when dim = 'all'
#' @param fcn_1 function to apply when dim = '1', i.e. over rows
#' @param fcn_2 function to apply when dim = '2', i.e. over columns
#' @return function that computes summary statistics of a matrix 
#' over all entries, rows or columns
template_summary = function(fcn_all, fcn_1, fcn_2) {
    function(x, dim = 'all', ...) {
        x = as.matrix(x)
        dim = as.character(dim)
        switch(dim,
               'all' = fcn_all(x, ...),
               '1'   = fcn_1(x, ...),
               '2'   = fcn_2(x, ...),
               stop('dim not supported'))
    }
}


#' Sum
#' @param x matrix
#' @param dim dimension on which to operate along
#' @param ... additional parameters
#' @export
get_sum = template_summary(fcn_all = base::sum, 
                           fcn_1 = matrixStats::colSums2, 
                           fcn_2 = matrixStats::rowSums2)


#' Mean
#' @param x matrix
#' @param dim dimension on which to operate along
#' @param ... additional parameters
#' @export
get_mean = template_summary(fcn_all = base::mean, 
                            fcn_1 = matrixStats::colMeans2, 
                            fcn_2 = matrixStats::rowMeans2)


#' Standard deviation
#' @param x matrix
#' @param dim dimension on which to operate along
#' @param ... additional parameters
#' @export
get_std = template_summary(fcn_all = stats::sd, 
                           fcn_1 = matrixStats::colSds, 
                           fcn_2 = matrixStats::rowSds)


#' Maximum
#' @param x matrix
#' @param dim dimension on which to operate along
#' @param ... additional parameters
#' @export
get_max = template_summary(fcn_all = base::max, 
                           fcn_1 = matrixStats::colMaxs, 
                           fcn_2 = matrixStats::rowMaxs)


#' Minimum
#' @param x matrix
#' @param dim dimension on which to operate along
#' @param ... additional parameters
#' @export
get_min = template_summary(fcn_all = base::min, 
                           fcn_1 = matrixStats::colMins, 
                           fcn_2 = matrixStats::rowMins)


#' Median
#' @param x matrix
#' @param dim dimension on which to operate along
#' @param ... additional parameters
#' @export
get_median = template_summary(fcn_all = stats::median,
                              fcn_1 = matrixStats::colMedians,
                              fcn_2 = matrixStats::rowMedians)


#' Interquartile range
#' @param x matrix
#' @param dim dimension on which to operate along
#' @param ... additional parameters
#' @export
get_iqr = template_summary(fcn_all = stats::IQR,
                           fcn_1 = matrixStats::colIQRs,
                           fcn_2 = matrixStats::rowIQRs)



# sparsification ==============================================

#' Sparsify array
#' @param Y array
#' @param mode binary or real entries?
#' @param frac_keep fraction of elements to keep
#' @param n_keep absolute number of elements to keep
#' @export
sparsify = function(Y, mode = c('binary', 'real'), 
                    frac_keep = NULL, n_keep = NULL) {
    
    Y = as.matrix(Y)
    
    mode = match.arg(mode)
    
    if ( is.null(frac_keep) && is.null(n_keep) ) {
        stop('at least one of `frac_keep` or `n_keep` must be provided.')
    }
    
    stopifnot( is.null(frac_keep) || (frac_keep >= 0 && frac_keep <= 1) )
    
    n = length(Y)
    
    if ( is.null(n_keep) ) {
        n_keep = floor(n * frac_keep)
    }
    
    ids = order(Y, decreasing = TRUE)
    ids = ids[1:n_keep]
    
    Ysparse = matrix(0, nrow(Y), ncol(Y))
    
    if (mode == 'binary') {
        Ysparse[ids] = 1
    } else if (mode == 'real') {
        Ysparse[ids] = Y[ids]
    } else {
        stop('unknown mode')
    }
    
    return(Ysparse)
    
}



# vectorization ============================================

#' Vectorize matrix
#' @param x matrix to vectorize
#' @export
vec = function(x) {
    x = as.matrix(x)
    c(x)
}



#' Remove diagonal from matrix and vectorize
#' @param x matrix to vectorize without the diagonal
#' @export
vec_no_diag = function(x) {
    x = as.matrix(x)
    n = nrow(x)
    x[-seq(1,n^2,n+1)]
}



#' Remove diagonal from matrix and vectorize
#' @param x matrix to vectorize without the diagonal
#' @export
vec_rm_diag = vec_no_diag



# tools for binary matrices ================================

#' Return unique values of array
#' @param x array
#' @export
unique_values = function(x) {
    unique(vec(x))
}


#' Return indices of the unique values
#' @param x array
#' @param MARGIN on which margin the function will be applied over
#' @export
unique_indices = function(x, MARGIN = 0) {
    ids = switch(as.character(MARGIN),
                 '1' = seq_len(nrow(x)),
                 '2' = seq_len(ncol(x)),
                 seq_along(x))
    ids[!duplicated(x, MARGIN = MARGIN)]
}


#' Check if an array has binary entries
#' @param x array or dataframe
#' @param allowed_values allowed values for the entries
#' @param na.rm remove NAs?
#' @export
is_binary = function(x, allowed_values = c(0, 1), na.rm = FALSE) {
    x_unique = unique_values(x)
    if ( isTRUE(na.rm) ) {
        x_unique = stats::na.omit(x_unique)
    }
    flag = all(x_unique %in% allowed_values)
    return(flag)
}


#' Split vector into chunks of equal size
#' @param x vector
#' @param n chunk size
#' @export
split_size = function(x, n) {
    
    dx = n / length(x)
    
    chunks = split(x, 
                cut(x, stats::quantile(x, probs = seq(0, 1, by = dx)), include.lowest=TRUE))
    
    unname(chunks)
}
unoe/noe documentation built on Nov. 5, 2019, 11:05 a.m.