#
# sparsebnUtils-functions.R
# sparsebnUtils
#
# Created by Bryon Aragam (local) on 1/22/16.
# Copyright (c) 2014-2017 Bryon Aragam. All rights reserved.
#
#
# PACKAGE SPARSEBNUTILS: Functions
#
# CONTENTS:
# check_if_matrix
# check_if_data_matrix
# check_if_complete_data
# check_if_numeric_data
# check_null
# check_na
# count_nas
# list_classes
# auto_generate_levels
# auto_count_levels
# check_list_class
# check_list_numeric
# check_list_names
# col_classes
# cor_vector
# capitalize
# recode_levels
# convert_factor_to_discrete
# format_list
# cor_vector
# cor_vector_ivn
# pmatch_numeric
#
#' @name sparsebn-functions
#' @rdname sparsebn-functions
#'
#' @param x a compatible object.
#' @param m a \code{matrix}.
#' @param df a \code{data.frame}.
#' @param li a \code{list}.
#' @param check.class \code{character} class name to compare against.
#' @param check.names \code{character} names to compare against.
#' @param X a matrix.
#' @param data a \code{data.frame}.
#' @param ivn list of interventions (see \code{\link{sparsebnData}}).
#' @param string a \code{character} string.
#' @param numnode \code{integer} number of nodes.
#' @param table table of values to compare against.
#' @param tol maximum tolerance used for matching.
#'
#' @title Utility functions
#'
#' @description Various utility functions for packages in the \code{sparsebn} family
#'
NULL
# Check if an object is EITHER matrix or Matrix object
#' @rdname sparsebn-functions
#' @export
check_if_matrix <- function(m){
is.matrix(m) || inherits(m, "Matrix")
} # END .CHECK_IF_MATRIX
# Check if an object is a valid dataset
#' @rdname sparsebn-functions
#' @export
check_if_data_matrix <- function(df){
is.data.frame(df) || check_if_matrix(df)
} # END .CHECK_IF_DATA_MATRIX
# Check if a dataset contains missing data
#' @rdname sparsebn-functions
#' @export
check_if_complete_data <- function(df){
(count_nas(df) == 0)
} # END .CHECK_IF_COMPLETE_DATA
# Check if a dataset contains only numeric (including integer) values
#' @rdname sparsebn-functions
#' @export
check_if_numeric_data <- function(df){
all(col_classes(df) %in% c("numeric", "integer"))
} # END .CHECK_IF_NUMERIC_DATA
# Check if an object contains any null values
#' @rdname sparsebn-functions
#' @export
check_null <- function(x){
any(unlist(lapply(x, is.null)))
} # END .CHECK_NULL
# Check if an object contains any missing values
#' @rdname sparsebn-functions
#' @export
check_na <- function(x){
suppressWarnings(any(unlist(lapply(x, is.na)))) # suppress warning about checking NULLs
} # END .CHECK_NA
# Count missing values in a matrix or data.frame
#' @rdname sparsebn-functions
#' @export
count_nas <- function(df){
if( !check_if_data_matrix(df)){
stop("Input must be a data.frame or a matrix!")
}
sum(is.na(df))
} # END .COUNT_NAS
# Return the types for each element in a list
#' @rdname sparsebn-functions
#' @export
list_classes <- function(li){
unlist(lapply(li, class))
} # END .LIST_CLASSES
# Return the different levels for each column in a data.frame, compatible
# with the 'levels' component of sparsebnData
#' @rdname sparsebn-functions
#' @export
auto_generate_levels <- function(df){
if( !check_if_data_matrix(df)){
stop("Input must be a data.frame or a matrix!")
}
if(!is.data.frame(df)) df <- data.frame(df)
out <- lapply(df, function(x) sort(unique(x)))
out
} # END AUTO_GENERATE_LEVELS
# Return the number of levels for each column in a data.frame
#' @rdname sparsebn-functions
#' @export
auto_count_levels <- function(df){
lapply(auto_generate_levels(df), length)
} # END .COUNT_NAS
# Return TRUE if every element of a list inherits check.class, FALSE otherwise
#' @rdname sparsebn-functions
#' @export
check_list_class <- function(li, check.class){
if(length(li) == 0){
warning("List contains no elements!")
TRUE # default to true if empty
}
all(unlist(lapply(li, function(x) inherits(x, check.class))))
} # END .CHECK_LIST_CLASS
# Return TRUE if every element of a list inherits numeric, FALSE otherwise
# In particular, if every component is integer, will still return TRUE due to quirks of R
# This is because is.numeric(1L) returns TRUE
#' @rdname sparsebn-functions
#' @export
check_list_numeric <- function(li){
if(length(li) == 0){
warning("List contains no elements!")
TRUE # default to true if empty
}
all(unlist(lapply(li, is.numeric)))
} # END .CHECK_LIST_NUMERIC
# Return TRUE if names(list) matches check.names, FALSE otherwise
#' @rdname sparsebn-functions
#' @export
check_list_names <- function(li, check.names){
if(length(li) == 0){
warning("List contains no elements!")
TRUE # default to true if empty
}
(length(li) == length(check.names)) && (names(li) == check.names)
} # END .CHECK_LIST_NAMES
# Output the class of each column in X, return as a character vector
#' @rdname sparsebn-functions
#' @export
col_classes <- function(X){
# if( !check_if_data_matrix(X)){
# stop("Input must be a data.frame or a matrix!")
# }
stopifnot(is.data.frame(X))
sapply(X, class)
} # END .COL_CLASSES
# Utility to capitalize the first letter in a string
# Borrowed verbatim from the 'Hmisc' package
#' @rdname sparsebn-functions
#' @export
capitalize <- function(string) {
capped <- grep("^[^A-Z]*$", string, perl = TRUE)
substr(string[capped], 1, 1) <- toupper(substr(string[capped],
1, 1))
return(string)
} # END CAPITALIZE
# Recode a factor to start at 0
#' @rdname sparsebn-functions
#' @export
recode_levels <- function(x){
stopifnot(is.factor(x))
levels(x) <- seq(0, length(levels(x)) - 1, 1) # convert levels to 0...k-1
x
}
# Convert a factor to integer with levels 0...n-1
#' @rdname sparsebn-functions
#' @export
convert_factor_to_discrete <- function(x){
f <- recode_levels(x)
as.numeric(levels(f))[f] # convert factor to numeric (see ?factor)
}
# Format a list for pretty printing
format_list <- function(x){
stopifnot(is.list(x))
if(is.null(names(x))){
row_names <- 1L:length(x)
} else{
row_names <- names(x)
}
### Adjust width of output based on longest string
row_width <- max(c(0, nchar(row_names))) + 4 # Row headers
row_width_str <- paste0("%-", row_width, "s") #
cell_width <- max(c(0, nchar(unlist(x)))) + 2 # Cell contents
cell_width_str <- paste0("%-", cell_width, "s") #
### Assemble string output
list.out <- mapply(function(x, y){
prefix <- paste0("[", x, "]")
# prefix <- sprintf("%-5s", prefix)
prefix <- sprintf(row_width_str, prefix)
if(is.numeric(y)) y <- round(y, 2)
# paste0(prefix, paste(sprintf("%-5s", sort(y)), collapse = ""))
paste0(prefix, paste(sprintf(cell_width_str, sort(y)), collapse = ""))
}, row_names, x)
list.out <- unlist(list.out)
list.out <- paste(list.out, collapse = " \n")
list.out <- paste0(list.out, "\n") # add trailing newline
list.out
}
# NOTE: Deprecated as of 3/2/17
cor_vector <- function(data){
.Deprecated(new = "cor_vector_ivn")
check.numeric <- (col_classes(data) != "numeric")
if( any(check.numeric)){
not.numeric <- which(check.numeric)
stop(paste0("Input columns must be numeric! Columns ", paste(not.numeric, collapse = ", "), " are non-numeric."))
}
if( any(dim(data) < 2)){
stop("Input must have at least 2 rows and columns!") # 2-8-15: Why do we check this here?
}
cors <- stats::cor(data)
cors <- cors[upper.tri(cors, diag = TRUE)]
cors
} # END COR_VECTOR
# Compute the correlation matrix of a dataset, and return the unduplicated elements (i.e. upper-triangular portions) as a vector
# Used as the primary "carrier of information" in ccdr since the algorithms only depends on pairwise correlations
# Works with intervention data as well
#
# Migrated from ccdrAlgorithm package
#' @rdname sparsebn-functions
#' @export
cor_vector_ivn <- function(data, ivn = NULL){
check.numeric <- check_if_numeric_data(data)
if(!check.numeric){
check.numeric <- (col_classes(data) %in% c("numeric", "integer"))
not.numeric <- which(!check.numeric)
stop(data_not_numeric(not.numeric))
}
if( any(dim(data) < 2)){
stop("Input must have at least 2 rows and columns!") # 2-8-15: Why do we check this here?
}
pp <- ncol(data)
## unique(unlist(list(NULL, NULL, ...))) returns NULL without error
## so checking list(NULL, NULL, ...) is equivalent to checking if ivnlabels is NULL
ivnlabels <- unique(unlist(ivn))
if(is.null(ivnlabels)) {
## i.e. purely observational
cors <- stats::cor(data)
cors <- cors[upper.tri(cors, diag = TRUE)]
return(list(cors = cors, indexj = rep(0L, pp + 1)))
} else {
## so there are at least some interventions
ivnj <- as.integer(c(ivnlabels, pp + 1))
# get all the j's that has interventions
# including pp+1 for observational rows (compatible with purely observational data)
# is this necessary if most are balanced designs where all nodes get intervention?
# any possible optimization?
len <- length(ivnj)
cors <- vector("list", len)
indexj <- as.integer(rep(len - 1, pp + 1)) # zero-based index for C compatibility
for(j in 1:(len - 1)) {
jj <- ivnj[j]
indexj[jj] <- j - 1
## extract rows where node jj has no intervetion
corsjj <- stats::cor(data[!sapply(lapply(ivn, is.element, jj), any), , drop = FALSE])
cors[[j]] <- corsjj[upper.tri(corsjj, diag = TRUE)]
}
corsjj <- stats::cor(data[!sapply(lapply(ivn, is.element, pp + 1), any), , drop = FALSE])
## do not change above line to cor(data[sapply(ivn, is.null), ])
## why?
cors[[len]] <- corsjj[upper.tri(corsjj, diag = TRUE)]
cors <- unlist(cors)
return(list(cors = cors, indexj = indexj))
}
} # END COR_VECTOR_IVN
# Partial matching for numeric values
# Returns the value in table closest to x, unless the minimum absolute distance
# exceeds tol (in which case NA is returned).
#
#' @rdname sparsebn-functions
#' @export
pmatch_numeric <- function(x, table, tol = 0.1){
absdiff <- abs(table - x)
idx <- which.min(absdiff)
if(absdiff[idx] < tol){
idx
} else{
NA
}
} # END PMATCH_NUMERIC
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.