matter: Out-of-core statistical computing and signal processing

Documented in array_ind avg eval_at eval_exprs hex2raw iQuote isoclos isofun linear_ind matter_defaults mem memcl memtime parse_formula pinv profmem raw2hex seq_rel shingles size_bytes sizeof uuid

#### Set up matter options ####
## ----------------------------

.onLoad <- function(libname, pkgname) {
	options(
		matter.compress.atoms = 3,
		matter.default.nchunks = 20L,
		matter.default.chunksize = NA_real_,
		matter.default.serialize = NA,
		matter.default.verbose = FALSE,
		matter.matmul.bpparam = NULL,
		matter.show.head = TRUE,
		matter.show.head.n = 6L,
		matter.coerce.altrep = FALSE,
		matter.wrap.altrep = FALSE,
		matter.temp.dir = tempdir(),
		matter.temp.gc = TRUE,
		matter.vizi.par = par_style_new(),
		matter.vizi.panelgrid = NULL,
		matter.vizi.trans3d = NULL,
		matter.vizi.engine = "base",
		matter.vizi.style = "light",
		matter.vizi.dpal = "Tableau 10",
		matter.vizi.cpal = "Viridis")
}

matter_defaults <- function(nchunks = 20L, chunksize = NA_real_,
	serialize = NA, verbose = FALSE)
{
	if ( !missing(nchunks) ) {
		nchunks <- as.integer(nchunks)
		if ( !isTRUE(nchunks > 0L) || length(nchunks) != 1L )
			matter_error("nchunks must be a single positive number")
		options(matter.default.nchunks=nchunks)
	} else {
		nchunks <- getOption("matter.default.nchunks")
	}
	if ( !missing(chunksize) ) {
		if ( isFALSE(chunksize > 0L) || length(chunksize) != 1L )
			matter_error("chunksize must be a single positive number or NA")
		options(matter.default.chunksize=as_size(chunksize))
	} else {
		chunksize <- getOption("matter.default.chunksize")
	}
	if ( !missing(serialize) ) {
		if ( !is.logical(serialize) || length(serialize) != 1L )
			matter_error("serialize must be a single logical value")
		options(matter.default.serialize=serialize)
	} else {
		serialize <- getOption("matter.default.serialize")
	}
	if ( !missing(verbose) ) {
		if ( !isTRUE(verbose) && !isFALSE(verbose) )
			matter_error("verbose must be TRUE or FALSE")
		options(matter.default.verbose=verbose)
	} else {
		verbose <- getOption("matter.default.verbose")
	}
	defaults <- list(nchunks=nchunks, chunksize=chunksize,
		serialize=serialize, verbose=verbose)
	if ( nargs() > 0L ) {
		invisible(defaults)
	} else {
		defaults
	}
}

#### Environments and closures ####
## ---------------------------------

matter_ns <- environment(NULL)

matter_env <- function() matter_ns

copy_env <- function(envir, parent = baseenv()) {
	copy <- new.env(parent=parent)
	for ( name in ls(envir=envir) )
		assign(name, get(name, envir=envir), envir=copy)
	copy
}

isofun <- function(fun, envir = baseenv()) {
	environment(fun) <- envir
	fun
}

isoclos <- function(fun, envir = baseenv()) {
	enclos <- copy_env(environment(fun), parent=envir)
	environment(fun) <- enclos
	fun
}

#### Normalize subscripts ####
## ----------------------------

as_subscripts <- function(i, x, exact = TRUE) {
	if ( missing(i) )
		return(NULL)
	if ( is.logical(i) )
		i <- which(rep_len(i, length(x)))
	if ( !is.numeric(i) )
		if ( exact ) {
			i <- match(i, names(x))
		} else {
			i <- pmatch(i, names(x))
		}
	i
}

as_array_subscripts <- function(i, x, margin, exact = TRUE) {
	if ( missing(i) )
		return(NULL)
	if ( is.list(i) || missing(margin) ) {
		i <- rep_len(i, length(dim(x)))
		return(lapply(seq_along(i), function(j)
			as_array_subscripts(i[[j]], x, j, exact)))
	}
	if ( is.null(i) ) {
		i <- seq_len(dim(x)[margin])
	} else if ( is.logical(i) ) {
		i <- which(rep_len(i, dim(x)[[margin]]))
	} else if ( !is.numeric(i) ) {
		if ( exact ) {
			i <- match(i, dimnames(x)[[margin]])
		} else {
			i <- pmatch(i, dimnames(x)[[margin]])
		}
	}
	i
}

as_row_subscripts <- function(i, x, exact = TRUE) {
	as_array_subscripts(i, x, 1L, exact)
}

as_col_subscripts <- function(i, x, exact = TRUE) {
	as_array_subscripts(i, x, 2L, exact)
}

linear_ind <- function(index, dim, rowMaj = FALSE) {
	if ( is.null(index) )
		return(seq_len(prod(dim)))
	if ( is.list(index) ) {
		for ( j in seq_along(dim) ) {
			if ( is.null(index[[j]]) )
				index[[j]] <- seq_len(dim[j])
		}
		ans.dim <- lengths(index)
		if ( rowMaj ) {
			index <- as.matrix(rev(expand.grid(rev(index))))
		} else {
			index <- as.matrix(expand.grid(index))
		}
	} else {
		ans.dim <- NULL
	}
	index <- as.matrix(index)
	for ( j in seq_along(dim) ) {
		i <- index[,j]
		if ( any(!is.na(i) & (i <= 0 | i > dim[j])) )
			matter_error("subscript out of bounds")
	}
	if ( rowMaj ) {
		strides <- c(rev(cumprod(rev(dim[-1L]))), 1L)
	} else {
		strides <- c(1L, cumprod(dim[-length(dim)]))
	}
	index <- ((index - 1L) %*% strides) + 1L
	if ( !is.null(ans.dim) ) {
		if ( rowMaj ) {
			ord <- aperm(array(seq_along(index), rev(ans.dim)))
		} else {
			ord <- array(seq_along(index), ans.dim)
		}
		storage.mode(ord) <- typeof(index)
		attr(index, "order") <- ord
	}
	index
}

array_ind <- function(i, dim, rowMaj = FALSE) {
	i <- i - 1L
	if ( rowMaj ) {
		strides <- c(rev(cumprod(rev(dim[-1L]))), 1L)
	} else {
		strides <- c(1L, cumprod(dim[-length(dim)]))
	}
	index <- matrix(nrow=length(i), ncol=length(dim))
	for ( j in seq_along(dim) ) {
		nextind <- i %/% strides[j]
		index[,j] <- nextind %% dim[j]
	}
	index + 1L
}

subset_list <- function(x, i) {
	f <- function(y) {
		if ( length(y) > 1L ) {
			y[i]
		} else {
			y
		}
	}
	lapply(x, f)
}

#### Data type codes and type conversions ####
## ---------------------------------------------

make_code <- function(codes, x, nomatch = NA_integer_) {
	if ( missing(x) )
		x <- character()
	if ( is.factor(x) && setequal(levels(x), codes) )
		return(x)
	if ( !is.numeric(x) )
		x <- pmatch(tolower(x), codes, nomatch, TRUE)
	factor(x, levels=seq_along(codes), labels=codes)
}

get_Rtypes <- function() {
	c("raw", "logical", "integer",
		"double", "character", "list")
}

get_Ctypes <- function() {
	c("char", "uchar", "int16", "uint16",
		"int32", "uint32", "int64", "uint64",
		"float32", "float64")
}

get_Caliases <- function() {
	c(short="int16", ushort="uint16",
		`16-bit integer`="int16",
		`16-bit unsigned integer`="uint16",
		int="int32", uint="uint32",
		`32-bit integer`="int32",
		`32-bit unsigned integer`="uint32",
		long="int64", ulong="uint64",
		`64-bit integer`="int64",
		`64-bit unsigned integer`="uint64",
		float="float32", double="float64",
		`32-bit float`="float32",
		`64-bit float`="float64")
}

as_Rtype <- function(x) {
	if ( !missing(x) ) {
		if ( is_Rtype(x, strict=TRUE) )
			return(x)
		if ( !is_Rtype(x) ) {
			if ( is_Ctype(x) )
				return(to_Rtype(as_Ctype(x)))
			if ( is_Calias(x) )
				return(to_Rtype(unalias_Ctype(x)))
		}
	}
	if ( !missing(x) && is.character(x) ) {
		# allow 'numeric' as synonym for 'double'
		i <- pmatch(x, "numeric", 0L, TRUE)
		i <- which(as.logical(i))
		if ( length(i) > 0 )
			x[i] <- "double"
	}
	make_code(get_Rtypes(), x)
}

as_Ctype <- function(x) {
	if ( !missing(x) ) {
		if ( is_Ctype(x, strict=TRUE) )
			return(x)
		if ( !is_Ctype(x) ) {
			if ( is_Rtype(x) )
				return(to_Ctype(as_Rtype(x)))
			if ( is_Calias(x) )
				x <- unalias_Ctype(x)
		}
	}
	make_code(get_Ctypes(), x)
}

is_Rtype <- function(x, strict = FALSE) {
	codes <- get_Rtypes()
	valid <- is.factor(x) && setequal(levels(x), codes)
	valid || (all(x %in% codes) && !strict)
}

is_Ctype <- function(x, strict = FALSE) {
	codes <- get_Ctypes()
	valid <- is.factor(x) && setequal(levels(x), codes)
	valid || (all(x %in% codes) && !strict)
}

is_Calias <- function(x) {
	aliases <- names(get_Caliases())
	any(x %in% aliases)
}

unalias_Ctype <- function(x) {
	aliases <- get_Caliases()
	ifelse(x %in% names(aliases), aliases[x], x)
}

to_Rtype <- function(x) {
	codes <- c(char = "character", uchar = "raw",
		int16 = "integer", uint16 = "integer",
		int32 = "integer", uint32 = "integer",
		int64 = "double", uint64 = "double",
		float32 = "double", float64 = "double")
	as_Rtype(codes[as.integer(as_Ctype(x))])
}

to_Ctype <- function(x) {
	codes <- c(raw = "uchar", logical = "int32",
		integer = "int32", numeric = "float64",
		character = "char", list = NA_character_)
	as_Ctype(codes[as.integer(as_Rtype(x))])
}

sizeof <- function(x) {
	sizes <- c(char = 1L, uchar = 1L, int16 = 2L, uint16 = 2L,
		int32 = 4L, uint32 = 4L, int64 = 8L, uint64 = 8L,
		float32 = 4L, float64 = 8L)
	sizes[as.integer(as_Ctype(x))]
}

topmode_Rtype <- function(x) {
	x <- as_Rtype(x)
	codes <- levels(x)
	as_Rtype(codes[max(as.integer(x))])
}

#### Codes for C-level switch statements ####
## ------------------------------------------

as_Ops <- function(x) {
	codes <- c(
		# Arith (1-7)
		"+", "-", "*", "^", "%%", "%/%", "/",
		# Compare (8-13)
		"==", ">", "<", "!=", "<=", ">=",
		# Logic (14-16)
		"&", "|", "!",
		# Math (17+)
		"log", "log10", "log2", "log1p", "exp")
	make_code(codes, x)
}

as_Summary <- function(x) {
	codes <- c(
		# Summary (1-7)
		"max", "min", "range", "prod", "sum", "any", "all",
		# Statistics (8-11)
		"mean", "var", "sd", "nnzero")
	make_code(codes, x)
}

as_run_type <- function(x) {
	codes <- c("drle", "rle", "seq")
	make_code(codes, x[1L], nomatch=1L)
}

as_tol <- function(x) {
	if ( !is.null(attr(x, "tol_type")) )
		return(x)
	tol <- x[1L]
	codes <- c("absolute", "relative")
	if ( !is.null(names(tol)) ) {
		tol_type <- pmatch(names(tol), codes, nomatch=1L)
	} else {
		tol_type <- 1L
	}
	tol_type <- factor(tol_type, levels=seq_len(2L), labels=codes)
	structure(as.double(tol), tol_type=tol_type)
}

as_tol_ref <- function(x) {
	codes <- c("abs", "x", "y")
	make_code(codes, x[1L], nomatch=1L)
}

as_interp <- function(x) {
	codes <- c(
		# simple interp (1-5)
		"none", "sum", "mean", "max", "min",
		# peak-based (6)
		"area",
		# spline interp (7-8)
		"linear", "cubic",
		# kernel interp (9-10)
		"gaussian", "lanczos")
	make_code(codes, x)
}

as_binstat <- function(x) {
	codes <- c(
		# location (1-4)
		"sum", "mean", "max", "min",
		# spread (5-7)
		"sd", "var", "mad",
		# other (8-9)
		"quantile", "sse")
	make_code(codes, x[1L], nomatch=1L)
}

as_dist <- function(x) {
	codes <- c(
		"euclidean", "maximum",
		"manhattan", "minkowski")
	make_code(codes, x[1L], nomatch=1L)
}

as_size <- function(x) {
	if ( is.null(names(x)) ) {
		x[1L]
	} else {
		size_bytes(unname(x[1L]), units=names(x[1L]))
	}
}

#### Data structure utility functions ####
## ---------------------------------------

as_real_memory_matrix <- function(x) {
	if ( is.matrix(x) || is(x, "Matrix") ) {
		x
	} else {
		matter_warn("coercing input to a local matrix")
		as.matrix(x)
	}
}

I <- function(x) {
	if ( is.null(x) ) {
		NULL
	} else {
		base::I(x)
	}
}

non_null <- function(x) !vapply(x, is.null, logical(1L))

is_null_or_na <- function(x) is.null(x) || is.na(x)

is_discrete <- function(x) {
	is.factor(x) || is.character(x) || is.logical(x)
}

update_attr <- function(x, attr) {
	for ( nm in names(attr) )
		attr(x, nm) <- attr[[nm]]
	x
}

drop_attr <- function(x, keep.names = TRUE) {
	y <- as.vector(x)
	dim(y) <- dim(x)
	if ( keep.names ) {
		dimnames(y) <- dimnames(x)
		names(y) <- names(x)
	}
	y
}

set_names <- function(x, nm, i) {
	if ( !missing(i) && !is.null(i) )
		nm <- nm[i]
	names(x) <- nm
	x
}

combine_names <- function(x1, x2) {
	nm1 <- names(x1)
	nm2 <- names(x2)
	if ( is.null(nm1) && is.null(nm2) )
		return(names(x1))
	nm1 <- nm1 %||% character(length(x1))
	nm2 <- nm2 %||% character(length(x2))
	c(nm1, nm2)
}

combine_any <- function(x, ...)
{
	if ( ...length() > 0 ) {
		do.call(combine, list(x, ...))
	} else {
		x
	}
}

set_dimnames <- function(x, dnm, index) {
	if ( !missing(index) && !is.null(index) )
		dnm <- subset_dimnames(dnm, index)
	dimnames(x) <- dnm
	x
}

subset_dimnames <- function(dnm, index) {
	for ( i in seq_along(index) ) {
		j <- index[[i]]
		if ( !is.null(dnm[[i]]) && !is.null(j) )
			dnm[[i]] <- dnm[[i]][j]
	}
	dnm
}

cbind_dimnames <- function(x1, x2) {
	c1 <- colnames(x1)
	c2 <- colnames(x2)
	if ( is.null(c1) && is.null(c2) )
		return(dimnames(x1))
	c1 <- c1 %||% character(ncol(x1))
	c2 <- c2 %||% character(ncol(x2))
	list(rownames(x1), c(c1, c2))
}

rbind_dimnames <- function(x1, x2) {
	r1 <- rownames(x1)
	r2 <- rownames(x2)
	if ( is.null(r1) && is.null(r2) )
		return(dimnames(x1))
	r1 <- r1 %||% character(nrow(x1))
	r2 <- r2 %||% character(nrow(x2))
	list(c(r1, r2), colnames(x1))
}

cbind_any <- function(..., deparse.level = 1)
{
	if ( ...length() == 1L )
		return(...elt(1))
	x <- ...elt(1)
	y <- ...elt(2)
	if ( ...length() > 2L )	{
		more <- list(...)[-c(1L, 2L)]
		cbind2(x, do.call(cbind2, c(list(y), more)))
	} else {
		cbind2(x, y)
	}
}

rbind_any <- function(..., deparse.level = 1)
{
	if ( ...length() == 1L )
		return(...elt(1))
	x <- ...elt(1)
	y <- ...elt(2)
	if ( ...length() > 2L )	{
		more <- list(...)[-c(1L, 2L)]
		rbind2(x, do.call(rbind2, c(list(y), more)))
	} else {
		rbind2(x, y)
	}
}

check_comformable_lengths <- function(x, y) {
	if ( is.vector(x) ) {
		return(check_comformable_lengths(y, x))
	} else if ( length(y) != 1 && length(y) != length(x) ) {
		return("argument length is non-conformable with array length")
	}
	TRUE
}

check_comformable_dims <- function(x, y, margin = 1L) {
	if ( is.vector(x) ) {
		return(check_comformable_dims(y, x))
	} else if ( length(y) != 1 && length(y) != dim(x)[margin] ) {
		return("argument length is non-conformable with array dimensions")
	}
	TRUE
}

normalize_lengths <- function(list) {
	if ( length(list) > 0L ) {
		ns <- lengths(list)
		nmax <- max(ns)
		if ( any(ns != nmax) )
			list <- lapply(list, rep_len, length.out=nmax)
	}
	list
}

normalize_lengths2 <- function(list1, list2) {
	n1 <- length(list1)
	n2 <- length(list1)
	nmax <- max(n1, n2)
	if ( length(list1) != nmax )
		list1 <- rep_len(list1, nmax)
	if ( length(list2) != nmax )
		list2 <- rep_len(list2, nmax)
	ns1 <- lengths(list1)
	ns2 <- lengths(list2)
	nsmax <- pmax(ns1, ns2)
	if ( any(ns1 != nsmax) )
		list1 <- Map(rep_len, list1, nsmax)
	if ( any(ns2 != nsmax) )
		list2 <- Map(rep_len, list2, nsmax)
	list(list1, list2)
}

array2list <- function(X, MARGIN) {
	apply(X, MARGIN, identity, simplify=FALSE)
}

nlines <- function(x) {
	xsub <- gsub("\n", "", x, fixed=TRUE)
	nchar(x) - nchar(xsub) + 1L
}

#### Show utility functions ####
## -----------------------------

show_matter_mem <- function(x) {
	size <- size_bytes(object.size(x))
	size <- format(size, units="auto")
	if ( is.matter(x) ) {
		shm <- format(shm_used(x), units="auto")
		vm <- format(vm_used(x), units="auto")
		cat("(",
			size, " real", " | ",
			shm, " shared", " | ",
			vm, " virtual)\n", sep="")
	} else {
		cat("(", size, " real)\n", sep="")
	}
}

paste_head <- function(x, n=getOption("matter.show.head.n"), collapse=" ") {
	if ( length(x) > n ) {
		paste0(paste0(head(x, n=n), collapse=collapse), " ...")
	} else {
		paste0(x, collapse=collapse)
	}
}

preview_vector_data <- function(x, n = getOption("matter.show.head.n"), ...) {
	hdr <- head(x, n=n)
	out <- format(hdr, ...)
	more <- length(x) > length(hdr)
	if ( !is.null(names(hdr)) ) {
		nms <- names(hdr)
	} else {
		nms <- paste0("[", seq_along(hdr), "]")
	}
	if ( more ) {
		out <- c(out, "...")
		nms <- c(nms, "...")
	}
	if ( length(x) > 0L ) {
		matrix(out, nrow=1, dimnames=list("", nms))
	} else {
		matrix("", nrow=1, dimnames=list("", "[0]"))
	}
}

preview_vector <- function(x, n = getOption("matter.show.head.n"), ...) {
	print(preview_vector_data(x, n, ...), quote=FALSE, right=TRUE)
}

preview_matrix_data <- function(x, n = getOption("matter.show.head.n"), ...) {
	more_i <- nrow(x) > n
	more_j <- ncol(x) > n
	if ( more_i ) {
		i <- 1:n
	} else {
		i <- 1:nrow(x)
	}
	if ( more_j ) {
		j <- 1:n
	} else {
		j <- 1:ncol(x)
	}
	hdr <- x[i,j,drop=FALSE]
	out <- matrix(format(hdr, ...), nrow=nrow(hdr), ncol=ncol(hdr))
	if ( !is.null(rownames(x)) ) {
		rnm <- rownames(x)[i]
	} else {
		rnm <- paste0("[", seq_along(i), ",]")
	}
	if ( !is.null(colnames(x)) ) {
		cnm <- colnames(x)[j]
	} else {
		cnm <- paste0("[,", seq_along(j), "]")
	}
	if ( more_i ) {
		out <- rbind(out, "...")
		rnm <- c(rnm, "...")
	}
	if ( more_j ) {
		out <- cbind(out, "...")
		cnm <- c(cnm, "...")
	}
	dimnames(out) <- list(rnm, cnm)
	out
}

preview_matrix <- function(x, n = getOption("matter.show.head.n"), ...) {
	print(preview_matrix_data(x, n, ...), quote=FALSE, right=TRUE)
}

preview_Nd_array <- function(x, n = getOption("matter.show.head.n"), ...) {
	more_i <- nrow(x) > n
	more_j <- ncol(x) > n
	if ( more_i ) {
		i <- 1:n
	} else {
		i <- 1:nrow(x)
	}
	if ( more_j ) {
		j <- 1:n
	} else {
		j <- 1:ncol(x)
	}
	extra <- rep(1L, length(dim(x)) - 2L)
	inds <- c(list(i, j), as.list(extra))
	hdr <- do.call("[", c(list(x), inds, list(drop=FALSE)))
	out <- matrix(format(hdr, ...), nrow=nrow(hdr), ncol=ncol(hdr))
	if ( !is.null(rownames(x)) ) {
		rnm <- rownames(x)[i]
	} else {
		rnm <- paste0("[", seq_along(i), ",]")
	}
	if ( !is.null(colnames(x)) ) {
		cnm <- colnames(x)[j]
	} else {
		cnm <- paste0("[,", seq_along(j), "]")
	}
	if ( more_i ) {
		out <- rbind(out, "...")
		rnm <- c(rnm, "...")
	}
	if ( more_j ) {
		out <- cbind(out, "...")
		cnm <- c(cnm, "...")
	}
	dimnames(out) <- list(rnm, cnm)
	cat(paste0(c("", "", extra), collapse=", "), "\n")
	print(out, quote=FALSE, right=TRUE)
	if ( prod(dim(x)[-c(1,2)]) > 1L ) {
		dots <- ifelse(dim(x)[-c(1,2)] > 1L, "...", "")
		cat(paste0(c("", "", dots), collapse=", "), "\n")
	}
}

preview_list <- function(x, n = getOption("matter.show.head.n"), ...) {
	n1 <- min(n, length(x))
	for ( i in 1:n1 ) {
		xi <- x[[i]]
		if ( is.null(dim(xi)) ) {
			fmt <- preview_vector_data(xi, n, ...)
		} else {
			fmt <- preview_matrix_data(xi, n, ...)
		}
		if ( !is.null(names(x)) ) {
			nm <- paste0("$", names(x)[i])
		} else {
			nm <- paste0("[[", i, "]]")
		}
		if ( nrow(fmt) > 1L )
			nm <- c(nm, character(nrow(fmt) - 1L))
		rownames(fmt) <- nm
		print(fmt, quote=FALSE, right=TRUE)
	}
	if ( length(x) > n1 )
		cat("...\n")
}

preview_recursive <- function(x, n = getOption("matter.show.head.n"), ...) {
	n1 <- min(n, length(x))
	for ( i in 1:n1 ) {
		xi <- x[[i]]
		if ( !is.null(names(x)) ) {
			cat("$", names(x)[i], "\n", sep="")
		} else {
			cat("[[", i, "]]\n", sep="")
		}
		preview_list(xi)
		if ( i < n1 )
			cat("\n")
	}
	if ( length(x) > n1 )
		cat("...\n")
}

preview_table <- function(x, n = getOption("matter.show.head.n"), cls = NULL, ...) {
	more_i <- nrow(x) > n
	more_j <- ncol(x) > n
	if ( more_i ) {
		i <- 1:n
	} else {
		i <- 1:nrow(x)
	}
	if ( more_j ) {
		j <- 1:n
	} else {
		j <- 1:ncol(x)
	}
	hdr <- x[i,j,drop=FALSE]
	out <- as.matrix(hdr)
	out <- matrix(format(out, ...), nrow=nrow(out), ncol=ncol(out))
	if ( is.null(cls) ) {
		cls <- vapply(hdr, function(xj) class(xj)[1L], character(1L))
	} else {
		cls <- cls[j]
	}
	cls <- paste0("<", cls, ">")
	if ( !is.null(rownames(x)) ) {
		rnm <- rownames(x)[i]
	} else {
		rnm <- paste0("[", seq_along(i), ",]")
	}
	if ( !is.null(colnames(x)) ) {
		cnm <- colnames(x)[j]
	} else {
		cnm <- paste0("[,", seq_along(j), "]")
	}
	if ( more_i ) {
		out <- rbind(out, "...")
		rnm <- c(rnm, "...")
	}
	if ( more_j ) {
		out <- cbind(out, "...")
		cnm <- c(cnm, "...")
		cls <- c(cls, "")
	}
	out <- rbind(cls, out)
	rnm <- c("", rnm)
	dimnames(out) <- list(rnm, cnm)
	print(out, quote=FALSE, right=TRUE, ...)
}

#### Miscellaneous internal functions ####
## ---------------------------------------

apply_int <- function(X, MARGIN, FUN, FUN.VALUE, ...) {
	FUN <- match.fun(FUN)
	if ( !MARGIN %in% c(1L, 2L) )
		matter_error("MARGIN must be 1 or 2")
	switch(MARGIN,
		vapply(seq_len(nrow(X)), function(i)
			FUN(X[i,,drop=TRUE], ...), FUN.VALUE),
		vapply(seq_len(ncol(X)), function(j)
			FUN(X[,j,drop=TRUE], ...), FUN.VALUE))
}

bplapply_int <- function(X, FUN, ..., BPPARAM = NULL) {
	if ( is.null(BPPARAM) ) {
		ans <- vector("list", length=length(X))
		for ( i in seq_along(X) )
			ans[[i]] <- FUN(X[[i]], ...)
		set_names(ans, names(X))
	} else {
		bplapply(X, FUN, ..., BPPARAM=BPPARAM)
	}
}

# A sequence with half-bin-widths in relative units
# x = bin center, y = half-width, d = relative diff
# y[n] = d * x[n]
# y[n+1] = d * (x[n] - y[n])) / (1 - d)
# x[n+1] = x[n] + y[n] + y[n+1]
# => x[n] ((1 + d) / (1 - d))^n * x[0]
# log x[n] = n log {(1 + d) / (1 - d)} + log x[0]
# => n = (log x[n] - log x[0]) / log {(1 + d) / (1 - d)}
seq_rel <- function(from, to, by) {
	half <- by / 2
	length.out <- (log(to) - log(from)) / log((1 + half) / (1 - half))
	length.out <- floor(1 + length.out)
	i <- seq_len(length.out)
	from * ((1 + half) / (1 - half))^(i - 1)
}

# Cleveland style shingles (i.e., overlapping intervals)
shingles <- function(x, breaks, overlap = 0.5, labels = NULL)
{
	if ( !is.matrix(breaks) )
		breaks <- co.intervals(x, number=breaks, overlap=overlap)
	y <- apply(breaks, 1L, function(b) I(which(b[1L] <= x & x <= b[2L])))
	binner <- function(i) which(breaks[i,1L] <= x & x <= breaks[i,2L])
	y <- lapply(seq_len(nrow(breaks)), binner)
	if ( is.null(labels) ) {
		labeller <- function(b) paste0("[", b[1L], ",", b[2L], "]")
		labels <- apply(breaks, 1L, labeller)
	} else if ( length(labels) != length(y) ) {
		matter_error("length of labels not equal to length of breaks")
	}
	attr(y, "breaks") <- breaks
	attr(y, "counts") <- lengths(y)
	attr(y, "mids") <- rowMeans(breaks)
	names(y) <- labels
	y
}

# calculate mean/median/mode
avg <- function(x, center = mean)
{
	x <- x[!is.na(x)]
	if ( is.numeric(x) || is.complex(x) ) {
		center <- match.fun(mean)
		y <- center(x)
	} else {
		ux <- unique(x)
		y <- ux[which.max(tabulate(match(x, ux)))]
	}
	unname(y)
}

# encode a dummy (one-hot) variable
encode_dummy <- function(x, drop = TRUE) {
	x <- as.factor(x)
	if ( drop )
		x <- droplevels(x)
	v <- levels(x)
	d <- matrix(0L, nrow=length(x), ncol=length(v))
	for ( i in seq_along(v) )
		d[x == v[i],i] <- 1L
	dimnames(d) <- list(names(x), v)
	d
}

n_unique <- function(x, na.rm = TRUE) {
	x <- unique(as.vector(x))
	if ( is.atomic(x) && na.rm ) {
		length(x[!is.na(x)])
	} else {
		length(x)
	}
}

# soft thresholding
soft <- function(x, t) sign(x) * pmax(abs(x) - t, 0)

# get predicted classes from scores
predict_class <- function(scores) {
	i <- seq_len(ncol(scores))
	if ( is.null(colnames(scores)) ) {
		labs <- i
	} else {
		labs <- colnames(scores)
	}
	cls <- apply(scores, 1L, which.max)
	factor(cls, levels=i, labels=labs)
}

# matrix pseudoinverse based on MASS::ginv
pinv <- function(x, tol = sqrt(.Machine$double.eps))
{
	x <- as.matrix(x)
	sv <- svd(x)
	pos <- sv$d > max(tol * sv$d[1L], 0)
	if ( all(pos) ) {
		sv$v %*% (1 / sv$d * t(sv$u))
	} else if ( !any(pos) ) {
		array(0, dim(x)[2L:1L])
	} else {
		sv$v[,pos,drop=FALSE] %*% (t(sv$u[,pos,drop=FALSE]) / sv$d[pos])
	}
}

# fix file paths
fix_path <- function(path, winslash = "\\", mustWork = NA) {
	file <- !is_shared_memory_pattern(path)
	if ( any(file) )
		path[file] <- normalizePath(path[file],
			winslash=winslash, mustWork=mustWork)
	path
}

# check file paths
same_path <- function(path1, path2) {
	path1 <- fix_path(path1, mustWork=FALSE)
	path2 <- fix_path(path2, mustWork=FALSE)
	identical(path1, path2)
}

#### Utilities for raw bytes and memory ####
## -----------------------------------------

# convert between 'raw' and 'character'

raw2char <- function(x, multiple = FALSE, encoding = "unknown") {
	y <- rawToChar(as.raw(x), multiple=multiple)
	Encoding(y) <- encoding
	y
}

char2raw <- function(x) {
	charToRaw(as.character(x))
}

# convert between 'raw' and hexadecimal strings

hex2raw <- function(x) {
	x <- tolower(gsub("[^[:alnum:] ]", "", x))
	sst <- strsplit(x, "")[[1]]
	hex <- paste0(sst[c(TRUE, FALSE)], sst[c(FALSE, TRUE)])
	codes <- factor(hex, levels=as.character(as.raw(0:255)))
	as.raw(as.integer(codes) - 1L)
}

raw2hex <- function(x, uppercase = FALSE) {
	hex <- paste0(as.character(x), collapse="")
	if ( uppercase ) {
		toupper(hex)
	} else {
		hex
	}
}

# create a uuid
uuid <- function(uppercase = FALSE)
{
	oseed <- getRNGStream()
	on.exit(setRNGStream(oseed))
	setRNGStream(get_shared_RNGStream())
	hex <- as.raw(0:255)
	version <- hex[65:80] # 0100 xxxx (version 4)
	variant <- hex[129:192] # 10xx xxxx (variant 1)
	time_low <- sample(hex, 4L, TRUE)
	time_hi <- sample(hex, 2L, TRUE)
	time_hi_and_version <- c(sample(version, 1L), sample(hex, 1L))
	clock_seq_hi_and_res <- sample(variant, 1L)
	clock_seq_low <- sample(hex, 1L)
	node <- sample(hex, 6L, TRUE)
	set_shared_RNGStream(getRNGStream()$seed)
	bytes <- c(time_low, time_hi, time_hi_and_version,
		clock_seq_hi_and_res, clock_seq_low, node)
	string <- c(
		paste0(as.character(time_low), collapse=""),
		paste0(as.character(time_hi), collapse=""),
		paste0(as.character(time_hi_and_version), collapse=""),
		paste0(as.character(clock_seq_hi_and_res),
			as.character(clock_seq_low), collapse=""),
		paste0(as.character(node), collapse=""))
	if ( uppercase ) {
		string <- toupper(paste0(string, collapse="-"))
	} else {
		string <- paste0(string, collapse="-")
	}
	list(string=string, bytes=bytes)
}

# creates internal S3 class 'size_bytes'
# (similar to 'object_size' but works w/ vectors)
size_bytes <- function(x, units = "B") {
	if ( inherits(x, "size_bytes") )
		return(x)
	units <- match.arg(units,
		c("bytes", "B", "KB", "MB", "GB", "TB", "PB"))
	x <- set_names(as.numeric(x), names(x))
	x <- switch(units,
		"bytes" = , "B" = x,
		"KB" = x * 1000,
		"MB" = x * 1000^2,
		"GB" = x * 1000^3,
		"TB" = x * 1000^4,
		"PB" = x * 1000^5)
	class(x) <- "size_bytes"
	x
}

# based on utils:::format.object_size
format.size_bytes <- function(x, units = "auto", ...)
{
	units <- match.arg(units,
		c("auto", "B", "KB", "MB", "GB", "TB", "PB"))
	if ( all(is.na(x)) ) {
		mx <- NA_real_
	} else {
		mx <- max(x, na.rm=TRUE)
	}
	if ( units == "auto" )
		units <- if ( is.na(mx) )
			" "
		else if ( mx >= 1000^5 )
			"PB"
		else if ( mx >= 1000^4 )
			"TB"
		else if ( mx >= 1000^3 )
			"GB"
		else if ( mx >= 1000^2 )
			"MB"
		else if ( mx >= 1000 )
			"KB"
		else "B"
	sizes <- switch(units,
		" " = , "B" = x,
		"KB" = roundup(x / 1000, 2L),
		"MB" = roundup(x / 1000^2, 2L),
		"GB" = roundup(x / 1000^3, 2L),
		"TB" = roundup(x / 1000^4, 2L),
		"PB" = roundup(x / 1000^5, 2L))
	label <- switch(units, "B"="bytes", units)
	set_names(paste(sizes, label), names(x))
}

print.size_bytes <- function(x, units = "auto",
	quote = FALSE, right = TRUE, ...) 
{
	print.default(format(x, units=units),
		quote=quote, right=right, ...)
}

`[.size_bytes` <- function(x, i, j, ..., drop=FALSE)
{
	structure(NextMethod(), class="size_bytes")
}

roundup <- function(x, digits = 0) {
	ceiling(10^digits * x) / 10^(digits)
}

mem <- function(x, reset = FALSE)
{
	if ( missing(x) ) {
		cell.size <- c(Ncells=56, Vcells=8)
		gc.result <- gc(reset=reset)
		gc.cols <- c(1L, ncol(gc.result) - 1L)
		real <- unname(colSums(gc.result[,gc.cols] * cell.size))
		temp <- sum(unname(sizeof_file_resource(owned=TRUE)))
		shm.used <- sum(unname(sizeof_memory_resource(owned=TRUE)))
		if ( reset ) {
			set_shared_memory_max(shm.used)
			set_shared_file_max(temp)
			shm.max <- shm.used
		} else {
			shm.max <- update_shared_memory_max()
		}
		shared <- unname(c(shm.used, shm.max))
		mem <- c(
			"real"=real[1L],
			"shared"=shared[1L],
			"max real"=real[2L],
			"max shared"=shared[2L],
			"temp"=temp)
	} else {
		size <- as.numeric(object.size(x))
		shm <- as.numeric(shm_used(x))
		vm <- as.numeric(vm_used(x))
		mem <- c("real"=size, "shared"=shm, "virtual"=vm)
	}
	size_bytes(mem)
}

memcl <- function(cl = bpparam(), reset = FALSE)
{
	if ( is(cl, "SnowParam") && !is(cl, "MulticoreParam") )
		cl <- bpbackend(cl)
	if ( !inherits(cl, "cluster") )
		matter_error("cannot monitor memory for cluster of class: ",
			sQuote(class(cl)))
	if ( inherits(cl, "NULLcluster") ) {
		matter_warn("cluster is NULL; has it been started yet?")
		return(data.frame())
	}
	if ( reset ) {
		ans <- clusterEvalQ(cl, matter::mem(reset=TRUE))
	} else {
		ans <- clusterEvalQ(cl, matter::mem(reset=FALSE))
	}
	node <- vapply(cl, function(worker) worker$host, character(1L))
	ans <- do.call(rbind, ans)
	ans <- apply(ans, 2L, size_bytes, simplify=FALSE)
	ans <- c(list(node=node), ans)
	data.frame(ans, check.names=FALSE)
}

memtime <- function(expr, verbose = NA, BPPARAM = NULL)
{
	if ( is.na(verbose) )
		verbose <- getOption("matter.default.verbose")
	expr <- substitute(expr)
	expr_string <- paste0(deparse(expr), collapse="\n")
	mem.cols <- seq_len(4L)
	start <- mem(reset=TRUE)[mem.cols]
	matter_log(tstamp(), verbose=verbose)
	if ( !is.null(BPPARAM) )
	{
		if ( !bpisup(BPPARAM) ) {
			managed.cl <- TRUE
			matter_log("Starting cluster...", verbose=verbose)
			bpstart(BPPARAM)
		} else {
			managed.cl <- FALSE
		}
		if ( inherits(BPPARAM, "SnowParam") ) {
			monitored.cl <- TRUE
			start.cl <- memcl(BPPARAM, reset=TRUE)
		} else {
			monitored.cl <- FALSE
		}
	}
	matter_log("Timing started at ", format(Sys.time(), "%X"),
		verbose=verbose)
	matter_log(expr_string, verbose=verbose)
	t.start <- proc.time()
	eval(expr, parent.frame())
	rm(expr)
	t.end <- proc.time()
	matter_log("Timing ended at ", format(Sys.time(), "%X"),
		verbose=verbose)
	if ( !is.null(BPPARAM) )
	{
		if ( monitored.cl ) {
			matter_log("Collecting cluster memory use...", verbose=verbose)
			end.cl <- memcl(BPPARAM, reset=FALSE)
		}
		if ( managed.cl ) {
			matter_log("Stopping cluster...", verbose=verbose)
			bpstop(BPPARAM)
		}
	}
	matter_log(tstamp(), verbose=verbose)
	end <- mem(reset=FALSE)[mem.cols]
	change <- c(
		"real"=unname(end["real"] - start["real"]),
		"shared"=unname(end["shared"] - start["shared"]))
	result <- list(
		"start"=start, "end"=end,
		"change"=size_bytes(change))
	overhead <- c(
		"real"=unname(end["max real"] - end["real"]),
		"shared"=unname(end["max shared"] - end["shared"]))
	total <- unname(sum(end[["max real"]]) + sum(end[["max shared"]]))
	if ( !is.null(BPPARAM) )
	{
		if ( monitored.cl ) {
			total.cl <- sum(end.cl[["max real"]])
			result$cluster <- list(
				"start"=start.cl, "end"=end.cl,
				"total"=size_bytes(total.cl))
			overhead["real"] <- overhead["real"] + total.cl
			total <- total + total.cl
		}
	}
	result$overhead <- size_bytes(overhead)
	result$total <- size_bytes(total)
	result$time <- t.end - t.start
	structure(result, class="memtime")
}

tstamp <- function(pre = "##------ ", post = " ------##") {
	paste0(pre, date(), post)
}

profmem <- function(expr)
{
	.Defunct("memtime")
}

#### Formula parsing ####
## ----------------------

iQuote <- function(x, q = "`")
{
	if ( length(x) > 0 ) {
		paste0(q, as.character(x), q)
	} else {
		character(0L)
	}
}

parse_formula <- function(formula, envir = NULL, eval = !missing(envir))
{
	e <- environment(formula)
	if ( length(formula) == 2L ) {
		rhs <- formula[[2L]]
		lhs <- NULL
	} else if ( length(formula) == 3L ) {
		rhs <- formula[[3L]]
		lhs <- formula[[2L]]
	}
	if ( length(rhs) == 1L ) {
		# single-term rhs that doesn't include |
		g <- NULL
		rhs <- rhs
	} else if ( length(rhs) == 3L && deparse1(rhs[[1L]]) != "|" ) {
		# rhs includes multiple terms but not |
		g <- NULL
		rhs <- rhs
	} else if ( length(rhs) == 3L && deparse1(rhs[[1L]]) == "|" ) {
		# rhs includes | so add condition
		g <- rhs[[3]]
		rhs <- rhs[[2]]
	} else {
		# failsafe
		g <- NULL
	}
	# parse lhs
	if ( !is.null(lhs) )
		lhs <- parse_side(lhs)
	if ( eval )
		for ( i in seq_along(lhs) )
			lhs[[i]] <- eval(lhs[[i]], envir=envir, enclos=e)
	# parse rhs
	if ( !is.null(rhs) )
		rhs <- parse_side(rhs)
	if ( eval )
		for ( i in seq_along(rhs) )
			rhs[[i]] <- eval(rhs[[i]], envir=envir, enclos=e)
	# parse condition
	if ( !is.null(g) )
		g <- parse_side(g)
	if ( eval )
		for ( i in seq_along(g) )
			g[[i]] <- eval(g[[i]], envir=envir, enclos=e)
	list(lhs=lhs, rhs=rhs, g=g)
}

parse_side <- function(formula, envir = NULL, eval = FALSE)
{
	enclos <- environment(formula)
	if ( length(formula) != 1L ) {
		if ( deparse1(formula[[1L]]) %in% c("~", "*", "+", ":") ) {
			side <- lapply(as.list(formula)[-1L], parse_side)
		} else if ( deparse1(formula[[1L]]) == "I" ) {
			side <- list(formula[[2L]])
		} else {
			side <- list(formula)
		}
	} else {
		side <- list(formula)
	}
	if ( is.list(side) ) {
		side <- unlist(side, recursive=TRUE)
		names(side) <- vapply(side, deparse1, character(1L))
	}
	if ( eval ) {
		for ( i in seq_along(side) )
			side[[i]] <- eval(side[[i]], envir=envir, enclos=enclos)
	}
	side
}

eval_exprs <- function(exprs, data, i = NULL, j = NULL,
	split_along = NULL, group = NULL, reduce = "+",
	recursive = !is.null(split_along))
{
	ans <- vector("list", length=length(exprs))
	for ( k in seq_along(exprs) )
	{
		ans[[k]] <- eval_at(exprs[[k]], data=data, i=i, j=j,
			split_along=split_along, group=group, reduce=reduce,
			recursive=recursive)
	}
	attr(ans, "recursive") <- recursive
	names(ans) <- names(exprs)
	ans
}

eval_at <- function(expr, data, i = NULL, j = NULL,
	split_along = NULL, group = NULL, reduce = "+",
	recursive = !is.null(split_along))
{
	vars <- all.vars(expr)
	data <- lapply(vars,
		function(nm) {
			x <- data[[nm]]
			if ( !is.null(i) && !is.null(j) ) {
				if ( is.null(dim(x)) ) {
					x <- subset_list(x[i], j)
				} else {
					x <- x[i,j,drop=FALSE]
				}
			} else if ( !is.null(i) ) {
				if ( is.null(dim(x)) ) {
					x <- x[i]
				} else {
					x <- x[i,,drop=FALSE]
				}
			} else if ( !is.null(j) ) {
				if ( is.null(dim(x)) ) {
					x <- subset_list(x, j)
				} else {
					x <- x[,j,drop=FALSE]
				}
			}
			if ( !is.null(split_along) )
				x <- array2list(x, split_along)
			if ( !is.null(group) ) {
				FUN <- match.fun(reduce)
				group <- factor(group, levels=unique(group))
				x <- lapply(levels(group),
					function(g) Reduce(FUN, x[group %in% g]))
				names(x) <- levels(group)
			}
			x
		})
	if ( recursive ) {
		EVAL <- function(...)
		{
			datalist <- list(...)
			names(datalist) <- vars
			eval(expr, envir=datalist)
		}
		do.call(Map, c(list(EVAL), data))
	} else {
		names(data) <- vars
		eval(expr, envir=data)
	}
}
kuwisdelu/matter documentation built on April 12, 2025, 2:41 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
kuwisdelu/matter
Out-of-core statistical computing and signal processing

R/utils.R
In kuwisdelu/matter: Out-of-core statistical computing and signal processing

Documented in array_ind avg eval_at eval_exprs hex2raw iQuote isoclos isofun linear_ind matter_defaults mem memcl memtime parse_formula pinv profmem raw2hex seq_rel shingles size_bytes sizeof uuid

R Package Documentation

Browse R Packages

We want your feedback!

kuwisdelu/matter Out-of-core statistical computing and signal processing

R/utils.R In kuwisdelu/matter: Out-of-core statistical computing and signal processing

Documented in array_ind avg eval_at eval_exprs hex2raw iQuote isoclos isofun linear_ind matter_defaults mem memcl memtime parse_formula pinv profmem raw2hex seq_rel shingles size_bytes sizeof uuid

R Package Documentation

Browse R Packages

We want your feedback!

kuwisdelu/matter
Out-of-core statistical computing and signal processing

R/utils.R
In kuwisdelu/matter: Out-of-core statistical computing and signal processing