R/main.R
In garray: Generalized Array Arithmetic for Ragged Arrays with Named Margins

#options(error=recover)
#source("R/zzz.R")

#' Generalized and smart array
#'
#' Creates or tests for generalized arrays.
#'
#' Generalized arrays are generalized because they handle dimensions and
#'	subdimensions that are ragged; and they are also smart
#'	because they automatically match dimensions by margins
#'	(names of dimnames).
#' Margins is implemented similar to R's native class "table", i.e.,
#'	use names of dimnames to store the margins
#'
#' Attribute sdim denotes subdimensions, which are the subdivision of
#'	dimensions or grouping of members of a dimension, for organizing a
#'	ragged array.
#'	It is a named list of numeric vectors, each of which indicates the
#'	lengths of subdivision groups within a dimension.  Every name of the
#'	list prefixed with a margin of the generalized array. By the matching
#'	of sdim names and dim names, utility functions figure out which
#'	dimensions the sub dimensions reside in.  Sum of a vector of the
#'	list usually equals to the extent of the corresponding dimension.
#'	If they are not equal and the extent can not be divided exactly
#'	by the sum, the subdimension is invalid and will be dropped.
#'	If the extent can be divided exactly
#'	by the sum, the subdimension is still valid but non-canonical.
#'	Non-canonical subdimension can be provided to `garray()` and `sdim<-`
#'	as argument, and the two functions canonicalize it.
#'	Other utility functions cannot handle non-canonical subdimension,
#'	thus manually constructing objects of garray class is permitted but
#'	dangerous.
#'	Values of each vector of the list denotes the
#'	repeating times of subdimension residing in the coresponding dimension
#'	(called superdim). 
#'	More than 1 subdimension reside in the same superdim is allowed.
#'	This feature allows dividing a subdimension further,
#'	organizing the subdims
#'	into hierachy.
#'
#' By definition and for S3 dispatching, `class(.)="garray"` is required, but
#'	simple arrays with proper margins actually work correctly with most
#'	functionalities of this package.  For the sake of compatibility and
#'	reducing warning message, `is.garray.duck()` tests whether the array
#'	has proper margins.
#'
#' A still problem is that attributes in R are fragile, even indexing
#'	will drop most attributes.  Utility functions and methods for
#'	dispatching for 'garray' implemented in this package guaranttee to
#'	save the margins (names of dimnames) and subdimension (attr(*,'sdim')).
#' @param data  Usually a simple array, and can be a vector without dimensions.
#' @param dim  An integer vector giving the maximal indices in each dimension.
#' @param dimnames  A list (or it will be ignored) with for each dimension
#'	one component, either ‘NULL’ or a character vector.
#' @param margins  Override the names of dim and of dimnames.
#' @param sdim  Optional, a named list of numeric vectors indicating the
#'	subdivision of some of the dimensions.  The value vill become,
#'	after validated, the attribute sdim.  See 'Details'
#'	and '?sdim'.
#' @param x  An R object.
#' @param ...  Additional arguments to be passed to or from methods.
#' @examples
#'	a1 <- garray(1:27, c(A=3,B=9), sdim=list(AA=c(a=2,b=1),BB=c(a=3)))
#'	a2 <- garray(1:27, c(A=3,B=9), sdim=list(AA=c(a=2,b=1),BB=c(a=4)))
garray <- function(data, dim=NULL, dimnames=NULL,
		margins=NULL, sdim=attr(data, "sdim", exact=TRUE)) {
	if (is.null(dim)) {
		data <- as.array(data) 
		d <- dim(data)	# array() keeps the names of dim.
		dn <- if (is.null(dimnames)) dimnames(data) else dimnames
	} else {
		d <- dim
		names(dim) <- NULL
		data <- array(data, dim, dimnames)
		dn <- dimnames(data)
	}
	if (is.null(dn)) dn <- vector("list", length(d))
	if (is.null(margins)) {
		margins <- if (!is.null(names(dn))) names(dn)
			else if (!is.null(names(d))) names(d)
			else stop("need margins")
	}
	names(dn) <- margins
	dimnames(data) <- dn
	class(data) <- "garray"
	sdim(data) <- sdim
	data
}

#' @describeIn garray  A simple and faster version of garray(),
#'	mainly for internal usage.  Note that garray() is not generic function,
#'	thus garray.array() will never be called by dispatching.
garray.array <- function(x, sdim) {
	class(x) <- "garray"
	stopifnot(is.garray.duck(x))
	sdim(x) <- sdim
	x
}

#' @rdname garray
as.garray <- function(x, ...) UseMethod("as.garray")

#' @rdname garray
as.garray.garray <- function(x, ...)
	if (is.garray(x)) x else stop("broken array")
# Issuing a stop because this function is called by S3 dispatching.

#' @rdname garray
as.garray.default <- function(x, ...) garray(x, ...)

#' @describeIn garray  `is.garray` do simple validation, no check for validity
#'	of sdim because it is too expensive. 
#'	Operation of sdim by this package is always guaranteed the validity.
is.garray <- function(x) {
	n <- names(dimnames(x))
	is.array(x)&&!is.null(n)&&!anyNA(n)&&all(""!=n)&&inherits(x, "garray")
	#warning("a valid array but no class")
}

#' @describeIn garray  `is.garray.duck` do duck-typing validation, ignoring
#'	the class
is.garray.duck <- function(x) {
	n <- names(dimnames(x))
	is.array(x)&&!is.null(n)&&!anyNA(n)&&all(""!=n)
}	# duck-typing validation without warning

#' @describeIn garray  Test whether the vector or array is actually a scalar
#'	(`length(x)==1L`).
is.scalar <- function(x) { 1L==length(x) }

#' Coerce to a Data Frame
#'
#' Convert a 2D generalized array into a data.frame,
#'	making `print()` work correctly.
#' @param x  A generalized array object.
#' @param row.names,optional,stringsAsFactors,...  See the same arguments in
#'	?as.data.frame.
#' @param col.names  'NULL' or a character vector giving the column names.
as.data.frame.garray <- function(x, row.names=NULL, optional=FALSE,
		col.names=NULL, ..., stringsAsFactors=FALSE) {
	stopifnot(2==length(dim(x)))	# Make View(x) work for 2-D array.
	row.names <- if (is.null(row.names)) rownames(x)
	col.names <- if (is.null(col.names)) colnames(x)
	if (!is.null(spd <- .superdim(x))) {
		sd <- sdim(x)
		if (is.null(row.names)) {
			row.names <- do.call("paste", c(lapply(sd[names(
				spd[spd==margins(x)[1]])], function(reptimes) {
					rep.int(names(reptimes), reptimes)
				}), list(1:nrow(x)), sep="|"))
		}
		if (is.null(col.names)) {
			col.names <- do.call("paste", c(lapply(sd[names(
				spd[spd==margins(x)[2]])], function(reptimes) {
					rep.int(names(reptimes), reptimes)
				}), list(1:ncol(x)), sep="|"))
			colnames(x) <- col.names
		}	# as.data.frame.matrix() not has option col.names
	}
	as.data.frame.matrix(unclass(x), row.names, optional, ...)
}

#' Print Values
#'
#' Print out a generalized array and returns it _invisibly_.
#' @param x  A generalized array object.
#' @param ...  Additional arguments to be passed to or from methods.
print.garray <- function(x, ...) {
	dn <- dimnames(x)
	if (1==length(dn)&&is.null(dn[[1]])) cat(names(dn), sep="\n")
	NextMethod("print")
}


#' The margins and dimensions of a generalized array object
#'
#' Margins means the names of dimnames of an array.
#' `margins<-` and `remargins` are for renaming, but
#'	`margins<-` ignores the names of `value` while
#'	`remargins` according to the names of `value` renames the margins. Doing
#'	so, `remargins` may also keep sdim.  `margins<-` always removes sdim.
#'	For `remargins` the length of value can be shorter than that of the
#'	margins if the value has names.
#' @param x  A generalized array.
#' @param value  A character vector will become the margins (names of dimnames)
#'	of the generalized array.
#'	`margins<-` ignores the names of `value` while
#'	`remargins` according to the names of `value` renames the margins.
#'	For `remargins` the length of value can be shorter than that of the
#'	margins if the value has names.
margins <- function(x) {
	if (!is.garray(x)) stop("margins of invalid array may be incorrect")
	names(dimnames(x))
}
# `margins()` used to be `names()`, but I realized that even an array
#	can have attribute `names`, which is by default retieved by `names()`.

#' @rdname margins
`margins<-` <- function(x, value) {
	if (!is.array(x)) {
		stop("set margins of non-array")
	} else {
		if (!inherits(x, "garray")) class(x) <- "garray"
	}	# try fix it.
	if (is.null(value)||anyNA(value)||any(""==value))
		stop("illegal margins")
	dn <- dimnames(x)
	if (is.null(dn)) dn <- vector("list", length(value))
	attr(x, "sdim") <- NULL
	names(dn) <- value
	dimnames(x) <- dn	# if value not match dim(x), stop here
	x
}

#' @rdname margins
remargins <- function(x, value) {
	if (!is.array(x)) {
		stop("set margins of non-array")
	} else {
		if (!inherits(x, "garray")) class(x) <- "garray"
	}	# try fix it.
	if (is.null(value)||anyNA(value)||any(""==value))
		stop("illegal margins")
	dn <- dimnames(x)
	if (is.null(dn)) {
		dn <- vector("list", length(value))
	} else if (!is.null(nn <- names(value))) {
		sd <- sdim(x)
		if (!is.null(sd)) {
			spd <- .superdim(x)
			names(sd) <- vapply(names(sd), function(k) {
				if (spd[k]%in%nn) substr(k, 1, length(spd[k])
					) <- value[spd[k]]
				k
			}, "")
		}
		m <- names(dn)
		value <- value[m]
		m[!is.na(value)] <- value[!is.na(value)]
		value <- m
		attr(x, "sdim") <- sd
	} else {
		attr(x, "sdim") <- NULL
	}
	names(dn) <- value
	dimnames(x) <- dn
	x
}

#' Dimensions of a generalized array
#'
#' Retrieve or set the dimension of a generalized array.
#' 
#' The functions `dim` and `dim<-` are internal generic primitive functions.
#' Here `dim.garray` and `dim<-.garray` are methods for 'garray's, which
#'	returns and setting with the named dimensions (margins).  The two
#'	function is usually used as, for example, `dim(arr)` and
#'	`dim(arr) <- c(A=3,B=2)`.
#' Native R saves the names of dim but seldom uses it.  However, it is
#'	undocumented and not stable because some functions 
#'	  discard it (like: t()) .  This package will totally neglect it but
#'	keeps the margins in dimnames.
#' @param x  An generalized array.
#' @param value  An integer (can be coerced from double numeric) vector, with
#'	names.
#' @aliases dim dim<-
`dim.garray` <- function(x) {
	d <- NextMethod("dim")
	names(d) <- margins(x)	# names of dim() is kept by dimnames(). 
	d	# dim(x) can have names, but I do not respect it.
}

#' @rdname dim.garray
`dim<-.garray` <- function(x, value) {
	n <- names(value)
	if (is.null(n)||anyNA(n)||any(""==n)) {
		warning("array assigned dim without names")
		class(x) <- NULL
		# Passing garray object into a function that not awares of
		#	garray and changes dim may issue the warning.  To avoid
		#	it, unclass() the object and then passing.
		NextMethod("dim<-")
	} else {
		dn <- vector("list", length(n))
		names(dn) <- n
		dn[n] <-dimnames(x)[n]	# save dimnames
		names(value) <- NULL	# keep names(attr(x,"dim")) NULL
		x <- NextMethod("dim<-")	# default dim() remove dimnames
		dimnames(x) <- dn
		x
	}
}

#' Indexing for the garray class
#'
#' Indexing along margins as usual `[.array`, and along subdim.
#'
#' @aliases [ [<- [.garray [<-.garray
#' @usage
#'	\method{[}{garray}(..., drop=TRUE)
#'	#`[.garray`(..., drop=TRUE)
#'	#x[i]
#'	#x[i,j,...,drop=TRUE]
#'	#x[m]
#'	#x[l]
#'	#x[M=i,N=j,...]
#FIXME: uncommenting the above usage results in Warning by devtools::check()
#' @param x  A generalized array from which elements are extracted or replaced.
#' @param i,j,m,l,M,N,...  In addition to the native styles (`i`, `j`, etc.)
#'	accepted by `[`, can be:
#'	1. a matrix `m` with column names,
#'		which (`colnames(m)`) is a permutation of margins of the array.
#	    #TODO: a missing margin means select all along that margin
#	#TODO: 1.2 - the colnames(.) can be from names of sdim, the return
#	#	will be a list of arrays.
#'	2. an list `l <- list(i,j,...)`, can be unnamed or named,
#'		where NULL means to select all;
#'	3. arguments with names (`M`, `N`, etc), where `NULL` and missing
#'		means to select all.
#'
#'	These extensions make indexing 3 times slower than native indexing.
#'	Since it is hard to assign MissingArg in list(), at the moment
#'	MissingArg is only safe in native R subsettting style.
#'	Using NULL to select all like MissingArg is actually not consistent
#'	in semantic of other uses of NULL. As shown by what `c()` returns,
#'	NULL is a generalized form of `logical(0)`, `integer(0)`,
#'	and `character(0)`, all of which means to select none when indexing.
#'	So take care of NULL if indexing with variables.
#' @param drop  Whether indeces where 1==dim are removed.  Different from
#'	R's native `[`, a garray will become a garray or scalar, never a vector.
#' @param value  An array or a scalar.
#' @examples
#'	mm <- matrix(c(1:3,1), 2, 2, dimnames=list(NULL, c("B","A")))
#'	a <- garray(1:27, c(A=3,B=9), sdim=list(AA=c(a=2,b=1),BB=c(a=3)))
#'	b <- a[mm]
#'	c1 <- a[B=1:2,A=NULL]
#'	c2 <- a[B=1:2,A=]
#'	c3 <- a[B=1:2]
#'	c4 <- a[list(B=1:2)]
#'	c5 <- a[list(B=1:2,A=NULL)]
#'	c6 <- a[list(NULL,1:2)]
#'	d1 <- a[,] ; d1[B=1:2,A=NULL]       <- c1*10
#'	d2 <- a[,] ; d2[B=1:2,A=]           <- c1*10
#'	d3 <- a[,] ; d3[B=1:2]              <- c1*10
#'	d4 <- a[,] ; d4[list(B=1:2)]        <- c1*10
#'	d5 <- a[,] ; d5[list(B=1:2,A=NULL)] <- c1*10
#'	d6 <- a[,] ; d6[B=1:2,A=NULL] <- 1
#'	d7 <- a[,] ; d7[mm] <- 1000
#'	d8 <- a[,] ; d8[mm] <- 1:2*1000
#'	e1 <- a[AA=1,drop=FALSE]
#'	e11 <- a[AA=c(1,1),drop=FALSE]
#'	e2 <- a[AA="b",drop=FALSE]
#'	ebb <- a[AA=c("b","b"),drop=FALSE]
#'	e3 <- a[,] ; e3[AA="b"] <- e2*10
#'	e33 <- a[,] ; e33[AA=c("b","b")] <- c(e2*0.1, e2*100)
#'	# Work in the same manner of `e33[c(3,3),] <- c(e2*0.1, e2*100)`.
#'	e4 <- a[A=c(TRUE,FALSE,FALSE),drop=FALSE]
#'	e5 <- a[A=TRUE,drop=FALSE]
#'	e6 <- a[B=c(TRUE,FALSE,FALSE),drop=FALSE]
#'	e7 <- a[AA=TRUE,drop=FALSE]
#'	e8 <- a[AA=c(TRUE,FALSE),drop=FALSE]
`[.garray` <- function(..., drop=TRUE) {
	n <- margins(..1)
	arg <- match.call()[-c(1:2)]	# fast sys.call() not work for `[`(...)
	arg$drop <- NULL
	argn <- names(arg)
	exist2 <- !missing(..2)&&(is.null(argn)||""==argn[1])
	if (exist2&&is.matrix(..2)&&is.numeric(..2)&&
			1==length(arg)&&!is.null(colnames(..2))) {
		nn <- match(n, colnames(..2))
		if (anyNA(nn)||0L<anyDuplicated(nn)) {
			Z <- NextMethod("[", drop=FALSE)
		} else {
			Z <- .subset(..1, ..2[,nn])
		}
	} else if ((!is.null(argn)&&all(""!=argn))||
			(exist2&&is.list(..2)&&1==length(arg))) {
		pf <- parent.frame()
		idx <- .index_canonical(..1, lapply(arg, function(i)
			if (is.symbol(i)&&""==as.character(i)) NULL
			else eval(i, pf)))
		idx[vapply(idx, is.null, TRUE)] <- list(quote(quote(expr=)))
		# double quote(), for do.call() and .subset().
		Z <- do.call(".subset", c(list(..1), idx, drop=FALSE))
	} else {
		Z <- NextMethod("[", drop=FALSE)
	}	# `[.array` not keep margins if elements of dimnames is NULL
	d <- dim(Z)
	if (length(n)==length(d)) {
		names(d) <- n
		spd <- .superdim(..1)
		sd <- sdim(..1)[names(spd)[spd%in%names(d)[d==dim(..1)&1L!=d]]]
		if (drop) d <- d[1L!=d]
		if (0L==length(d)) return(as.vector(Z))
		class(Z) <- "garray"
		dim(Z) <- d
		sdim(Z, warn=FALSE) <- sd
	}
	Z
}

#' @usage
#'	#\method{[}{garray}(...) <- value
#'	#`[<-.garray`(..., value)
#'	#x[i] <- value
#'	#x[i,j,...] <- value
#'	#x[m] <- value
#'	#x[l] <- value
#'	#x[M=i,N=j,...] <- value
#' @rdname sub-.garray
`[<-.garray` <- function(..., value) {
	cl <- oldClass(..1)
	n <- margins(..1)
	arg <- match.call()[-c(1:2)]
	arg$value <- NULL
	argn <- names(arg)
	exist2 <- !missing(..2)&&(is.null(argn)||""==argn[1])
	matrix2 <- exist2&&is.matrix(..2)&&is.numeric(..2)&&1==length(arg)
	if (!matrix2&&is.garray(value)) {
		sd <- c(sdim(..1), sdim(value))
		d <- dim.garray(value)	# d is named
		if (0L<length(newname <- n[!n%in%names(d)])) {
			dd <- rep.int(1L, length(newname))
			names(dd) <- newname
			d <- c(d, dd)
			dim(value) <- d
		}
		value <- aperm(value, perm=n)
	}
	if (matrix2&&!is.null(colnames(..2))) {
		nn <- match(n, colnames(..2))
		if (anyNA(nn)||0L<anyDuplicated(nn)) {
			Z <- NextMethod("[<-")
		} else {
			Z <- `[<-`(unclass(..1), ..2[,nn], value=value)
			#NextMethod("[<-", ..1, ..2[,nn], value=value) not work
		}
	} else if ((!is.null(argn)&&all(""!=argn))||
			(exist2&&is.list(..2)&&1==length(arg))) {
		pf <- parent.frame()
		idx <- .index_canonical(..1, lapply(arg, function(i)
			if (is.symbol(i)&&""==as.character(i)) NULL
			else eval(i, pf)))
		idx[vapply(idx, is.null, TRUE)] <- list(quote(quote(expr=)))
		if (is.garray(value)) value <- .rep.garray(value, dim(do.call(
			".subset", c(list(..1), idx, drop=FALSE))), sd)
		Z <- do.call("[<-", c(list(unclass(..1)), idx, list(value=
			value)))	# Need a fast method for dim(value[...])
		# Need subsetting sdim(Z)
	} else {
		if (is.garray(value)) value <- .rep.garray(value,
			dim(.subset(..., drop=FALSE)), sd)
		Z <- NextMethod("[<-")
	}
	#sdim(Z) <- sdim(..1)
	class(Z) <- cl
	Z
}

.index_canonical <- function(x, idx) {
# In the extension style of indexing, I use NULL to denote something like
#	MissingArg and avoid non-standard evaluation in this function.
	n <- margins(x)
	idxn <- names(idx)
	if (is.list(idx[[1]])&&(is.null(idxn)||""==idxn[1])) {
		stopifnot(1==length(idx))
		idx <- idx[[1]]
		if (is.null(names(idx))) return(idx) else idxn <- names(idx)
	}
	d <- dim(x)
	l <- vector("list", length(n))
	l[] <- list(NULL)
	names(l) <- n
	idx_not_sd <- idxn%in%n
	l[idxn[idx_not_sd]] <- idx[idx_not_sd]
	if (!all(idx_not_sd)) {
		spd <- .superdim(x)
		sd <- sdim(x)
		for (k in idxn[!idx_not_sd]) {
			reptimes <- sd[[k]]
			if (is.null(reptimes))
				stop(sprintf("no sdim named `%s`", k))
			offset <- cumsum(reptimes)-reptimes
			i <- idx[[k]]
			if (is.logical(i)) i <- seq_along(reptimes)[i]
			l[[spd[k]]] <- if (1L<length(i))
				do.call("c", lapply(i, function(j)
					seq_len(reptimes[j])+offset[j]))
				else if (1L==length(i))
					seq_len(reptimes[i])+offset[i]
		}
	}
	names(l) <- NULL
	l
}


#' Subdimensions of an array
#'
#' Retireve or set the subdimension of an array.
#'
#' Validation of subdimension is expensive because its consistency with dim need
#'	checking.  Thus most of functions do not validate it. 
#'	Operations of subdimension with functions discussed here are guaranteed to
#'	be always keeping the consistency.
#' @param x  A generalized array.
#' @param warn  Whether issue warning when some of subdimensions are invalid
#'	and get dropped.
#' @param value A named list of numeric vectors indicating the
#'	subdivision of some of the dimensions.  The value vill become,
#'	after validated, the attribute sdim.  See '?garray'.
#' @return  The subdimensions (a non-empty list) or NULL
sdim <- function(x) {
	if (!is.garray(x)) {
		warning("sdim of non-array cannot be retrieved")
		return(NULL)
	}
	attr(x, "sdim", exact=TRUE)
}

#' @rdname sdim
`sdim<-` <- function(x, warn=TRUE, value) {
	if (0L==length(value)) {
		attr(x, "sdim") <- NULL
		return(x)
	} else if (!is.list(value)) {
		warning("sdim should be a non-empty list with names") 
		return(x)
	} else if (!is.garray(x)) warning("assign sdim to non-array object")
	d <- dim.garray(x)
	spd <- .validate_sdim(d, value, warn=warn)
	if (0L<length(spd)) {
		nsd <- names(spd)
		names(nsd) <- nsd	# so lapply() return named list
		attr(x, "sdim") <- lapply(nsd, function(k) {
			reptimes <- as.integer(value[[k]])
			na <- names(value[[k]])
			ti <- d[spd[k]]%/%sum(reptimes)
			if (1L<ti) {
				reptimes <- rep.int(reptimes, ti)
				if (!is.null(na)) na <- make.unique(
					rep.int(na, ti))
			}
			names(reptimes) <- na
			reptimes
		})
		# If `sdim<-`() always save canonical reptimes, I do not need
		#	`rep(reptimes, length.out[k]/sum(sd[[k]]))` later.
	} else {
		attr(x, "sdim") <- NULL
	}
	x
}


# Supressing or elevating subdims
#
# @param superdim  Named character.
#	If a value is "" or NA, the name should be the name of some subdim. And
#	if the reptimes of the subdimension equal an integer, the subdimension will be
#	elevated into a margin; if the reptimes are not equal, stop.
#	If a value is some margin, the name should be some other margin,
#	the later will become a subdimension (renamed to "value.name") residing in
#	the former margin.
#TODO: resdim <- function(x, superdim) { }


# Extract and validate superdim for compatible with subdim.
#	Extract is cheap, but validation is expensive.
.superdim <- function(x) {
	sd <- sdim(x)
	if (!is.list(sd) || 0L==length(sd)) return(character())
	sd[] <- names(sd)	# sd has names, which is respected by vapply().
	n <- margins(x)		# dim names (margins and superdim)
	vapply(sd, function(k) n[startsWith(k, n)], "")
	# If there are inconsistant superdim, vapply() will error.
}
.validate_sdim <- function(d, sd, warn=TRUE) {
	if (!is.list(sd) || 0L==length(sd)) return(character())
	n <- names(d)	# margins names (superdim), d can be dim of non garray
	m <- names(sd)	# subdimension names
	stopifnot(is.numeric(d), !is.null(n), !anyNA(n), ""!=n,
		!is.null(m), !anyNA(m), ""!=m)
	starts <- outer(m, n, startsWith)
	nsuperdim <- rowSums(starts)
	if (warn&&any(1!=nsuperdim)) warning(sprintf(
		"sdim not prefixed with exactly 1 dim: %s (%s)",
		paste(m[1!=nsuperdim], collapse=", "), paste(n, collapse=", ")))
	spd <- vapply(seq_along(m), function(i) n[starts[i,]][1], "")
	names(spd) <- m
	idx <- vapply(names(spd), function(k) {
		if (is.na(spd[k])) return(FALSE)
		s <- sum(sd[[k]])
		if (0L==d[spd[k]]%%s) TRUE else {
			if (warn) warning(sprintf(
				"sdim %s (sum=%d) unmatch margin %s (dim=%d)",
				k, s, spd[k], d[spd[k]]))
			FALSE	# remove it
		} }, TRUE)
	spd[idx]
}


#' General array transposition
#'
#' Restore garray attributes that discarded by aperm.default().
#'	Cannot permute between subdimension (means to promote subdimension of
#'	equal length
#'	into regular dim and reduce dim into subdimension), sorry.
#' @param a  A generalized array to be transposed.
#' @param perm  Desired margins after permutation, integer or character.
#' @param ...  Useless, potential arguments inherited from the S3 generic.
aperm.garray <- function(a, perm=NULL, ...) {
	if (all(perm==margins(a))||all(perm==seq_along(margins(a)))) return(a)
	Z <- aperm.default(a=a, perm=perm, resize=TRUE)
	class(Z) <- "garray"
	attr(Z, "sdim") <- attr(a, "sdim", exact=TRUE)	# no further validation
	Z
}


#' Combine generalized arrays
#'
#' Combine generalized arrays, similar to the manner cbind and rbind work. 
#'	Put a sequence of generalized arrays and get a single generalized array
#'	of the same or one more margins.
#'
#' Combine sdim correctly.
#'	Saving or dropping of subdimensions follow a few rules: subdimensions of
#'	the margin with bound along are dropped; of the other margins are save
#'	unless the names of subdimensions are the same; subdimensions of the
#'	same names are dropped except the first one.
#' @param ...  arrays, or a list of several arrays.  Margins of
#'	these arrays should be the same.
#' @param along  The dimension along which to bind the arrays. 
#'	The arrays may have different lengths along that dimension,
#'	and are bind along it, with addition subdimension '*.bind.from'
#'	indicating the composition
#'	of the `along` dimension.  Some arrays may not
#'	have that margin, then the dimension of these arrays expand to 1.
#'	If `along` is a totally new margin, it is created.
#'	In such case, all arrays should have the same dimension.
#' @param margins  Resulting margins.  If includes a totally new margin (not
#'	used by any arrays in `...`), then
#'	`along` is neglected and the new margin setting `along`.
#'	If no new name and `length(along)==0L`, the last `margins` become along.
#'	If `0L==length(margins)`, `along` will become last dimensions of output.
#' @examples
#'	a <- garray(1:24, c(4,6),
#'		dimnames=list(X=1:4, Y=letters[1:6]),
#'	 	sdim=list(XX=c(x1=3,x2=1), YY=c(y1=1,y2=2)))
#'	b <- garray(1:6/10,6,dimnames=list(Y=letters[1:6]))
#'	ab <- abind(a=a, b=b, along="X")
#'	#abind(a, b, margins=c("X","Y"))	# Error
#'	ab2 <- abind(a=a, b=b, margins=c("X","Y"), along="X")
#'	aa <- abind(a=a, a=a, along="Z")
#'	ab3 <- abind(a, b, along="X")
abind <- function(..., margins=character(), along=character()) {
	dots <- list(...)
	if (0L==length(dots)) return(NULL)
		else if (1L==length(dots) && is.list(dots[[1]]))
		dots <- dots[[1]]
	if (!all(vapply(dots, is.garray, TRUE))) stop("need one list of arrays")
	if (0L==length(margins))
		margins <- unique(c(do.call("c", lapply(dots, margins)), along))
	new <- !margins%in%margins(dots[[1]])
	stopifnot(2>sum(new))
	if (0L==length(along)) along <-
		if (0L==sum(new))  margins[length(margins)]
		else if (1L==sum(new)) margins[new]
		else stop("too many new margins")
	n <- c(margins[margins!=along], along)
	d <- dim(dots[[1]])
	d[along] <- 0
	d <- d[n]
	dn <- dimnames(dots[[1]])
	dn[[along]] <- character()
	dn <- dn[n]
	sd <- list()
	newsd <- integer()
	nam <- make.names(names(dots), unique=TRUE)
	if (0L==length(nam)) nam <- make.unique(rep(along, length(dots)))
	for (i in seq_along(dots)) {
		new <- !n%in%margins(dots[[i]])
		di <- dim(dots[[i]])
		stopifnot(2>sum(new))
		if (1L<sum(new)) {
			stop(sprintf("too many new margins for dots[[%d]]", i))
		} else if (1L==sum(new)) {
			di[n[new]] <- 1
			dim(dots[[i]]) <- di
		}	# append with one more margin (length 1)
		dots[[i]] <- aperm(dots[[i]], n)
		if (any((dim(dots[[i]])!=dim(dots[[1]]))[-length(n)]))
			stop("too many dimensions not match")
		d[along] <- d[along]+dim(dots[[i]])[along]
		newsd[nam[i]] <- d[along]
		dn[[along]] <- c(dn[[along]], paste(nam[i],
			dimnames(dots[[i]])[[along]], sep="."))
		sdi <- sdim(dots[[i]])
		for (k in names(sdi)) {
			if (startsWith(k, along)) next
			#warning(sprintf("sdim named `%s` is overridden", k))
			if (is.null(sd[[k]])) sd[[k]] <- sdi[[k]]
			else if (!identical(sd[[k]], sdi[[k]])) warning(sprintf(
				"discard divergence sdim %s for %d", k, i))
		}
	}	# used to be based on lapply(), but `<<-` is tricky.
	sd[[paste0(along, ".bind.from")]] <- diff(c(0, newsd))
	aperm(garray(do.call("c", dots), d, dn, margins=n, sdim=sd), margins)
}


# Simplified list to vector of some type (usually atomic) if possible
# @param value1  A prototype of the return.
# @return  An list (if impossible) or an simple array.
.simplify2array <- function(x, value1=x[[1]]) {
	if (!is.list(x)) stop("non-list need not simplifying")
	l <- unique.default(lengths(x))
	if (is.recursive(value1)) return(x)
	if (1!=length(l)||l!=length(value1)||is.null(value1)) {
		#any(vapply(x, typeof, "")!=typeof(value1))	# time consume
		warning("impossible to simplify the result to array")
		return(x)
	}
	if (is.array(value1)) {
		d1 <- dim(value1)
		dn1 <- dimnames(value1)
		n1 <- names(dn1)
		if (0L==length(n1)||anyNA(n1)||any(""==n1))
			warning("elements without graceful names")
	} else if (1L<l) {
		d1 <- length(value1) 
		dn1 <- list(VALUE1=names(value1))
	} else {
		d1 <- integer()
		dn1 <- NULL
	}	# if 0L==length(value1), dim(Z) will have 0 and 0==length(Z)
	Z <- do.call("c", x)
	dim(Z) <- c(d1, length(x))
	dimnames(Z) <- c(dn1, list(NULL))
	Z
}

# @param length.out  The desired dimensions of the output. 
#	Note that `length(length.out)==length(dim(x))` is mandatory.
#	There are softwares that assume margins new to x to be 1 (like MATLAB
#	and numpy for Python), and I once consider implement the feature,
#	but I finally realize the feature is bug-prone and users do not need it.
#	In circumstance the feature is useful (`amap` and `[<-`),
#	I hard code it there.
# @param sdim  Provide subdimension for help matching dimensions of x and length.out.
#	Usually `x` does not has such information.
#	(x has dimensions smaller than length.out.  In function like `amap`,
#	this information is carried by arrays with larger dimensions).
.rep.garray <- function(x, length.out, sdim=NULL) {
	#tail=utils::tail(which(1<dim(x)), 1)
	# Dimension after that will not be repeated, mainly
	#	because .mapply() will do repeating automatically and quickly. 
	#	Use NULL to disable detecting of tail.
	d <- dim(x)
	n <- names(d)
	if (setequal(names(length.out), n)) {
		length.out <- length.out[n]
	} else {
		stopifnot(is.null(names(length.out)))
		names(length.out) <- n
	}
	stopifnot(length(length.out)==length(d), length.out>=d, 0L<d)
	if (all(length.out==d)) return(x)
	spd <- if (is.null(sdim)) character(0)
		else .validate_sdim(length.out, sdim)
	if (0==length(spd)||all(d==1L|d==length.out)) {
		head <- which(d==length.out)
		perm <- c(head, seq_along(length.out)[-head])
		return(aperm(array(x, length.out[perm]), order(perm)))
	}	# sweep() does it in this manner, seems faster than .subset().
	idx <- lapply(seq_along(d), function(i) {
		if (d[i]==length.out[i]) NULL
		else if (1L==d[i]) rep.int(1L, length.out[i])
		else if (any(ii <- spd==n[i])) {
			j <- names(spd)[ii]
			jj <- which(length.out[i]==vapply(sdim[j], sum, 0L)&
				d[i]==vapply(sdim[j], length, 0L))
			if (0L==length(jj)) stop("subdim match none")
			if (1<length(jj)) {
				warning("subdim match many")
				jj <- jj[1]
			}
			reptimes <- sdim[j][[jj]]
			rep.int(seq_along(reptimes), reptimes)
		}
		else stop("dimension not match")
	})
	idx[d==length.out] <- list(quote(quote(expr=)))
	#idx[vapply(idx, is.null, TRUE)] <- list(quote(quote(expr=)))
	do.call(".subset", c(list(x), idx, drop=FALSE))
}	# mainly for internal usage, thus return only data, no dimnames.


#' Parallel summary, inspired by pmax() and pmin().
#'
#' Functions of Summary group are all, any, max, min, prod, range, and sum,
#'	which reduce a vector into a scalar (except range), thus the name
#'	of psummary().  Of course, other FUN can be passed-in,
#'	but functions like range() that returns a non-scalar vector result
#'	in unpredictable return.  For arguments of different
#'	size, pmin() and pmax() make fractional recycling and issue warning,
#'	but psummary() error since as.data.frame() do not fractionally recycle.
#' @param ...  Usually in the form `psummary(x, y, z, FUN=sum, na.rm=TRUE)`,
#'	alternaitvely `psummary(list(x, y, z), FUN=sum, na.rm=TRUE)`.
psummary <- function(...) UseMethod("psummary")

#' @rdname psummary
psummary.garray <- function(...) {
	dots <- list(...)
	ifun <- which(vapply(dots, is.function, TRUE))
	if (0L==length(ifun)) {
		args <- list()
		FUN <- sum
	} else {
		stopifnot(1<ifun[1])
		args <- dots[-(1:ifun[1])]
		FUN <- dots[[ifun[1]]]
		dots <- dots[1:(ifun[1]-1)]
	}
	if (1L==length(dots) && is.list(dots[[1]])) { dots <- dots[[1]] }
	stopifnot(vapply(dots, is.atomic, TRUE))
	do.call("amap", c(FUN, dots, args))
}

#' @rdname psummary
psummary.default <- function(...) {
	dots <- list(...)
	ifun <- which(vapply(dots, is.function, TRUE))
	if (0L==length(ifun)) {
		args <- list()
		FUN <- sum
	} else {
		stopifnot(1<ifun[1])
		args <- dots[-(1:ifun[1])]
		FUN <- dots[[ifun[1]]]
		dots <- dots[1:(ifun[1]-1)]
	}
	if (1L==length(dots) && is.list(dots[[1]])) { dots <- dots[[1]] }
	stopifnot(vapply(dots, is.atomic, TRUE))
	m <- as.matrix(as.data.frame(lapply(dots, as.vector)))
	# as.data.frame() do non-fractionally recycling
	s <- do.call("apply", c(list(m, 1, FUN), args))
	l <- vapply(dots, length, 0L)
	attributes(s) <- if (1L==which.max(l)) { attributes(dots[[1]]) }
	s
}


#' Read a complex table and return array in basic storagemode. 
#'
#' A complex table has
#'	several row and colume headers, some of which indicate the hierarchy
#'	of the dimensions (thus the returned array may have more dimensions than
#'	row and column), some of which are real row.names and
#'	col.names that will be turn into dimnames, and some of which are
#'	additional attributes.  Cells of non-header are all in a same format
#'	(like double).  The original header are preserved as attributes.
#'	The dimnames strictly save the layout of the matrix, thus row.names
#'	and col.names should be carefully chosen for matching the actual
#'	dimension.  Sometime aperm() is needed after this function.
#'	Sparse table, where dimnames is not complete (unbalanced) and needs
#'	non-fractionally recycling, is not supported.
#' @param file  The name of the file to be read in, see ('file' in ?read.table).
#' @param header  hierarchy structure of the header, a list of length 2,
#'	assigning the index of row and col header,
#'	affecting parsing of the input table;
#' @param row.names,col.names  a list, whose elements can be
#'	a character vector assigning the name of one dimension directly, or
#'	a integer scalar which is the index ({icol,irow}) of {row,col}-header
#'	that will be extracted; since the row and the col headers can have
#'	hierarchy in the input table and the hierarchy will be organized into
#'	addisional dimensions, rownames indicate the names of dimensions that
#'	are from the row of the input table, while colnames, the col; in the
#'	input table, the header can be in two pattern: AAA and AOO;
#'	in the AAA pattern, the names are repeated for the cells that are in
#'	the same index in high dimension, while in the AOO pattern, the names
#'	appear once at the first and the other cells that are in the same index
#'	in high dimension are left blank; in the
#'	output array, {row,col}.names are not necessary `dimnames[[1]]` and
#'	`dimnames[[2]]`;
#'	if all elements of the list are a integer scalar, then the list can
#'	also be coded as a integer vector (since they are the same for lapply);
#' @param ...  Further arguments to be passed to ‘read.table’.
#' @param storagemode  The storagemode of return matrix, usually 'double'.
# @examples
# express.prof <- read.ctable(
#	file.path("http://deleteome.holstegelab.nl/data/downloads",
#		"deleteome_responsive_mutants_ex_wt_var_controls.txt"),
#	header=list(1:2, 1:3), sep="\t", quote="\"",
#	row.names=list(1), col.names=list(2, 1))
read.ctable <- function(file, header, row.names, col.names, ..., storagemode="double") {
	dat <- utils::read.table(file, header=FALSE, dec=".",
		colClasses="character", ...)
	arr      <- unname(as.matrix(dat[-header[[1]],-header[[2]]]))
	storage.mode(arr) <- storagemode
	headrow  <- unname(as.matrix(dat[ header[[1]],-header[[2]]]))
	headcol  <- unname(as.matrix(dat[-header[[1]], header[[2]]]))
	headhead <- unname(as.matrix(dat[ header[[1]], header[[2]]]))
	dimnames <- c( lapply(row.names, function(i) {
		if (is.numeric(i) && 1L==length(i)) {
			i <- unique(headcol[,i])
			i <- i[""!=i]	# neglect empty rownames
		}
		return(i) }), lapply(col.names, function(i) {
		if (is.numeric(i) && 1L==length(i)) {
			i <- unique(headrow[i,])
			i <- i[""!=i]
		}
		return(i) }) )
	# Now arr is 2-dim, dimnames does not match dim(arr), but after runing
	#	the following line, dim(arr) is updated and dimnames(arr) gets
	#	discarded.  If run `dimnames(arr) <- dimnames` first, error.
	dim(arr) <- vapply(dimnames, length, 0)
	# Better than `arr <- array()` since it check for dimension match.
	dimnames(arr) <- dimnames
	attr(arr, "head") <- list(head=headhead, row=headrow, col=headcol)
	return(arr)
}


#' Mapping matching dimension of arrays with a function
#'
#' Generalized and smart mapply()/Map()/outer()/sweep() for data mapping.
#'	Matching is checked automatically.
#' @param FUN  Known vectorized function is recognized if passed in
#'	as character.  Other functions will be vectorized by `.mapply()`.
#' @param ...  Arrays with margins (names of dimnames) and maybe with sdim. 
#'	Orders of their margins can be different, but the extent along a
#'	margin is matched. Unmatched margins are broadcasting like outer(). 
#'	Scalar (length 1 vector) do not contribute margins and
#'	not broadcast here (they will broadcast by `.mapply()` later).
#' @param MoreArgs  a list of other arguments to 'FUN', no matching of margins.
#' @param SIMPLIFY  logical, attempt to reduce the result to exclude recursive
#'	structure (no list hierachy but plain generalized array).
#' @param VECTORIZED  Whether FUN is vectorized will affect the behaviours.
#'	Some combination of FUN and VECTORIZED is not simply slowing down,
#'	but produces meaningless results or even stop (e.g., cumsum).
#'	TRUE - call `FUN` once with arrays being reorganize on dimensions;
#'	FALSE - call `FUN` many times (via `.mapply`), with each cell of arrays.
#' @param e1,e2  Generalized arrays, being operands.
#' @return The dimensions is deduced from inputs.
#' @examples
#' a <- garray(1:24, c(4,6,2), dimnames=list(X=1:4, Y=letters[1:6], Z=NULL),
#' 	sdim=list(XX=c(x1=3,x2=1), YY=c(y1=1,y2=2)))
#' b <- garray(1:6/10,6,dimnames=list(Y=letters[1:6]))
#' c <- garray(1:4/100,c(X=4))
#' d <- garray(1:4/1000,c(Y=4))
#' e <- garray(1:2/1000,c(X=2))
#' f <- garray(0,c(Z=2))
#' g <- garray(0,c(ZZ=2))
#' m1 <- amap(psummary,c,a,b,      0.0001, VECTORIZED=FALSE)
#' m2 <- amap(sum,     c,a,b,      0.0001, VECTORIZED=FALSE)
#' m3 <-               c+a+b+      0.0001
#' n1 <- amap(sum,     c,a,b,d,    0.0001, VECTORIZED=FALSE)
#' n2 <- amap(sum,     c,a,b,e,    0.0001, VECTORIZED=FALSE)
#' n3 <- amap(sum,     c,a,b,e,f,  0.0001, VECTORIZED=FALSE)
#' p1 <- amap(sum,     c,a,b,e,f,g,0.0001, VECTORIZED=FALSE)
#' q1 <- amap(sum, c,a,b,e,f,g,0.0001, SIMPLIFY=FALSE, VECTORIZED=FALSE)
#' q2 <- amap(c,   c,a,b,e,f,g,0.0001, SIMPLIFY=FALSE, VECTORIZED=FALSE)
#' q3 <- amap(list,c,a,b,e,f,g,0.0001, SIMPLIFY=FALSE, VECTORIZED=FALSE)
#' m1==m2
#' m2==m3
#' m2==aperm(m3, 3:1)
amap<- function(FUN, ..., MoreArgs=NULL, SIMPLIFY=TRUE, VECTORIZED=NA) {
	dots <- lapply(list(...), function(X) if (is.scalar(X)) X
		else as.garray(X))
	if (is.na(VECTORIZED)) {
		VECTORIZED <- is.character(FUN)&&FUN%in%c(
			"abs", "sign", "sqrt","floor", "ceiling", "trunc",
			"round", "signif",
			"exp", "log", "expm1", "log1p",
			"cos", "sin", "tan","cospi", "sinpi", "tanpi",
			"acos", "asin", "atan",
			"cosh", "sinh", "tanh","acosh", "asinh", "atanh",
			"lgamma", "gamma", "digamma", "trigamma",
			"cumsum", "cumprod", "cummax", "cummin",
			"+", "-", "*", "/", "^", "%%",
			"&", "|", "!",
			"==", "!=", "<", "<=", ">=", ">",
			"Arg", "Conj", "Im", "Mod", "Re",
			"pmax", "pmin",
			"logsumexp")
		if (isTRUE(!VECTORIZED))
			warning("non-vectorized function is slow")
	}
	# Seldom run, because Ops is always called with VECTORIZED=TRUE when
	#	dispatching; and Math need not amap() at all, since they accept
	#	1 argument and preserve class=garray and sdim.
	n <- unique(do.call("c", lapply(dots, function(X)
		if (is.scalar(X)) character() else margins(X))))
	l <- length(n)
	ddat <- matrix(vapply(seq_along(dots), function(i) {
		if (is.scalar(dots[[i]])) return(rep.int(1L, l))
		d <- dim(dots[[i]])
		if (0L<length(newname <- n[!n%in%names(d)])) {
			dd <- rep.int(1L, length(newname))
			names(dd) <- newname
			d <- c(d, dd)
			dim(dots[[i]]) <<- d
		}
		d[n]
	}, integer(l)), l, length(dots), dimnames=list(n, names(dots)))
	#TODO: May be feature of moving tail is good, need reviving.
	# @param SAFE 
	# FALSE - use unsafe but fast implementation, all inputs are repeated as
	# few as possible, assuming that `FUN` recycles short vectors
	# (like basic operators "+"). 
	# TRUE - slower, all inputs are organized properly to have the same
	# dimension and passed to FUN.  For VECTORIZED=FALSE, SAFE is neglected
	# because `.mapply` recycle short vectors.
	#if (1L<l) ddat <-	# margin len 1 moved to tail
	#	ddat[sort(rowSums(1==ddat), index.return=TRUE)$ix,,drop=FALSE]
	#n <- rownames(ddat)
	d <- vapply(n, function(k) max(ddat[k,], na.rm=TRUE), 0L,USE.NAMES=TRUE)
	dn <- lapply(n, function(k) {
		na <- character(0)
		for (i in seq_along(dots)) {
			nn <- dimnames(dots[[i]])[[k]]
			if (d[k]==length(nn)&&length(nn)>length(na)) na <- nn
			if (length(na)>1) break
		}
		na
	})
	names(dn) <- n
	sd <- do.call("c", lapply(dots, function(X)
		if (is.garray(X)) sdim(X) else NULL))
	sd <- sd[unique(names(sd))]
	dots <- lapply(dots, function(X) as.vector(if (is.scalar(X)) X
		else .rep.garray(aperm(X, perm=n), d, sd)))
	# aperm(X) consume less memory than .rep.garray(X)
	names(d) <- NULL
	FUN <- match.fun(FUN)
	# avoid "object 'FUN' of mode 'function' was not found",
	#	should call once here, not depend on match.fun() within
	#	mapply(), apply() or tapply().
	if (VECTORIZED) {
		X <- do.call(FUN, c(dots, MoreArgs))
		dim(X) <- d	#X <- aperm(., n)
		dimnames(X) <- dn
		if (isTRUE(!SIMPLIFY))
			warning("always return an array for vectorized fun")
	} else {
		X <- .MAPPLY(FUN, dots, MoreArgs)
		if (isTRUE(SIMPLIFY)) {
			X <- .simplify2array(X)
			last <- length(dim(X))
			dnX <- dimnames(X)
			dim(X) <- c(dim(X)[-last], d)
			dimnames(X) <- c(dnX[-last], dn)
		} else {
			dim(X) <- d
			dimnames(X) <- dn
		}
	}
	class(X) <- "garray"	# garray() is too expensive
	sdim(X) <- sd
	X
}

#' @rdname amap
Ops.garray <- function(e1, e2) {	# Not include %*%
	FUN <- get(.Generic, envir=parent.frame(), mode="function")
	if (1L==nargs()) amap(FUN, e1, VECTORIZED=TRUE)
		else amap(FUN, e1, e2, VECTORIZED=TRUE)
}


#' Generalized and smart apply()/Reduce()/tapply() for data folding.
#'
#' @param FUN  Usually a summary function (like `all` and `sum`).
#' @param X  A garray, with margins (names of dimnames) and maybe with sdim.
#' @param MARGIN  Some margins of X and names of sdim.
#'	MARGIN=character() means to reduce all margins (over no margin).
#'	In such case, areduce() is not needed actually.
#' @param ...  Further arguments to 'FUN', no matching of margins.
#' @param SIMPLIFY  TRUE - simplifies the result list to vector of atomic
#'	if possible, and triggers warning and not simplifies if impossible;
#'	FALSE - not simplifies for non speed-up function, and issues warning
#'	(and have to simplify) for speed-up function; NA - simplifies but no
#'	warning if impossible.
#' @param SAFE  TRUE - use safe but slow implementation, in which data splited
#'	from the array are reorganized into small arrays (as are being subset
#'	by `[]`) and passed to FUN (other attributes are dropped, however);
#'	FALSE - faster, data are passed to FUN as dimension-less vectors.
#' @return A matrix (similar to return of apply() or tapply()), with the
#'	trailing margins the same as MARGIN, while the leading margins
#'	depend on FUN and SIMPLIFY.  If FUN returns a scalar or SIMPLIFY=FALSE,
#'	then no leading margins.  In MARGIN, subdimension is replaced with superdims.
#' @examples
#'	a <- garray(1:24, c(4,6),
#'		dimnames=list(X=LETTERS[1:4], Y=letters[1:6]),
#'		sdim=list(XX=c(x1=3,x2=1), YY=c(y1=1,y2=2)))
#'	x1 <- areduce("sum", a, c("X"))
#'	x2 <- areduce(`sum`, a, c("X"))
#'	stopifnot(garray(c(66,72,78,84), margins="X")==x1, x2==x1)
#'	yy1 <- areduce("sum", a, c("YY"))
#'	yy2 <- areduce(`sum`, a, c("YY"))
#'	stopifnot(garray(c(10,68,58,164), margins="Y")==yy1, yy2==yy1)
#'	xyy1 <- areduce("sum", a, c("X","YY"))
#'	xyy2 <- areduce(`sum`, a, c("X","YY"))
#'	stopifnot(xyy1==xyy2)
#'	xxyy1 <- areduce("sum", a, c("XX","YY"))
#'	xxyy2 <- areduce(`sum`, a, c("XX","YY"))
#'	stopifnot(garray(c(6,4,48,20,42,16,120,44), c(X=2,Y=4))==xxyy1)
#'	stopifnot(xxyy2==xxyy1)
#'	b <- garray(1:24, c(3,4,2),
#'		dimnames=list(X=LETTERS[1:3], Y=letters[1:4], Z=NULL),
#'		sdim=list(XX=c(x1=2,x2=1), YY=c(y1=1,y2=1)))
#'	xxyyz1 <- areduce("sum", b, c("XX","YY","Z"))
#'	xxyyz2 <- areduce(`sum`, b, c("XX","YY","Z"))
#'	stopifnot(xxyyz1==xxyyz2)
#'	xyz1 <- areduce(identity, b, c("XX","YY","Z"), SIMPLIFY=FALSE)
#'	xyz2 <- areduce("c",      b, c("XX","YY","Z"), SIMPLIFY=FALSE)
#'	xy1 <- areduce(identity, b, c("XX","YY"), SIMPLIFY=FALSE, SAFE=TRUE)
#'	stopifnot(identical(dimnames(xy1[2,3][[1]]), list(X="C",Y="c",Z=NULL)))
#'	# garray of lists, cannot use `xyz1==xyz2` etc to compare.
areduce <- function(FUN, X, MARGIN, ..., SIMPLIFY=TRUE, SAFE=FALSE) {
	stopifnot(is.garray(X), is.character(MARGIN))
	n <- margins(X)
	d <- dim(X)
	dn <- dimnames(X)
	sd <- sdim(X)
	na.rm <- ifelse(is.null(list(...)$na.rm), FALSE, list(...)$na.rm)
	if (all(MARGIN%in%n)) {
		n1 <- n[!n%in%MARGIN]
		X <- aperm(X, perm=c(n1, MARGIN))
		d1 <- prod(d[n1])
		d2 <- prod(d[MARGIN])	# prod(NULL)==1
		if (is.character(FUN)&&FUN%in%c("sum","mean")) {
			FUN <- c(sum=`.colSums`, mean=`.colMeans`)[[FUN]]
			# col{Sum,Mean}s are 3 times faster than row{Sum,Mean}s
			Z <- if (is.complex(X)) 
				FUN(Re(X), d1, d2, na.rm) +
				FUN(Im(X), d1, d2, na.rm)*1i
				else	# need not set dim(X) <- c(nrow, ncol)
				FUN(   X,  d1, d2, na.rm)
			if (isTRUE(!SIMPLIFY)||isTRUE(SAFE)) warning(
				"always return an array for sum and mean")
			if (0L<length(MARGIN)) {
				dim(Z) <- d[MARGIN]
				dimnames(Z) <- dn[MARGIN]
			}
		} else {
			class(X) <- NULL
			dim(X) <- c(d1, d2)
			FUN <- match.fun(FUN)
			if (SAFE) {
				Z <- .LAPPLY(seq_len(d2), function(i)
					forceAndCall(1, FUN, garray(X[,i],
						d[n1], dn[n1], sdim=sd), ...))
			} else {
				Z <- .LAPPLY(seq_len(d2), function(i)
					forceAndCall(1, FUN, X[,i], ...))
			}
			if (isTRUE(SIMPLIFY)) Z <- .simplify2array(Z)
			last <- length(dim(Z))
			dnZ <- dimnames(Z)
			if (0L<length(MARGIN)) {
				d <- c(dim(Z)[-last], d[MARGIN])
				dn <- c(dnZ[-last], dn[MARGIN])
			} else {
				d <- dim(Z)[-last]
				dn <- dnZ[-last]
			}
			if (0L==length(d)) {
				dim(Z) <- NULL
			} else {
				dim(Z) <- d
				dimnames(Z) <- dn
			}
		}
	} else {
		spd <- .superdim(X)
		if (!all(MARGIN%in%c(names(spd), n))) stop("foreign MARGIN")
		if (any(MARGIN%in%spd[MARGIN]))
			stop("some MARGINs are subdim residing in MARGINs")
		sdMARGIN <- MARGIN%in%names(spd)
		n0 <- spd[MARGIN[sdMARGIN]]	# along sdim (partial reduced)
		n2 <- MARGIN[!sdMARGIN]		# along dim
		n1 <- n[!n%in%c(n0, n2)]	# reduced dim
		X <- aperm(X, perm=c(n0,n1,n2))
		d0 <- prod(d[n0])
		d1 <- prod(d[n1])
		d2 <- prod(d[n2])
		class(X) <- NULL
		dim(X) <- c(d0*d1, d2)
		ngroup <- vector("list", sum(sdMARGIN))
		names(ngroup) <- spd[MARGIN[sdMARGIN]]
		group <- rep.int(1L, d0)
		extent <- 1L
		din <- cumprod(d[n0])	# inner dimension
		for (k in MARGIN[sdMARGIN]) {
			nn <- spd[k]
			reptimes <- sd[[k]]
			#storage.mode(reptimes) <- "integer"
			# Guarantee integer, or rowsum() will return all 0.
			na <- names(reptimes)
			ngroup[[nn]] <- if (is.null(na)) #||anyDuplicated(na)
				seq_along(reptimes) else na
			group <- group+extent*(rep.int(rep.int(seq_along(
				reptimes), reptimes*(din[nn]/d[nn])),
				d0/din[nn])-1L)
			extent <- extent*length(reptimes)
		}
		sd0 <- sd[MARGIN[sdMARGIN]]
		sd[MARGIN[sdMARGIN]] <- NULL	# subdimension going to be reduced
		if (extent>.Machine$integer.max) stop(sprintf(
			"total number of levels > %d", .Machine$integer.max))
		MARGIN[sdMARGIN] <- n0
		if (identical("sum", FUN)) {	# Speed-up (10 times faster)
			#ugroup <- seq_len(extent)
			#Z <- .Internal(rowsum_matrix(X,
			#	rep.int(group, prod(d[n1])), ugroup,
			#	na.rm, as.character(ugroup)))
			Z <- rowsum.default(X,
				rep.int(group, prod(d[n1])), reorder=FALSE,
				na.rm=na.rm)
			# group should be integer; if double, result in 0.
			if (isTRUE(!SIMPLIFY))
				warning("always return an array for sum")
			dim(Z) <- c(lengths(ngroup), d[n2])
			dimnames(Z) <- c(ngroup, dn[n2])
			Z <- aperm(Z, MARGIN)
		} else {
			# Implementations UNLIST(Z) - Faster
			if (SAFE) {
				nn <- spd[names(sd0)]
				names(sd0) <- nn
				lsd0 <- lengths(sd0)
				extent1 <- cumprod(lsd0)
				extent2 <- extent1/lsd0
				subdn <- function(dn, reptimes, i) {
					offset <- cumsum(reptimes)-reptimes
					dn[seq_len(reptimes[i])+offset[i]]
				}
				f <- function(i, XX, ...) {
					idx <- (i-1L)%%extent1%/%extent2+1L
					n[n2] <- NULL
					FUN(aperm(garray(XX[[i]], c(.mapply(
						.subset, list(sd0, idx), NULL),
						d[n1]), c(.mapply(subdn,
						list(dn[nn], sd0, idx), 
						NULL), dn[n1]),
						c(nn, n1), sdim=sd), n), ...)
				}
				Z <- .LAPPLY(seq_len(d2), function(i) {
					XX <- split(X[,i], group)
					.LAPPLY(seq_along(XX), f, XX, ...)
				})
			} else {
				Z <- .LAPPLY(seq_len(d2), function(i)
					.LAPPLY(split(X[,i], group), FUN, ...))
			}
			Z <- do.call("c", Z)
			# Implementations REP(GROUP) - Slower
			#group <- rep.int(group, d2)+
			#	extent*rep(seq_len(d2), each=d0*d1)
			#Z <- lapply(split(X, group), FUN, ...)
			if (isTRUE(SIMPLIFY)) Z <- .simplify2array(Z)
			last <- length(dim(Z))
			dnZ <- dimnames(Z)
			dim(Z) <- c(dim(Z)[-last], lengths(ngroup), d[n2])
			dimnames(Z) <- c(dnZ[-last], ngroup, dn[n2])
			Z <- aperm(Z, c(names(dnZ)[-last], MARGIN))
		}
	}
	if (0L<length(MARGIN)) {
		class(Z) <- "garray"	# garray() is too expensive
		sdim(Z, warn=FALSE) <- sd
	}
	Z
}


#' Generalized array's sweep() for data cleaning.
#'
#' Return a generalized array, by wiping out a summary statistic.
#'
#' @param X  A generalized array.
#' @param FUN  The wiping function.
#' @param STATS  Numeric array or function.
#' @param MARGIN  NULL - STATS is an array; character - STATS is a function,
#'	and by X being reduced along MARGIN, X is wiped.  Length 0 character
#'	vector means reducing along no margin, resulting in a scalar (in
#'	this case, for example, `areduce(sum, X)` is the same as `sum(X)`.
#' @param MoreArgs,SIMPLIFY,VECTORIZED  Argument used by 'amap()'.
#' @param ...  Argument used by 'areduce()'.
#' @examples
#' a <- garray(1:24, c(4,6), list(X=LETTERS[1:4], Y=letters[1:6]),
#' 	sdim=list(XX=c(x1=3,x2=1), YY=c(y1=1,y2=2)))
#' m1 <- awipe(a, MARGIN="XX")
#' m2 <- awipe(a, `-`, mean, "XX")
awipe <- function(X, FUN="-", STATS="mean", MARGIN=NULL,
		MoreArgs=NULL, ..., SIMPLIFY=TRUE, VECTORIZED=NA) {
	if (!is.null(MARGIN)) STATS <- areduce(STATS, X, MARGIN, ...)
	amap(FUN, X, STATS, MoreArgs=MoreArgs, SIMPLIFY=SIMPLIFY,
		VECTORIZED=VECTORIZED)
}


#' Generalized array multiplication. 
#'
#' Default to Einstein summation convention, without explicitly subscripts.
#'
#' Margins shared by X and Y are parallelly mapped by FUN,
#'	and then reduced by SUM (inner product like `%*%`); 
#'	margins in BY and shared by X and Y are simply mapped by FUN
#'	but excluded from reducing (parallel product like `*`);
#'	other margins are extended repeatly (outer product like `%o%`).
#'	Shared margins not to be mapped have to be renamed (like outer product).
#'	For special FUN and SUM, fast algorithms are implemented.
#' @param X,Y  Generalized arrays that can be multiplied.
#' @param FUN  The 'multiply' function.
#' @param SUM  The 'reduce' function.
#' @param BY  margins excluded from summary by SUM.
#' @param MoreArgs,SIMPLIFY,VECTORIZED  Argument used by 'amap()'.
#' @param ...  Argument used by 'areduce()'.
#' @examples
#' a <- garray(1:24, c(4,6), list(X=LETTERS[1:4], Y=letters[1:6]),
#' 	sdim=list(XX=c(x1=3,x2=1), YY=c(y1=1,y2=2)))
#' b <- garray(1:20, c(Z=5, X=4))
#' c <- garray(1:120, c(X=4,Y=6,Z=5))
#' m1 <- amult(a, b)
#' m2 <- amult(a, b, `*`, sum)
#' m3 <- amult(b, a)
#' all.equal(m1, m2)
#' all.equal(m1, m3)
#' all.equal(m1, t(m3))
#' n1 <- amult(a, c, `*`, sum)
#' n2 <- a%X%c
#' all.equal(n1, n2)
#' amult(garray(1:5,margins="I"), garray(1:8,margins="J"))
#' amult(garray(1:8,c(I=2,J=4)), garray(1:9,c(K=3,L=3)))
##@keywords garray, `%*%`
amult <- function(X, Y, FUN="*", SUM="sum", BY=NULL,
		MoreArgs=NULL, ..., SIMPLIFY=TRUE, VECTORIZED=TRUE) {
	nX <- margins(X)
	nY <- margins(Y)
	ni <- intersect(nX, nY)
	nx <- setdiff(nX, ni)
	ny <- setdiff(nY, ni)
	no <- union(nx, ny)
	dX <- dim(X)
	dY <- dim(Y)
	dnX <- dimnames(X)
	dnY <- dimnames(Y)
	sd <- c(sdim(X), sdim(Y))
	if (identical("*",FUN)&&identical("sum",SUM)&&0==length(BY)
			&&all(dX[ni]==dY[ni])) {
		if (length(ni)==1&&length(nX)==2&&length(nY)==2) {
			FUN <- array(list(crossprod, `%*%`,
				function(X,Y) t(Y%*%X), tcrossprod),
				c(2,2))[nX==ni,nY==ni][[1]]
			Z <- FUN(X, Y)
		} else {
			X <- aperm(X, perm=c(nx, ni))
			Y <- aperm(Y, perm=c(ni, ny))
			if (any(dX[ni]!=dY[ni])) {
				d <- pmax(dX[ni], dY[ni])
				X <- .rep.garray(X, c(dX[nx], d), sd)
				Y <- .rep.garray(Y, c(d, dY[ny]), sd)
			} else {
				d <- dX[ni]
			}
			attr(X, "dim") <- c(prod(dX[nx]), prod(d[ni]))
			attr(Y, "dim") <- c(prod(d[ni]), prod(dY[ny]))
			Z <- X%*%Y
		}
		dZ <- c(dX[nx],dY[ny])
		names(dZ) <- NULL
		dim(Z) <- dZ
		dimnames(Z) <- c(dnX[nx], dnY[ny])
		class(Z) <- "garray"
		sdim(Z, warn=FALSE) <- sd
		return(Z)
		# By definition, margins is important but not order.
		#	Sometimes seems saving commutative property of %*%.
	}	# Speed-up for special cases
	areduce(match.fun(SUM), amap(match.fun(FUN), X, Y, MoreArgs=MoreArgs,
		SIMPLIFY=SIMPLIFY, VECTORIZED=VECTORIZED), union(no, BY), ...)
}

#' @rdname amult
`%X%` <- function(X, Y) amult(X, Y)
# %*% cannot be used since it is S4 generic but not S3 generic.
# Cannot simply do `%X%` <- amult, see r-base-3.5.1/src/library/base/outer.R

#' Function composition operator
#'
#' Composite functions `a` and `b` into `a(b(...))`.
#'
#' @param a  A function that can be called with one argument.
#' @param b  A function that can be called with one or more argument,
#'	and result of `b()` can be passed to `a()`.
#' @return  A new function, whose 
#'	arguments are what `b()` can accept, and whose result is what `a()`
#'	can return.
#' @examples
#'	lse <- log%+%sum%+%exp
#'	lse(1:10)
#'	#logsumexp(1:10)	# actual logsumexp() is more sophistic
#'	log(sum(exp(1:10)))
#'	sum <- sd
#'	lse(1:10)	# lse() is fixed at definition
#'	log(sum(exp(1:10)))
#'	(log%+%sum%+%exp)(1:10)	# now is (log%+%sd%+%exp)
`%+%` <- function(a, b) {
	force(a)
	force(b)
	function(...) a(b(...))
}
# `+.function` not work since Ops dispatches on oldClass (attr("class"))
#	and not on class ('function' is implicit class).

# Generalized array cross product 
#
# Generalizaion of `%*%`, dist() and cor(), allowing cross along dimensions
#	of the same margins one by one.
# In general, for 2 matrices to 1 mapping,
#	`%*%` runs on row*column style ([2x4]%*%[4x3]=[2x3]),
#	var/cov/cor/crossprod on column*column style (cor([4x2], [4x3])=[2x3]),
#	dist2()/tcrossprod on row*row style (dist2([2x4], [3x4])=[2x3]).
#across <- function(X, Y, by, byX=by, byY=by, FUN) { }
Any scripts or data that you put into this service are public.
garray documentation built on May 1, 2019, 8:47 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
garray
Generalized Array Arithmetic for Ragged Arrays with Named Margins

R/main.R
In garray: Generalized Array Arithmetic for Ragged Arrays with Named Margins

Try the garray package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

garray Generalized Array Arithmetic for Ragged Arrays with Named Margins

R/main.R In garray: Generalized Array Arithmetic for Ragged Arrays with Named Margins

Try the garray package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

garray
Generalized Array Arithmetic for Ragged Arrays with Named Margins

R/main.R
In garray: Generalized Array Arithmetic for Ragged Arrays with Named Margins