Nothing
# tabMatrix class: generalization of indMatrix from Matrix package
# it has at most one non-zero value in each row
setClass("tabMatrix",
slots = c(
perm = "integer", # as in indMatrix, but 0-based
reduced = "logical", # whether columns are removed (contrasts); a -1 in perm represents such columns
# if reduced there can be all-0 rows
num = "logical", # whether the matrix is numeric or 0/1
# TODO generalize x to matrix/dgCMatrix
x = "numeric", # the numeric values in case num=TRUE
xlab = "character" # name associated with numeric x component (scalar, later to be generalized to colnames)
),
contains = c("sparseMatrix", "generalMatrix"),
validity = function(object) {
n <- object@Dim[1L]
d <- object@Dim[2L]
perm <- object@perm
num <- object@num
reduced <- object@reduced
xlab <- object@xlab
if (length(perm) != n) return(paste("length of 'perm' slot must be", n))
if (num && length(object@x) != n) return(paste("length of 'x' slot must be", n))
if (length(xlab) > 1L) return("length of 'xlab' must not be greater than 1")
if (n > 0L && (any(perm > d - 1L) || (!reduced && any(perm < 0L)))) return("'perm' slot is not a valid index")
# NB 'num' can represent zero rows, so it is more general than 'reduced'
# so we disallow the case that both are TRUE
# 'reduced' is used for matrices having only 0/1 entries and at least one row without 1
if (num && reduced) return("'num' and 'reduced' cannot both be TRUE")
TRUE
}
)
# general constructor
tabMatrix <- function(Dim, Dimnames=NULL, reduced=FALSE, perm=integer(), num=FALSE, x=numeric(), xlab=NULL) {
out <- Ctab(Dim=Dim, reduced=reduced, perm=perm, num=num, x=x)
if (!is.null(Dimnames)) attr(out, "Dimnames") <- Dimnames
if (!is.null(xlab)) attr(out, "xlab") <- xlab
out
}
# constructor from factor
setAs("factor", "tabMatrix",
function(from) {
levs <- attr(from, "levels")
tabMatrix(
Dim = c(length(from), length(levs)),
Dimnames = list(NULL, levs),
reduced = FALSE,
perm = as.integer(from) - 1L,
num = FALSE,
x = numeric(0L),
xlab = character(0L)
)
}
)
# index matrix has exactly one 1 in each row and otherwise 0s
is_ind_matrix <- function(X) class(X)[1L] == "tabMatrix" && !X@num && !X@reduced
setMethod("colSums", "tabMatrix", function(x, na.rm=FALSE, dims=1, ...)
if (x@num)
fast_aggrC(x@x, x@perm + 1L, x@Dim[2L])
else
tabulate(x@perm + 1L, nbins=x@Dim[2L])
)
setMethod("rowSums", "tabMatrix", function(x, na.rm=FALSE, dims=1, ...) {
if (x@num) return(x@x)
out <- rep.int(1, x@Dim[1L])
if (x@reduced) out[x@perm == -1L] <- 0
out
})
setMethod("isDiagonal", "tabMatrix", function(object) {
d <- object@Dim
if (d[1L] != d[2L]) return(FALSE)
if (object@reduced) {
sub <- which(object@perm != -1L)
identical(object@perm[sub], (0:(d[1L] - 1L))[sub])
} else {
identical(object@perm, 0:(d[1L] - 1L))
}
})
setMethod("diag", "tabMatrix", function(x=1, nrow, ncol) {
d <- x@Dim
if (d[1L] <= d[2L]) {
if (x@num)
out <- ifelse(x@perm == 0:(d[1L] - 1L), x@x, 0)
else
out <- ifelse(x@perm == 0:(d[1L] - 1L), 1, 0)
} else {
if (x@num)
out <- ifelse(x@perm[seq_len(d[2L])] == 0:(d[2L] - 1L), x@x[seq_len(d[2L])], 0)
else
out <- ifelse(x@perm[seq_len(d[2L])] == 0:(d[2L] - 1L), 1, 0)
}
if (x@reduced) out[x@perm == -1L] <- 0
out
})
setMethod("t", "tabMatrix", function(x) {
if (x@reduced) {
sub <- which(x@perm != -1L)
sparseMatrix(i=x@perm[sub], j=(0:(x@Dim[1L] - 1L))[sub], x=rep.int(1, length(sub)), dims=rev(x@Dim), dimnames=rev(x@Dimnames), index1=FALSE, check=FALSE)
} else if (x@num) {
sparseMatrix(i=x@perm, j=0:(x@Dim[1L] - 1L), x=x@x, dims=rev(x@Dim), dimnames=rev(x@Dimnames), index1=FALSE, check=FALSE)
} else {
sparseMatrix(i=x@perm, j=0:(x@Dim[1L] - 1L), x=rep.int(1, x@Dim[1L]), dims=rev(x@Dim), dimnames=rev(x@Dimnames), index1=FALSE, check=FALSE)
}
})
setMethod("coerce", c("tabMatrix", "ddiMatrix"), function(from, to="ddiMatrix", strict=TRUE) {
if (!isDiagonal(from)) stop("not a diagonal tabMatrix")
if (from@reduced) {
x <- rep.int(1, from@Dim[1L])
x[from@perm == -1L] <- 0
out <- Cdiag(x)
} else if (from@num) {
out <- Cdiag(from@x)
} else {
out <- CdiagU(from@Dim[1L])
}
if (!(is.null(from@Dimnames[[1L]]) && is.null(from@Dimnames[[2L]]))) attr(out, "Dimnames") <- from@Dimnames
out
})
setMethod("coerce", c("tabMatrix", "matrix"), function(from, to="matrix", strict=TRUE) {
out <- Ctab2mat(from)
if (!(is.null(from@Dimnames[[1L]]) && is.null(from@Dimnames[[2L]]))) attr(out, "dimnames") <- from@Dimnames
out
})
setMethod("coerce", c("tabMatrix", "CsparseMatrix"), function(from, to="CsparseMatrix", strict=TRUE) {
out <- Ctab2dgC(from)
if (!(is.null(from@Dimnames[[1L]]) && is.null(from@Dimnames[[2L]]))) attr(out, "Dimnames") <- from@Dimnames
out
})
setMethod("coerce", c("ddiMatrix", "tabMatrix"), function(from, to="tabMatrix", strict=TRUE)
tabMatrix(Dim=from@Dim, Dimnames=from@Dimnames,
reduced=FALSE, perm=0:(from@Dim[1L] - 1L), num = from@diag == "N",
x = if (from@diag == "N") from@x else numeric()
)
)
setMethod("coerce", c("matrix", "tabMatrix"), function(from, to="tabMatrix", strict=TRUE) {
perm <- apply(from, 1L, function(x) {
nz <- which(x != 0)
if (length(nz) == 1L) nz else if (length(nz) == 0L) NA_integer_ else stop("not a tabMatrix")
})
num <- reduced <- FALSE
x <- from[cbind(seq_len(nrow(from)), perm)]
if (!all(x[!is.na(perm)] == 1)) {
x[is.na(perm)] <- 0
perm[is.na(perm)] <- 1L
num <- TRUE
} else {
x <- numeric()
if (anyNA(perm)) {
perm[is.na(perm)] <- 0L
reduced <- TRUE
}
}
tabMatrix(Dim=dim(from), Dimnames=dimnames(from),
reduced=reduced, perm=perm - 1L, num=num, x=x
)
})
setMethod("coerce", c("dgCMatrix", "tabMatrix"), function(from, to="tabMatrix", strict=TRUE) {
perm <- rep.int(-1L, nrow(from))
x <- numeric(nrow(from))
nnz <- from@p[-1L] - from@p[-length(from@p)]
ind <- 1L
for (co in seq_along(nnz)) {
if (nnz[co]) {
ix_ind <- ind:(ind + nnz[co] - 1L)
perm[from@i[ix_ind][from@x[ix_ind] != 0] + 1L] <- co - 1L
x[from@i[ix_ind][from@x[ix_ind] != 0] + 1L] <- from@x[ix_ind][from@x[ix_ind] != 0]
ind <- ind + nnz[co]
}
}
num <- reduced <- FALSE
if (!all(x[perm >= 0L] %in% c(0, 1))) {
x[perm < 0L] <- 0
perm[perm < 0L] <- 0L
num <- TRUE
} else {
if (any(perm < 0L)) {
perm[x == 0] <- -1L
reduced <- TRUE
}
x <- numeric(0L)
}
tabMatrix(Dim=from@Dim, Dimnames=from@Dimnames,
reduced=reduced, perm=perm, num=num, x=x
)
})
tab_is_zero <- function(x) (x@reduced && all(x@perm == -1L)) || (x@num && all(x@x == 0))
dgC_is_tabMatrix <- function(M) anyDuplicated(M@i[M@x != 0]) == 0
# see whether tabMatrix is actually a permutation matrix
tab_isPermutation <- function(M) {
M@Dim[1L] == M@Dim[2L] && !M@num && identical(sort(M@perm), 0:(M@Dim[1L] - 1L))
}
#' S4 method for row and column subsetting a 'tabMatrix'
#'
#' @keywords internal
#' @param x a tabMatrix object.
#' @param i integer vector indicating the rows to select. Not used for column subsetting.
#' @param j integer vector indicating the columns to select. Not used for row subsetting.
#' @param ... not used.
#' @param drop whether to return a vector in case of a single selected row or column.
#' @returns The selected rows/columns as a tabMatrix or a vector.
#' @name tabMatrix-indexing
NULL
# auxiliary function to get a positive integer index vector
get_ind <- function(index, M, type="row") {
if (is.character(index)) {
ind <- match(index, if (type == "row") rownames(M) else colnames(M))
} else if (is.logical(index)) {
if (length(index) != M@Dim[if (type == "row") 1L else 2L]) stop("incompatible index vector")
ind <- base::which(index)
} else {
ind <- as.integer(index)
if (anyNA(ind)) stop("index vector with NAs not allowed")
if (any(ind < 0L)) {
if (all(ind < 0L)) {
ind <- if (type == "row") seq_len(nrow(M))[ind] else seq_len(ncol(M))[ind]
} else stop("mixed negative and nonnegative index vector")
}
}
if (anyNA(ind) | any(ind == 0L | ind > M@Dim[if (type == "row") 1L else 2L])) stop("index out of range")
ind
}
tab_row_select <- function(M, i, drop) {
i <- get_ind(i, M, "row")
if (drop && length(i) == 1L) {
out <- numeric(M@Dim[2L])
if (M@perm[i] >= 0L)
out[M@perm[i] + 1L] <- if (M@num) M@x[i] else 1
out
} else {
tabMatrix(Dim=c(length(i), M@Dim[2L]), Dimnames=list(M@Dimnames[[1L]][i], M@Dimnames[[2L]]),
reduced=M@reduced, perm=M@perm[i], num=M@num, x=if (M@num) M@x[i] else numeric(0L)
)
}
}
# row selection
#' @rdname tabMatrix-indexing
setMethod("[", c(x="tabMatrix", i="index", j="missing", drop="logical"), function (x, i, j, ..., drop=TRUE)
tab_row_select(x, i, drop)
)
# row selection, missing drop
#' @rdname tabMatrix-indexing
setMethod("[", c(x="tabMatrix", i="index", j="missing", drop="missing"), function (x, i, j, ..., drop=TRUE)
tab_row_select(x, i, drop=TRUE)
)
tab_col_select <- function(M, j, drop) {
j <- get_ind(j, M, "col")
out <- Ctab2dgC(M)[, j, drop=drop]
if (is.vector(out)) out else as(out, "tabMatrix")
}
# column selection
#' @rdname tabMatrix-indexing
setMethod("[", c(x="tabMatrix", i="missing", j="index", drop="logical"), function (x, i, j, ..., drop=TRUE)
tab_col_select(x, j, drop)
)
# column selection, missing drop
#' @rdname tabMatrix-indexing
setMethod("[", c(x="tabMatrix", i="missing", j="index", drop="missing"), function (x, i, j, ..., drop=TRUE)
tab_col_select(x, j, drop=TRUE)
)
# rbind of tabMatrices is tabMatrix
setMethod("rbind2", c("tabMatrix", "tabMatrix"), function(x, y, ...) {
if (x@Dim[2L] != y@Dim[2L]) stop("number of columns of matrices must match for rbind2")
perm <- c(x@perm, y@perm)
num <- x@num || y@num
if (num) {
outx <- c(
(if (x@reduced) x@perm >= 0L else 1) * (if (x@num) x@x else rep.int(1, x@Dim[1L])),
(if (y@reduced) y@perm >= 0L else 1) * (if (y@num) y@x else rep.int(1, y@Dim[1L]))
)
if (x@reduced || y@reduced) perm[perm < 0L] <- 0L
}
out <- Ctab(
Dim = c(x@Dim[1L] + y@Dim[1L], x@Dim[2L]),
reduced = !num && (x@reduced || y@reduced),
perm = perm,
num = num,
x = if (num) outx else numeric()
)
dnx <- dimnames(x)
dny <- dimnames(y)
if (identical(c(dnx, dny), list(NULL, NULL, NULL, NULL))) return(out)
cn <- if (!is.null(dnx[[2L]])) dnx[[2L]] else dny[[2L]]
rx <- dnx[[1L]]
ry <- dny[[1L]]
rn <- if (is.null(rx) && is.null(ry))
NULL
else c(if (!is.null(rx)) rx else character(x@Dim[1L]),
if (!is.null(ry)) ry else character(y@Dim[1L]))
out@Dimnames <- list(rn, cn)
out
})
setMethod("rbind2", c("tabMatrix", "Matrix"), function(x, y, ...)
rbind2(Ctab2dgC(x), y, ...)
)
setMethod("rbind2", c("Matrix", "tabMatrix"), function(x, y, ...)
rbind2(x, Ctab2dgC(y), ...)
)
setMethod("cbind2", c("tabMatrix", "Matrix"), function(x, y, ...)
cbind2(Ctab2dgC(x), y, ...)
)
setMethod("cbind2", c("Matrix", "tabMatrix"), function(x, y, ...)
cbind2(x, Ctab2dgC(y), ...)
)
setMethod("rbind2", c("tabMatrix", "matrix"), function(x, y, ...)
rbind2(Ctab2dgC(x), y, ...)
)
setMethod("rbind2", c("matrix", "tabMatrix"), function(x, y, ...)
rbind2(x, Ctab2dgC(y), ...)
)
setMethod("cbind2", c("tabMatrix", "matrix"), function(x, y, ...)
cbind2(Ctab2dgC(x), y, ...)
)
setMethod("cbind2", c("matrix", "tabMatrix"), function(x, y, ...)
cbind2(x, Ctab2dgC(y), ...)
)
setMethod("nnzero", "tabMatrix", function(x, na.counted=NA) {
if (x@reduced)
sum(x@perm != -1L)
else if (x@num)
sum(x@x != 0)
else
x@Dim[1L]
})
#' S4 method for generic 'anyNA' and signature 'tabMatrix'
#'
#' @keywords internal
#' @param x a tabMatrix object.
#' @param recursive not used.
#' @returns Whether the tabMatrix object contains missings or not.
setMethod("anyNA", "tabMatrix", function(x, recursive=FALSE)
if (x@num) anyNA(x@x) else FALSE
)
setMethod("show", "tabMatrix", function(object) str(object))
# expand a vector, using the indicator part of tabM
expand_mv <- function(tabM, v) Ctab_numeric_prod(tabM, v, TRUE)
remove_levels <- function(f, l=1L) {
lvs <- attr(f, "levels")
if (length(l) != 1L || l < 1L || l > length(lvs)) stop("incorrect input")
lvsnew <- lvs[-l]
structure(match(lvs, lvsnew)[f], levels=lvsnew, class="factor")
}
# factor (interaction) to tabMatrix
# fvars looked up in data, which can either be a data.frame or an environment
# contrasts: either "contr.treatment", "contr.SAS" or an integer vector of the same length as
# fvars specifying for each factor variable which level (by name) is considered the baseline.
# If left unspecified no levels are removed.
# drop.unused.levels: drop unused levels of each factor
# drop: drop empty cells in the interaction (which may result in more columns to be dropped)
fac2tabM <- function(fvars, data, enclos=.GlobalEnv, x=numeric(), xlab=character(), drop.unused.levels=FALSE, drop=FALSE, contrasts=NULL, varsep=":", catsep="$", lex.order=FALSE) {
if (missing(fvars) || !length(fvars)) stop("unexpected 'fvars' argument")
for (f in seq_along(fvars)) {
fac <- eval_in(fvars[f], data, enclos)
if (!is.factor(fac) || drop.unused.levels) fac <- factor(fac)
if (!is.null(contrasts)) {
# TODO instead of changing levels here, compute the indices of the final codes vector
# which should be set to -1; this would be a lot faster for long factors
if (length(contrasts) == 1L && contrasts == "contr.treatment") { # R default: first level is baseline
fac <- remove_levels(fac, 1L)
} else if (length(contrasts) == 1L && contrasts == "contr.SAS") { # 'SAS' convention: last level is baseline
fac <- remove_levels(fac, length(attr(fac, "levels")))
} else if (any(fvars[f] == names(contrasts))) { # user-specified base
m <- match(contrasts[fvars[f]], attr(fac, "levels"))
if (length(m) != 1L || is.na(m)) stop("invalid contrasts")
fac <- remove_levels(fac, m)
}
}
if (f == 1L) {
out <- fac
attr(out, "levels") <- paste(fvars[1L], attr(out, "levels"), sep=catsep)
} else {
out <- interaction(out, fac, drop=drop, sep=paste0(varsep, fvars[f], catsep), lex.order=lex.order)
}
}
if (length(xlab))
labs <- paste(attr(out, "levels"), xlab, sep=":")
else
labs <- attr(out, "levels")
out <- as.integer(out) - 1L
num <- reduced <- FALSE
if (length(x) && !all(x[!is.na(out)] == 1)) {
x[is.na(out)] <- 0
out[is.na(out)] <- 0L
num <- TRUE
} else {
x <- numeric()
if (anyNA(out)) {
out[is.na(out)] <- -1L
reduced <- TRUE
}
}
tabMatrix(Dim=c(length(out), length(labs)), Dimnames=list(NULL, labs),
reduced=reduced, perm=out, num=num, x=as.double(x), xlab=xlab
)
}
# formula to list of tabMatrices
# ... passed to fac2tabM
tables2tabM <- function(formula, data, ...) {
n <- nrow(data)
trms <- terms(formula, data=data)
tmat <- terms_matrix(trms)
if (!length(tmat)) {
if (intercept_only(formula))
return(list(new("tabMatrix", perm=integer(n), reduced=FALSE, num=TRUE, x=rep.int(1, n), Dim=c(n, 1L))))
else
stop("empty formula")
}
tnames <- colnames(tmat)
vnames <- rownames(tmat)
qvar <- !catvars(trms, data) # quantitative variables
qvar <- vnames[which(qvar)]
out <- setNames(vector(mode="list", length(tnames)), tnames)
for (k in seq_along(tnames)) {
countvars <- intersect(vnames[tmat[, k] > 0L], qvar)
if (length(countvars)) {
xk <- eval_in(countvars[1L], data)
if (is.matrix(xk)) stop("matrix variables not yet allowed")
labk <- countvars[1L]
for (v in countvars[-1L]) {
temp <- eval_in(v, data)
xk <- xk * temp
labk <- paste(labk, v, sep=":")
}
} else {
xk <- NULL
labk <- NULL
}
facvars <- setdiff(vnames[tmat[, k] > 0L], qvar)
if (length(facvars)) {
if (length(countvars)) {
fk <- fac2tabM(facvars, data, x=xk, xlab=labk, contrasts=NULL, ...)
} else {
fk <- fac2tabM(facvars, data, contrasts=NULL, ...)
}
} else {
if (length(countvars)) {
fk <- new("tabMatrix", perm=integer(n), reduced=FALSE, num=TRUE,
x=xk, xlab=labk, Dim=c(n, 1L), Dimnames=list(NULL, labk))
} else {
# this should not happen, as the intercept only case is handled above
fk <- new("tabMatrix", perm=integer(n), reduced=FALSE, num=TRUE,
x=rep.int(1, n), Dim=c(n, 1L))
}
}
out[[k]] <- fk
}
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.