### =========================================================================
### SparseAssays objects
### -------------------------------------------------------------------------
#' @include SparseAssays-class.R
NULL
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### SparseAssays class
###
# NOTE: Lossless back and forth coercion from/to SimpleList are automatically
# taken care of by automatic methods defined by the methods package.
#' SimpleListSparseAssays objects
#'
#' @description A concrete subclass of the \link{SparseAssays} virtual class.
#' As such, all the methods documented in
#' \code{?}\link{SparseAssays} also work on a SimpleListSparseAssays object.
#' See \link{SparseAssays} for details.
#'
#' @details The SimpleListSparseAssays class has a nested-list structure. This
#' hierarchy is illustrated below for an example with two sparse assays and
#' three samples:
#' \preformatted{
#' SimpleListSparseAssays
#' |-- sparse_assay_1
#' | |-- sample_1
#' | | |-- key
#' | | |-- value
#' | |-- sample_2
#' | | |-- key
#' | | |-- value
#' | |-- sample_3
#' | | |-- key
#' | | |-- value
#' |-- sparse_assay_2
#' | |-- sample_1
#' | | |-- key
#' | | |-- value
#' | |-- sample_2
#' | | |-- key
#' | | |-- value
#' | |-- sample_3
#' | | |-- key
#' | | |-- value
#' }
#'
#' Each \sQuote{key} is an integer vector and all key elements must have
#' identical length. Each \sQuote{value} element is a matrix object. Each
#' value element may have a different number of rows but the maximum number of
#' rows must be less than or equal to the length of the key elements. A row of
#' the value element may be pointed to multiple times by the key element
#' within the same sample and asssay.
#'
#' @author Peter Hickey, \email{peter.hickey@@gmail.com}
#'
#' @seealso
#' \itemize{
#' \item \link{SparseAssays} objects for a description of the available
#' methods.
#' }
#'
#' @aliases SimpleListSparseAssays
#' [,SimpleListSparseAssays,ANY-method
#'
#' @examples
#' # TODO: Get old examples from docs?
#' # TODO: Use SparseAssays(matrix) in examples?
#' # TODO: Set names on 'key' elements so that can demonstrate combine().
#' ## ---------------------------------------------------------------------
#' ## DIRECT MANIPULATION OF SparseAssays OBJECTS
#' ## ---------------------------------------------------------------------
#' sl1 <- SimpleList(
#' s1 = SimpleList(key = as.integer(c(NA, 1, NA, NA, 2, NA, 3, NA, 4, 5)),
#' value = matrix(1:10, ncol = 2)),
#' s2 = SimpleList(key = as.integer(c(NA, NA, 1, 2, NA, NA, 3, 4, NA, NA)),
#' value = matrix(8:1, ncol = 2)))
#'
#' sl2 <- SimpleList(
#' s1 = SimpleList(key = as.integer(c(NA, 1, NA, 2, 2, NA, 1, NA, NA, 1)),
#' value = matrix(1:2, ncol = 1)),
#' s2 = SimpleList(key = as.integer(c(1, 1, 1, 2, NA, NA, NA, NA, NA, NA)),
#' value = matrix(4:3, ncol = 1)))
#' sa <- SparseAssays(SimpleList(sa1 = sl1, sa2 = sl2))
#' sa
#'
#' as(sa, "SimpleList")
#'
#' length(sa)
#' sa[[2]]
#' dim(sa)
#'
#' sa2 <- sa[-4, 2]
#' sa2
#' length(sa2)
#' sa2[[2]]
#' dim(sa2)
#' dimnames(sa2)
#'
#' names(sa)
#' names(sa) <- c("sa1", "sa2")
#' names(sa)
#' sa[["sa2"]]
#'
#' rbind(sa, sa)
#' \dontrun{
#' # ERROR: cbind-ing requires unique sample names
#' cbind(sa, sa)
#' }
#' # Works because unique sample names
#' cbind(sa[, 1], sa[, 2])
#' \dontrun{
#' # ERROR: combine() requires named 'key' elements
#' combine(sa[1:7, 1], sa[3:8, 1])
#' # ERROR: combine() requires named 'key' elements
#' combine(sa[1:7, 1], sa[3:8, 2])
#' }
#' # TODO: An example of combine() that works because keys are named.
#'
#' @importFrom methods setClass
#'
#' @export
setClass("SimpleListSparseAssays",
contains = c("SparseAssays", "SimpleList")
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Validity
###
# Check that dimnames are compatible, i.e., check that the names() of 'key'
# elements and "sample names" are identical across sparse assays.
.valid.SimpleListSparseAssays.dimnames <- function(x) {
rn <- rownames(x)
rn_identical <- vapply(x, function(sparse_assay) {
vapply(sparse_assay, function(sample, rn) {
identical(names(sample[["key"]]), rn)
}, logical(1L), rn = rn, USE.NAMES = FALSE)
}, logical(ncol(x)), USE.NAMES = FALSE)
if (!all(rn_identical)) {
return(paste0("rownames mismatch: all names() of 'key' elements of a '",
class(x), "' object must be identical."))
}
cn <- colnames(x)
cn_identical <- vapply(x, function(sparse_assay, cn) {
identical(names(sparse_assay), cn)
}, logical(1L), cn = cn, USE.NAMES = FALSE)
if (!all(cn_identical)) {
return(paste0("colnames mismatch: all names() of elements of a '",
class(x), "' object must be identical."))
}
return(NULL)
}
# TODO: Might be worth breaking this up into smaller functions
.valid.SimpleListSparseAssays <- function(x) {
if (length(x) == 0L) {
return(NULL)
}
# NOTE: Checks the virtual class validity. .valid.SparseAssays() is normally
# called **after** .valid.SimpleListSparseAssays() is called (via
# validObject()), however, I also need it to be run up-front.
val <- .valid.SparseAssays(x)
if (!is.null(val)) {
return(val)
}
sparse_assays <- as(x, "SimpleList", strict = FALSE)
# Check that sample level data has an element named 'key', an element named
# 'value', and nothing else.
element_names <- lapply(sparse_assays, function(sparse_assay) {
lapply(sparse_assay, function(sample) {
names(sample)
})
})
element_names <- unlist(element_names, recursive = FALSE, use.names = FALSE)
if (any(vapply(element_names, function(en) {
!identical(en, c("key", "value")) && !identical(en, c("value", "key"))
}, logical(1L)))) {
return(paste0("All sample-level data within each sparse assay of a '",
class(x), "' object must have one element named 'key', one ",
"element named 'value', and nothing else."))
}
# Check each sparse assay has the same number of samples.
n_samples <- vapply(sparse_assays, length, integer(1L))
if (any(n_samples != n_samples[1])) {
return(paste0("All sparse assays of a '", class(x), "' object must have ",
"an identical number of samples."))
}
# Check all key elements are integer vectors.
is_integer_key <- vapply(sparse_assays, function(sparse_assay) {
vapply(sparse_assay, function(sample) {
is(sample[["key"]], "integer")
}, logical(1L))
}, logical(length(sparse_assays[[1L]])))
if (!isTRUE(all(is_integer_key))) {
return(paste0("All 'key' elements of a '", class(x), "' object must be ",
"integer vectors."))
}
# Check all data elements are numeric matrix objects.
# NOTE: This could probably be relaxed to allow non-numeric storage.mode but
# will require changes to functions that call storage.mode() for where it is
# implicitly assumed that the value is either 'integer' or 'double'.
is_numeric_matrix_value <- vapply(sparse_assays, function(sparse_assay) {
vapply(sparse_assay, function(sample) {
# The check on nrow is because if the matrix has zero rows then it will
# be coerced to a logical matrix even if it was originally numeric (e.g.,
# following subsetting with the [-method).
is(sample[["value"]], "matrix") &&
(is.numeric(sample[["value"]]) || nrow(sample[["value"]]) == 0L)
}, logical(1L))
}, logical(length(sparse_assays[[1L]])))
if (!isTRUE(all(is_numeric_matrix_value))) {
return(paste0("All 'value' elements of a '", class(x), "' object must be ",
"numeric matrix objects."))
}
# Check all key elements have the same length.
key_length <- lapply(sparse_assays, function(sparse_assay) {
lapply(sparse_assay, function(sample) {
length(sample[["key"]])
})
})
if (length(unique(unlist(key_length))) != 1L) {
return(paste0("All 'key' elements of a '", class(x), "' object must have ",
"identical length."))
}
# Check all value elements within each sparse assay have the same number of
# columns.
# NOTE: More generally, if data were an n-dimensional array rather than a
# 2-dimensional matrix, would require that all dimensions except the number
# of rows were identical within a sparse assay.
value_ncol <- lapply(sparse_assays, function(sparse_assay) {
lapply(sparse_assay, function(sample) {
ncol(sample[["value"]])
})
})
if (any(vapply(value_ncol, function(sparse_assay_ncol) {
length(unique(unlist(sparse_assay_ncol))) != 1L
}, logical(1L)))) {
return(paste0("All 'data' elements within each sparse assay of a '",
class(x), "' object must have identical ncol."))
}
# Check that the maximum value in each key element is less than or equal to
# the number of rows in each corresponding value element.
key_max <- lapply(sparse_assays, function(sparse_assay) {
lapply(sparse_assay, function(sample) {
# Suppress warning about taking max of an empty vector
suppressWarnings(max(sample[["key"]], na.rm = TRUE))
})
})
key_max <- unlist(key_max, use.names = FALSE)
value_nrow <- lapply(sparse_assays, function(sparse_assay) {
lapply(sparse_assay, function(sample) {
nrow(sample[["value"]])
})
})
value_nrow <- unlist(value_nrow, use.names = FALSE)
if (any(key_max > value_nrow)) {
return(paste0("Maximum value in each 'key' element must be less than or ",
"equal to the number of rows in each corresponding 'value' ",
"element of a '", class(x), "' object."))
}
# Finally, check that dimnames are compatible.
.valid.SimpleListSparseAssays.dimnames(x)
}
#' @importFrom S4Vectors setValidity2
setValidity2("SimpleListSparseAssays", .valid.SimpleListSparseAssays)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Accessors
###
### NOTE: The following are defined via inheritance to the SparseAssays-method:
### length, NROW, names, names<-, [[, [[<-
### The following are specifically defined for SimpleListSparseAssays
### objects: dim, dimnames, [, [<-, rbind, cbind, combine, densify
### dim
#' @param x A SimpleListSparseAssays object.
#'
#' @rdname SimpleListSparseAssays-class
#'
#' @importFrom methods setMethod
#'
#' @export
setMethod("dim", "SimpleListSparseAssays",
function(x) {
if (length(x) == 0L) {
return(c(0L, 0L))
} else {
c(length(x[[1L]][[1L]][["key"]]),
length(x[[1L]]))
}
}
)
### dimnames
.dimnames.SimpleListSparseAssays <- function(x) {
# NOTE: Uses rownames and colnames from first sparse assay-sample with no
# check that these are the same across other sparse assay-sample
# combinations.
if (length(x) == 0L) {
return(list(NULL, NULL))
}
list(names(x[[1L]][[1L]][["key"]]),
names(x[[1L]]))
}
#' @rdname SimpleListSparseAssays-class
#'
#' @importFrom methods setMethod
#'
#' @export
setMethod("dimnames", "SimpleListSparseAssays",
.dimnames.SimpleListSparseAssays)
### [
# NOTE: Subsetting a SimpleListSparseAssays object requires mapping the i to a
# new coordinate specified by the key element.
#' @importFrom S4Vectors normalizeSingleBracketSubscript SimpleList
#' @importFrom stats na.omit
#' @importMethodsFrom S4Vectors endoapply
.extract_SimpleListSparseAssays_subset <- function(x, i, j) {
if (!missing(i) && !missing(j)) {
# normalize i
if (is.character(i)) {
if (any(!i %in% names(x[[1L]][[1L]][["key"]]))) {
stop("subscript contains NAs or out-of-bounds indices")
}
}
if (!is.character(i)) {
i <- normalizeSingleBracketSubscript(i, x, as.NSBS = FALSE)
}
fun <- function(sparse_assay) {
endoapply(sparse_assay[j], function(sample) {
# Map i using key
ii <- na.omit(sample[["key"]][i])
# Extract using mapped i
data <- sample[["value"]][ii, , drop = FALSE]
# Add rownames (temporary)
rownames(data) <- names(ii)
# Sparsify the data
sparsified <- sparsify(data, "SimpleList")
# Update the key
# Should have length(key) == length(i)
if (!is.null(attr(ii, "na.action"))) {
key <- rep(NA_integer_, length(i))
key[-attr(ii, "na.action")] <- sparsified[["key"]]
names(key) <- names(sample[["key"]][i])
} else {
key <- sparsified[["key"]]
}
stopifnot(length(key) == length(i))
SimpleList(key = key,
value = sparsified[["value"]])
})
}
} else if (!missing(i)) {
# normalize i
if (is.character(i)) {
if (any(!i %in% names(x[[1L]][[1L]][["key"]]))) {
stop("subscript contains NAs or out-of-bounds indices")
}
}
if (!is.character(i)) {
i <- normalizeSingleBracketSubscript(i, x, as.NSBS = FALSE)
}
fun <- function(sparse_assay) {
endoapply(sparse_assay, function(sample) {
# Map i using key
ii <- na.omit(sample[["key"]][i])
# Extract using mapped i
data <- sample[["value"]][ii, , drop = FALSE]
# Add rownames (temporary)
rownames(data) <- names(ii)
# Sparsify the data
sparsified <- sparsify(data, "SimpleList")
# Update the key
# Should have length(key) == length(i)
if (!is.null(attr(ii, "na.action"))) {
key <- rep(NA_integer_, length(i))
key[-attr(ii, "na.action")] <- sparsified[["key"]]
names(key) <- names(sample[["key"]][i])
} else {
key <- sparsified[["key"]]
}
stopifnot(length(key) == length(i))
SimpleList(key = key,
value = sparsified[["value"]])
})
}
} else if (!missing(j)) {
fun <- function(sparse_assay) {
sparse_assay[j]
}
}
endoapply(x, fun)
}
#' @inheritParams dim,SimpleListSparseAssays-method
#' @param i,j Numeric or character vectors indicating which \emph{rows} of the
#' sparse assays (\code{i}) and samples (\code{j}) to select.
#' @param drop Not used by \code{[,SimpleListSparseAssays,ANY-method}.
#'
#' @rdname SimpleListSparseAssays-class
#'
#' @importFrom methods setMethod
#'
#' @export
setMethod("[", "SimpleListSparseAssays",
function(x, i, j, ..., drop = FALSE) {
if (drop) {
warning("'drop' ignored '[,", class(x), ",ANY,ANY-method'")
}
.extract_SimpleListSparseAssays_subset(x, i, j)
}
)
### [<-
# IDEA (when !missing(i)):
# (1) Create new value element by
# rbind(sample[["value"]], v_sample[["value"]])
# (2) Create new key element
# (a) Add nrow(sample[["value"]]) to all elements of v_sample[["key"]]
# to account for rbind() operation.
# (b) sample[["key"]][i] <- v_sample[["key"]].
# (3) Subset value to only contain the required rows by
# value[na.omit(unique(key)), , drop = FALSE]
# (4) Sparsify the value
# (5) Re-map the non-NA values of key by the "sparsified" key.
# NOTE: Steps 3-5 are "sparsifying" the value. The "expansion" of
# (key, value) at (2) and (5) should be identical, even though the
# individual elements may not be identical.
#
# IDEA (when missing(i)):
# (1) Simply replace the j-th sample(s) (key, value)-pair by that given in
# value.
#' @param x A SimpleListSparseAssays object.
#' @param i A numeric or character index.
#' @param j A numeric or character index
#' @param rbind If FALSE (default), i must be an in bounds subscript of x. If
#' TRUE, then i can be an out of bounds subscripts of x, in which case
#' these values will be appended to the end of x.
#' @importFrom methods validObject
#' @importFrom S4Vectors SimpleList
#' @importFrom stats na.omit
#' @importMethodsFrom S4Vectors mendoapply
#' normalizeSingleBracketReplacementValue
.replace_SimpleListSparseAssays_subset <- function(x, i, j, value,
rbind = FALSE) {
if (!missing(i) && !missing(j)) {
fun <- function(sparse_assay, v_sparse_assay) {
sparse_assay[j] <- mendoapply(function(sample, v_sample) {
# (1)
# NOTE: call this matrix 'tmp_value' so as not to cause confusion with
# 'value' (which is a SimpleListSparseAssays object).
tmp_value <- rbind(sample[["value"]], v_sample[["value"]])
# (2)
# NOTE: NAs are correctly propogated since NA + number = NA.
vsm_updated <- v_sample[["key"]] + nrow(sample[["value"]])
key <- sample[["key"]]
key[i] <- vsm_updated
# NOTE: Restore names; not necessary if 'i' is a character
if (!is.character(i)) {
names(key) <- names(sample[["key"]])
}
# (3) and (4)
sparsified <- sparsify(tmp_value[na.omit(unique(key)), , drop = FALSE],
"SimpleList")
# (5)
new_lvls <- sparsified[["key"]]
old_lvls <- na.omit(unique(key))
key[!is.na(key)] <- new_lvls[match(key[!is.na(key)], old_lvls)]
SimpleList(key = key,
value = sparsified[["value"]])
}, sample = sparse_assay[j],
v_sample = v_sparse_assay) # No need to subset v_sparse_assay by j
sparse_assay
}
} else if (!missing(i)) {
fun <- function(sparse_assay, v_sparse_assay) {
sparse_assay <- mendoapply(function(sample, v_sample) {
# (1)
# NOTE: call this matrix 'tmp_value' so as not to cause confusion with
# 'value' (which is a SimpleListSparseAssays object).
tmp_value <- rbind(sample[["value"]], v_sample[["value"]])
# (2)
# NOTE: NAs are correctly propogated since NA + number = NA.
vsm_updated <- v_sample[["key"]] + nrow(sample[["value"]])
key <- sample[["key"]]
key[i] <- vsm_updated
# NOTE: Restore names; not necessary if 'i' is a character
if (!is.character(i)) {
names(key) <- names(sample[["key"]])
}
# (3) and (4)
sparsified <- sparsify(tmp_value[na.omit(unique(key)), , drop = FALSE],
"SimpleList")
# (5)
new_lvls <- sparsified[["key"]]
old_lvls <- na.omit(unique(key))
key[!is.na(key)] <- new_lvls[match(key[!is.na(key)], old_lvls)]
SimpleList(key = key,
value = sparsified[["value"]])
}, sample = sparse_assay, v_sample = v_sparse_assay)
sparse_assay
}
} else if (!missing(j)) {
fun <- function(sparse_assay, v_sparse_assay) {
sparse_assay[j] <- v_sparse_assay
sparse_assay
}
}
# Normalize i, j, and value
if (!missing(i) && !rbind) {
if (!is.character(i)) {
if (any(i > nrow(x))) {
stop("subscript out of bounds")
}
} else {
if (any(!(i %in% names(x[[1L]][[1L]][["key"]])))) {
stop("subscript out of bounds")
}
}
}
if (!missing(j)) {
if (!is.character(j)) {
if (any(j > ncol(x))) {
stop("subscript out of bounds")
}
} else {
if (any(!(j %in% names(x[[1L]])))) {
stop("subscript out of bounds")
}
}
}
# Check dims are compatible
value_dim <- dim(value)
if (!missing(i) && !missing(j)) {
x_dim <- c(length(i), length(j))
} else if (!missing(i)) {
x_dim <- c(length(i), ncol(x))
} else if (!missing(j)) {
x_dim <- c(nrow(x), length(j))
} else {
x_dim <- dim(x)
}
if (!identical(x_dim, value_dim)) {
stop("number of items to replace is not a multiple of replacement length")
}
value <- normalizeSingleBracketReplacementValue(value, x, i)
# Loop over each sparse assay and do replacement
val <- mendoapply(fun, x, value)
# NOTE: Sanity check (shouldn't be necessary and may kill performance, but
# until I have good unit tests in place this stays).
validObject(val)
val
}
#' @inheritParams dim,SimpleListSparseAssays-method
#' @param value An object of a class specified in the S4 method signature or as
#' outlined in \sQuote{Details}.
#'
#' @rdname SimpleListSparseAssays-class
#'
#' @importFrom methods setReplaceMethod
#'
#' @export
setReplaceMethod("[", "SimpleListSparseAssays",
function(x, i, j, ..., value) {
.replace_SimpleListSparseAssays_subset(x, i, j, value,
rbind = FALSE)
}
)
### rbind/cbind
#' @importFrom methods as
#' @importFrom S4Vectors SimpleList
#' @importFrom stats setNames
.bind_SimpleListSparseAssays <- function(lst, bind) {
# NOTE: This is copied from SummarizedExperiment:::.bind_Assays(). I'm not
# sure how this could be passed a zero-length list, but I keep it here until
# I know it's safe to remove.
if (length(lst) == 0L) {
return(SparseAssays())
}
# If the list has only a single element then just return that element.
if (length(lst) == 1L) {
return(lst[[1L]])
}
lens <- sapply(lst, length)
len1 <- lens[1L]
if (any(lens != len1)) {
# TODO: I'm not sure that this error message if very informative/accurate.
stop("elements in sparse assays must have the same length")
}
if (len1 == 0L) {
return(SparseAssays())
}
# Check that samples names are unique for cbind and are identical for rbind
sample_names <- lapply(lst, function(e) {
unname(lapply(e, names))
})
if (identical(bind, cbind)) {
sample_names_unique <- vapply(sample_names[-1L], function(sn, sn1) {
!anyDuplicated(c(unique(sn[[1L]]), unique(sn1[[1L]])))
}, FUN.VALUE = logical(1L), sn1 = sample_names[[1L]])
# NOTE: Error not called if all sample names are NULL.
if (!all(sample_names_unique) && !all(is.null(unlist(sample_names)))) {
stop("Sample names (if present) must be unique when calling 'cbind()' ",
"on '", class(lst[[1L]]), "'.")
}
} else {
# NOTE: This allows all sample names to be NULL.
# NOTE: Don't need check the first element against itself
sample_names_identical <- vapply(sample_names[-1L], function(sn, sn1) {
identical(sn, sn1)
}, FUN.VALUE = logical(1L), sn1 = sample_names[[1L]])
if (!all(sample_names_identical)) {
stop("Sample names (if present) must be identical when calling ",
"'rbind()' on '", class(lst[[1L]]), "'")
}
# If all sample names are NULL and rbind()-ing, need to check that the
# number of samples (ncol) in each SparseAssays object are identical.
if (all(is.null(unlist(sample_names)))) {
nc <- lapply(lst, ncol)
nc_identical <- vapply(nc, function(nc, nc1) {
identical(nc, nc1)
}, FUN.VALUE = logical(1L), nc1 = nc[[1L]])
if (!all(nc_identical)) {
stop("Can only rbind '", class(lst[[1L]]), "' objects when each object ",
"has the same number of samples (ncol).")
}
}
}
# Check all elements of lst have the same sparse assay names
sparse_assay_names <- lapply(lst, names)
if (any(vapply(sparse_assay_names, function(san, san1) {
!identical(san, san1)
}, logical(1L), san1 = sparse_assay_names[[1L]]))) {
stop("All '", class(lst[[1L]]), "' objects must have the same sparse ",
"assay names.")
}
sparse_assay_names <- sparse_assay_names[[1L]]
# NOTE: If sparse assay names don't exist, then match by position.
if (all(is.null(sparse_assay_names))) {
sparse_assay_names <- seq_along(lst[[1L]])
unname <- TRUE
} else {
unname <- FALSE
}
if (identical(bind, rbind)) {
# If rbind-ing, need to check that all data elements within each sparse
# assay have the same number of columns.
same_ncol <- lapply(sparse_assay_names, function(san) {
l_sparse_assay <- lapply(lst, "[[", san)
ncol <- lapply(l_sparse_assay, function(sparse_assay) {
lapply(sparse_assay, function(sample) {
ncol(sample[["value"]])
})
})
ncol <- unlist(ncol, use.names = FALSE)
all(ncol == ncol[1L])
})
same_ncol <- unlist(same_ncol, use.names = FALSE)
if (!isTRUE(any(same_ncol))) {
stop("Can only rbind '", class(lst[[1L]]), "' objects where the 'value' ",
"elements within each sparse assay have the same number of columns.")
}
# NOTE: rbind,SimpleListSparseAssays-method uses the
# SparseAssays,`[<-`-method to recursively add the next
# SimpleListSparseAssays object to the end of the already rbind-ed
# SimpleListSparseAssays objects. It's not the most efficient method,
# but it works and avoids repeating much of the code used by
# SparseAssays,`[<-`-method.
# This assumes that length(lst) > 1, which it should be given above checks
# on length(lst).
val <- lst[[1L]]
for (idx in seq.int(from = 2L, to = length(lst), by = 1L)) {
val_nrow <- nrow(val)
i <- seq.int(from = val_nrow + 1,
to = val_nrow + nrow(lst[[idx]]),
by = 1L)
# NOTE: names taken from first elment only
names(i) <- names(lst[[idx]][[1L]][[1L]][["key"]])
.replace_SimpleListSparseAssays_subset(val, i, value = lst[[idx]],
rbind = TRUE)
# val[i, ] <- lst[[idx]]
}
} else {
# If cbind()-ing, need to check that all key elements have the same length.
same_length <- lapply(sparse_assay_names, function(san) {
l_sparse_assay <- lapply(lst, "[[", san)
length <- lapply(l_sparse_assay, function(sparse_assay) {
lapply(sparse_assay, function(sample) {
length(sample[["key"]])
})
})
length <- unlist(length, use.names = FALSE)
all(length == length[1])
})
same_length <- unlist(same_length, use.names = FALSE)
length <- unlist(length, use.names = FALSE)
if (any(!same_length)) {
stop("Can only cbind '", class(lst[[1L]]), "' objects where the 'key' ",
"elements within each sparse assay have the same length.")
}
val <- lapply(sparse_assay_names, function(san) {
l_sparse_assay <- lapply(lst, "[[", san)
do.call("c", l_sparse_assay)
})
val <- as(setNames(SimpleList(val), sparse_assay_names), class(lst[[1L]]))
}
if (unname) {
val <- unname(val)
}
val
}
# NOTE: Can't defer to rbind,SimpleList-method because it in turn defers to
# rbind,ANY-method, which fails because a SimpleListSparseAssays object cannot
# be coerced to a vector (and even if it could, the resulting operation
# probably wouldn't make sense).
#' @param ... For \code{cbind()}, \code{rbind()}, and \code{combine()} one or
#' more SimpleListSparseAssay objects. Otherwise, additional arguments,
#' for use in specific methods.
#' @param deparse.level See \code{?base::\link[base]{cbind}} for a description
#' of this argument.
#'
#' @rdname SimpleListSparseAssays-class
#'
#' @importFrom methods setMethod
#'
#' @export
setMethod("rbind", "SimpleListSparseAssays",
function(..., deparse.level = 1) {
.bind_SimpleListSparseAssays(unname(list(...)), rbind)
}
)
# NOTE: Can't defer to cbind,SimpleList-method because it in turn defers to
# cbind,ANY-method, which returns a matrix with elements being list objects
# (and doesn't make much sense for SimpleListSparseAssays objects).
# WARNING: Not really a cbind. It adds new elements to the 'sparse_assay'-level
# SimpleList.
#' @inheritParams rbind,SimpleListSparseAssays-method
#'
#' @rdname SimpleListSparseAssays-class
#' @importFrom methods setMethod
#'
#' @export
setMethod("cbind", "SimpleListSparseAssays",
function(..., deparse.level = 1) {
.bind_SimpleListSparseAssays(unname(list(...)), cbind)
}
)
### combine
# Combine sample-level key and value elements
# TODO (longterm): Combine without an expand-then-sparsify operation.
#' @importFrom S4Vectors SimpleList
#' @importFrom stats complete.cases
.combine_sample_level.SimpleListSparseAssays <- function(x, y) {
# NOTE: NULL_cn is a flag for whether the input colnames are NULL
NULL_cn <- FALSE
xe <- .densify.SimpleListSparseAssays.sample(x)
if (is.null(colnames(xe))) {
colnames(xe) <- paste0("V", seq_len(ncol(xe)))
NULL_cn <- TRUE
}
ye <- .densify.SimpleListSparseAssays.sample(y)
if (is.null(colnames(ye))) {
colnames(ye) <- paste0("V", seq_len(ncol(ye)))
}
# NOTE: If the key is all NAs then the 'expanded' data are logical NA,
# which will cause problems when we try to combine this with a
# non-logical matrix.
if (storage.mode(xe) == "logical") {
storage.mode(xe) <- storage.mode(ye)
}
if (storage.mode(ye) == "logical") {
storage.mode(ye) <- storage.mode(xe)
}
z <- combine(xe, ye)
sparsified <- sparsify(z, "SimpleList")
key <- sparsified[["key"]]
names(key) <- rownames(z)
value <- sparsified[["value"]]
if (NULL_cn) {
dimnames(value) <- NULL
} else {
colnames(value) <- colnames(z)
}
NA_idx <- which(rowSums(is.na(value)) == ncol(value))
if (length(NA_idx)) {
# Take care of NA rows
stopifnot(length(NA_idx) == 1L)
# Update value element by dropping NA row
value <- value[-NA_idx, , drop = FALSE]
# Update key element to replace index by NA for NA rows
# TODO (longterm): Probably more efficient ways to do this
names(key)[key == NA_idx] <- NA
key[key == NA_idx] <- NA
key[!is.na(key) & key > NA_idx] <- key[!is.na(key) & key > NA_idx] - 1L
}
SimpleList(key = key, value = value)
}
# NOTE: Can't defer to combine,SimpleList,SimpleList-method because
# SparseAssays objects may have a different number of elements at the
# second level (corresponding to different samples).
# NOTE: Requires that both x and y have the identical number of sparse assays
# with identical names.
#' @inheritParams dim
#' @param y A SimpleListSparseAssays object.
#'
#' @rdname SimpleListSparseAssays-class
#'
#' @importFrom methods setMethod
#' @importFrom S4Vectors SimpleList
#' @importMethodsFrom S4Vectors endoapply mendoapply
#'
#' @export
setMethod("combine", c("SimpleListSparseAssays", "SimpleListSparseAssays"),
function(x, y, ...) {
if (length(y) == 0L) {
return(x)
} else if (length(x) == 0L) {
return(y)
}
# Check that key element is named, otherwise densified data is
# unnamed and unnamed matrices cannot be combined.
x_unnamed <- lapply(x, function(sparse_assay) {
lapply(sparse_assay, function(sample) {
is.null(names(sample[["key"]]))
})
})
if (any(unlist(x_unnamed))) {
stop("Cannot combine '", class(x), "' objects with unnamed ",
"'key' elements")
}
y_unnamed <- lapply(y, function(sparse_assay) {
lapply(sparse_assay, function(sample) {
is.null(names(sample[["key"]]))
})
})
if (any(unlist(y_unnamed))) {
stop("Cannot combine '", class(y), "' objects with unnamed ",
"'key' elements")
}
mendoapply(function(x_sa, y_sa) {
if (is.null(names(x_sa)) || is.null(names(y_sa))) {
stop("sample names must be non-NULL when combining '", class(x),
"' objects")
}
# Identify shared samples
shared_samples <- intersect(names(x_sa), names(y_sa))
rownames <- unique(unlist(c(lapply(x_sa, function(sample) {
names(sample[["key"]])}),
lapply(y_sa, function(sample) {
names(sample[["key"]])})
), use.names = FALSE))
# Update shared samples
val <- mendoapply(
.combine_sample_level.SimpleListSparseAssays,
x_sa[shared_samples],
y_sa[shared_samples])
# Create ordered key
if (length(shared_samples)) {
ordered_key <- names(val[[1]][["key"]])
} else {
ordered_key <- NULL
}
x_sa[shared_samples] <- val
# Update samples unique to x
x_u <- setdiff(names(x_sa), shared_samples)
fake <- endoapply(x_sa[x_u], function(e) {
missing_rownames <- setdiff(rownames, names(e[["key"]]))
key <- rep(NA_integer_, length(missing_rownames))
names(key) <- missing_rownames
value <- matrix(ncol = ncol(e[["value"]]),
dimnames = list(NULL, colnames(e[["value"]])))
storage.mode(value) <- storage.mode(e[["value"]])
SimpleList(key = key, value = value)
})
val <- mendoapply(.combine_sample_level.SimpleListSparseAssays,
x_sa[x_u], fake)
# Update key by ordered key (or create ordered key if it doesn't
# yet exist)
if (!is.null(ordered_key)) {
val <- endoapply(val, function(v, ordered_key) {
key_idx <- match(ordered_key, names(v[["key"]]))
v[["key"]] <- v[["key"]][key_idx]
v
}, ordered_key = ordered_key)
} else {
ordered_key <- names(val[[1L]][["key"]])
}
x_sa[x_u] <- val
# Update samples unique to y
y_u <- setdiff(names(y_sa), shared_samples)
fake <- endoapply(y_sa[y_u], function(e) {
missing_rownames <- setdiff(rownames, names(e[["key"]]))
key <- rep(NA_integer_, length(missing_rownames))
names(key) <- missing_rownames
value <- matrix(ncol = ncol(e[["value"]]),
dimnames = list(NULL, colnames(e[["value"]])))
storage.mode(value) <- storage.mode(e[["value"]])
SimpleList(key = key, value = value)
})
val <- mendoapply(.combine_sample_level.SimpleListSparseAssays,
y_sa[y_u], fake)
# Update key by ordered key
val <- endoapply(val, function(v, ordered_key) {
key_idx <- match(ordered_key, names(v[["key"]]))
v[["key"]] <- v[["key"]][key_idx]
v
}, ordered_key = ordered_key)
# Append to x_sa
x_sa <- c(x_sa, val)
x_sa
}, x, y)
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Coercion
###
# NOTE: Useful if wanting to densify/expand one sample's worth of data at a
# time. dimnames are taken from sample.
.densify.SimpleListSparseAssays.sample <- function(sample, withRownames = TRUE) {
val <- sample[["value"]][sample[["key"]], , drop = FALSE]
if (withRownames) {
row.names(val) <- names(sample[["key"]])
}
val
}
#' @param x A SimpleListSparseAssays or SimpleList object.
.densify.SimpleListSparseAssays <- function(x, withRownames = TRUE,
ShallowSimpleListAssays = FALSE) {
nr <- nrow(x)
nc <- ncol(x)
if (is(x, "SimpleListSparseAssays")) {
# NOTE: Use strict = TRUE to ensure lapply() works as expected.
x <- as(x, "SimpleList", strict = TRUE)
}
if (ShallowSimpleListAssays) {
# NOTE: Want to be able to copy dimnames but drop when doing SSE -> SE
l <- lapply(x, function(sparse_assay) {
# A kludge to guess whether the data are integer or numeric. If multiple
# data storage modes are found then assume numeric.
data_storage_mode <- lapply(sparse_assay, function(sample) {
storage.mode(sample[["value"]])
})
data_storage_mode <- unlist(data_storage_mode)
if (all(data_storage_mode == "integer")) {
val <- array(NA_integer_,
dim = c(nr, nc, ncol(sparse_assay[[1L]][["value"]])),
dimnames = list(NULL, names(sparse_assay), NULL))
} else {
val <- array(NA_real_,
dim = c(nr, nc, ncol(sparse_assay[[1L]][["value"]])),
dimnames = list(NULL, names(sparse_assay), NULL))
}
# TODO (longterm): Investigate a Rcpp version
# Fill val with the "expanded" data
for (sample in seq_along(sparse_assay)) {
val[ , sample, ] <- sparse_assay[[sample]][["value"]][
sparse_assay[[sample]][["key"]], , drop = FALSE]
}
# NOTE: colnames of sparse assay should alway be copied, but note that
# these are not colnames of val.
dimnames(val)[[3L]] <- colnames(sparse_assay[[1L]][["value"]])
if (withRownames) {
rn <- names(sparse_assay[[1L]][["key"]])
if (!is.null(rn)) {
dimnames(val)[[1L]] <- rn
}
}
val
})
return(Assays(l))
} else {
l <- lapply(x, function(sparse_assay) {
lapply(sparse_assay, .densify.SimpleListSparseAssays.sample,
withRownames = withRownames)
})
SimpleList(l)
}
}
#' @importFrom methods setMethod
#'
#' @export
setMethod("densify", c("SimpleListSparseAssays", "missing", "missing"),
function(x, i, j, ..., withRownames = TRUE) {
stop("It is strongly recommended that you specify at least one of ",
"'i' or 'j'; see ?densify for reasons why. If you still ",
"really want to densify all sparse assays and samples, then ",
"use 'densify(x, seq_along(x), seq_len(ncol(x)), ...)'")
})
#' @importFrom methods setMethod
#'
#' @export
setMethod("densify", c("SimpleListSparseAssays", "numeric", "missing"),
function(x, i, j, ..., withRownames = TRUE) {
tryCatch({
# NOTE: Need strict = TRUE otherwise
# [,SimpleListSparseAssays-method is called instead of
# [,SimpleList-method
sparse_assays <- as(x, "SimpleList", strict = TRUE)[i]
}, error = function(err) {
stop("'densify(<", class(x), ">, i=\"numeric\", j=\"missing\", ",
"...)' invalid subscript 'i'\n", conditionMessage(err))
})
.densify.SimpleListSparseAssays(sparse_assays, withRownames)
}
)
#' @importFrom methods setMethod
#'
#' @export
setMethod("densify", c("SimpleListSparseAssays", "character", "missing"),
function(x, i, j, ..., withRownames = TRUE) {
msg <- paste0("'densify(<", class(x), ">, i=\"character\", ",
"j=\"missing\", ...)' invalid subscript 'i'")
tryCatch({
# NOTE: Need strict = TRUE otherwise
# [,SimpleListSparseAssays-method is called instead of
# [,SimpleList-method
sparse_assays <- as(x, "SimpleList", strict = TRUE)[i]
}, error = function(err) {
stop(msg, "\n", conditionMessage(err))
})
.densify.SimpleListSparseAssays(sparse_assays, withRownames)
}
)
#' @importFrom methods setMethod
#'
#' @export
setMethod("densify", c("SimpleListSparseAssays", "missing", "numeric"),
function(x, i, j, ..., withRownames = TRUE) {
tryCatch({
sparse_assays <- x[, j]
}, error = function(err) {
stop("'densify(<", class(x), ">, i=\"missing\", j=\"numeric\", ",
"...)' invalid subscript 'j'\n", conditionMessage(err))
})
.densify.SimpleListSparseAssays(sparse_assays, withRownames)
}
)
#' @importFrom methods setMethod
#'
#' @export
setMethod("densify", c("SimpleListSparseAssays", "missing", "character"),
function(x, i, j, ..., withRownames = TRUE) {
msg <- paste0("'densify(<", class(x), ">, i=\"missing\", ",
"j=\"character\", ...)' invalid subscript 'j'")
tryCatch({
sparse_assays <- x[, j]
}, error = function(err) {
stop(msg, "\n", conditionMessage(err))
})
.densify.SimpleListSparseAssays(sparse_assays, withRownames)
}
)
#' @importFrom methods setMethod
#'
#' @export
setMethod("densify", c("SimpleListSparseAssays", "numeric", "numeric"),
function(x, i, j, ..., withRownames = TRUE) {
tryCatch({
x <- x[, j]
}, error = function(err) {
stop("'densify(<", class(x), ">, i=\"numeric\", j=\"numeric\", ",
"...)' invalid subscript 'j'\n", conditionMessage(err))
})
tryCatch({
# NOTE: Need strict = TRUE otherwise
# [,SimpleListSparseAssays-method is called instead of
# [,SimpleList-method
sparse_assays <- as(x, "SimpleList", strict = TRUE)[i]
}, error = function(err) {
stop("'densify(<", class(x), ">, i=\"numeric\", j=\"numeric\", ",
"...)' invalid subscript 'i'\n",
conditionMessage(err))
})
.densify.SimpleListSparseAssays(sparse_assays, withRownames)
}
)
#' @importFrom methods setMethod
#'
#' @export
setMethod("densify", c("SimpleListSparseAssays", "numeric", "character"),
function(x, i, j, ..., withRownames = TRUE) {
msg <- paste0("'densify(<", class(x), ">, i=\"numeric\", ",
"j=\"character\", ...)' invalid subscript 'j'")
tryCatch({
x <- x[, j]
}, error = function(err) {
stop(msg, "\n", conditionMessage(err))
})
tryCatch({
# NOTE: Need strict = TRUE otherwise
# [,SimpleListSparseAssays-method is called instead of
# [,SimpleList-method
sparse_assays <- as(x, "SimpleList", strict = TRUE)[i]
}, error = function(err) {
stop("'densify(<", class(x), ">, i=\"numeric\", ",
"j=\"character\", ...)' invalid subscript 'i'\n",
conditionMessage(err))
})
.densify.SimpleListSparseAssays(sparse_assays, withRownames)
}
)
#' @importFrom methods setMethod
#'
#' @export
setMethod("densify", c("SimpleListSparseAssays", "character", "numeric"),
function(x, i, j, ..., withRownames = TRUE) {
tryCatch({
x <- x[, j]
}, error = function(err) {
stop("'densify(<", class(x), ">, i=\"character\", ",
"j=\"numeric\", ...)' invalid subscript 'j'\n",
conditionMessage(err))
})
msg <- paste0("'densify(<", class(x), ">, i=\"character\", ",
"j=\"numeric\", ...)' invalid subscript 'i'")
tryCatch({
# NOTE: Need strict = TRUE otherwise
# [,SimpleListSparseAssays-method is called instead of
# [,SimpleList-method
sparse_assays <- as(x, "SimpleList", strict = TRUE)[i]
}, error = function(err) {
stop(msg, "\n", conditionMessage(err))
})
.densify.SimpleListSparseAssays(sparse_assays, withRownames)
}
)
#' @importFrom methods setMethod
#'
#' @export
setMethod("densify", c("SimpleListSparseAssays", "character", "character"),
function(x, i, j, ..., withRownames = TRUE) {
msg <- paste0("'densify(<", class(x), ">, i=\"character\", ",
"j=\"character\", ...)' invalid subscript 'j'")
tryCatch({
x <- x[, j]
}, error = function(err) {
stop(msg, "\n", conditionMessage(err))
})
msg <- paste0("'densify(<", class(x), ">, i=\"character\", ",
"j=\"character\", ...)' invalid subscript 'i'")
tryCatch({
# NOTE: Need strict = TRUE otherwise
# [,SimpleListSparseAssays-method is called instead of
# [,SimpleList-method
sparse_assays <- as(x, "SimpleList", strict = TRUE)[i]
}, error = function(err) {
stop(msg, "\n", conditionMessage(err))
})
.densify.SimpleListSparseAssays(sparse_assays, withRownames)
}
)
#' @importFrom methods setAs
setAs("SimpleListSparseAssays", "ShallowSimpleListAssays",
function(from) {
# TODO: Check whether dimanmes are subsequently stripped from assays
# slot if this is called from within makeSEFromSSE
.densify.SimpleListSparseAssays(from, withRownames = TRUE,
ShallowSimpleListAssays = TRUE)
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### TODOs
###
# TODO: show,SimpleListSparseAssays prints truncated class name.
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Miscellaneous NOTEs
###
# NOTE: rev,Assays-method doesn't work! Nor does rev,SparseAssays-method.
# NOTE: x[] errors if x is an Assays object. Should be a no-op (I think). Also
# errors if x is a SparseAssays object.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.