R/Assays-class.R

Defines functions .bind_Assays_objects .bind_assays .replace_Assays_subset .extract_Assays_subset .updateObject_Assays Assays normarg_assays .valid.Assays

Documented in Assays

### =========================================================================
### Assays objects
### -------------------------------------------------------------------------
###
### The Assays API consists of:
###   (a) The Assays() constructor function.
###   (b) Lossless back and forth coercion from/to SimpleList. The coercion
###       method from SimpleList doesn't need (and should not) validate the
###       returned object.
###   (c) The following methods, split in 2 groups:
###       - List-like methods:   length, names, names<-, getListElement, and
###                              setListElement
###       - Matrix-like methods: dim, [, [<-, rbind, cbind
###
### An Assays concrete subclass needs to implement (b) (required) plus
### optionally any of the methods in (c). The reason they are optionals
### is that default methods are provided and they work on any Assays
### derivative as long as lossless back and forth coercion from/to
### SimpleList works.
###
### IMPORTANT
### ---------
###
### 1. Nobody in the Assays hierarchy is allowed to inherit from SimpleList
###    because of the conflicting semantic of [.
###
### 2. Methods that return a modified Assays object (a.k.a. endomorphisms),
###    that is, [ as well as replacement methods names<-, setListElement,
###    and [<-, must respect the copy-on-change contract. With objects that
###    don't make use of references internally, the developer doesn't need
###    to take any special action for that because it's automatically taken
###    care of by R itself. However, for objects that do make use of
###    references internally (e.g. environments, external pointers, pointer
###    to a file on disk, etc...), the developer needs to be careful to
###    implement endomorphisms with copy-on-change semantics. This can
###    easily be achieved (and is what the default methods for Assays objects
###    do) by performaing a full (deep) copy of the object before modifying it
###    instead of trying to modify it in-place. However note that this full
###    (deep) copy can be very expensive and is actually not necessary in
###    order to achieve copy-on-change semantics: it's enough (and often
###    preferrable for performance reasons) to copy only the parts of the
###    object that need to be modified.


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Assays class
###

setClass("Assays", contains="RectangularData", representation("VIRTUAL"))
### Should Assays contain Vector? The length would be the 3rd dimension
### i.e. the nb of assays (1st and 2nd dimensions being the nb of rows
### and cols). That would seem like the right thing to do.
#setClass("Assays",
#    contains=c("RectangularData", "Vector"),
#    representation("VIRTUAL")
#)

### Validity

.valid.Assays <- function(x)
{
    assays <- try(as(x, "SimpleList"), silent=TRUE)
    if (inherits(assays, "try-error"))
        return("'as(x, \"SimpleList\")' must work")
    if (!is(assays, "SimpleList"))
        return("'as(x, \"SimpleList\")' must return a SimpleList object")
    if (length(assays) == 0L)
        return(NULL)

    ## Check dims.
    all_dims <- sapply(assays, function(assay) dim(assay)[1:2])
    if (any(is.na(all_dims)))
        return(wmsg("all assays must be matrix-like objects ",
                    "with 2 (or more?) dimensions"))
    if (!all(all_dims == all_dims[ , 1L]))
        stop("all assays must have the same nrow and ncol")

    NULL
}

setValidity2("Assays", .valid.Assays)

### Constructor

### Always return a SimpleList object by default. Will return a NULL only
### if 'as.null.if.no.assay' is set to TRUE and no assays are supplied.
normarg_assays <- function(assays, as.null.if.no.assay=FALSE)
{
    if (!isTRUEorFALSE(as.null.if.no.assay))
        stop(wmsg("'as.null.if.no.assay' must be TRUE or FALSE"))

    if (is.null(assays)) {
        if (as.null.if.no.assay)
            return(NULL)
        return(SimpleList())
    }

    ## The truth is that the assays can be any array-like objects
    ## with at least 2 dimensions, not just matrix-like objects.
    error_msg <- c("'assays' must be a list or SimpleList of ",
                   "matrix-like elements, or a matrix-like object, ",
                   "or a NULL (see '?SummarizedExperiment')")

    if (is(assays, "Assays"))
        stop(wmsg(error_msg))

    assays_dim <- dim(assays)
    ## Some objects like SplitDataFrameList have a dim() method that
    ## returns a non-NULL object (a matrix!) even though they don't have
    ## an array-like semantic.
    if (!is.matrix(assays_dim) && length(assays_dim) >= 2L)
        #return(SimpleList(assays))  # broken on a data frame
        return(new2("SimpleList", listData=list(assays), check=FALSE))

    if (!is(assays, "SimpleList")) {
        if (is.list(assays)) {
            #assays <- do.call(SimpleList, assays) # broken on a list of
                                                   # data frames
            assays <- new2("SimpleList", listData=assays, check=FALSE)
        } else if (is(assays, "List")) {
            assays <- as(assays, "SimpleList")  # could fail
        } else {
            stop(wmsg(error_msg))
        }
    }
    if (length(assays) == 0L && as.null.if.no.assay)
        return(NULL)
    assays
}

### Always return a SimpleAssays object by default. Will return a NULL only
### if 'as.null.if.no.assay' is set to TRUE and no assays are supplied.
Assays <- function(assays=SimpleList(), as.null.if.no.assay=FALSE)
{
    if (!isTRUEorFALSE(as.null.if.no.assay))
        stop(wmsg("'as.null.if.no.assay' must be TRUE or FALSE"))
    ## Starting with SummarizedExperiment 1.15.4, we wrap the user-supplied
    ## assays in a SimpleAssays object instead of a ShallowSimpleListAssays
    ## object. Note that there are probably hundreds (if not thousands) of
    ## serialized SummarizedExperiment objects around that use
    ## ShallowSimpleListAssays. These objects should keep working as before!
    if (!is(assays, "SimpleAssays")) {
        if (is(assays, "Assays")) {
            ## Will turn any Assays derivative (e.g. ShallowSimpleListAssays)
            ## into a SimpleAssays object.
            assays <- as(as(assays, "SimpleList"), "SimpleAssays")
        } else {
            assays <- normarg_assays(assays, as.null.if.no.assay)
            if (is.null(assays))
                return(NULL)
            assays <- as(assays, "SimpleAssays")
            validObject(assays)
        }
    }
    if (length(assays) == 0L && as.null.if.no.assay)
        return(NULL)
    assays
}

### updateObject

.updateObject_Assays <- function(object, ..., verbose=FALSE)
{
    assays <- as(object, "SimpleList")
    assays <- endoapply(assays,
        function(assay)
            updateObject(assay, ..., verbose=verbose)
    )
    if (length(assays) == 0L)
        return(NULL)
    as(assays, "SimpleAssays")
}

setMethod("updateObject", "Assays", .updateObject_Assays)

### Accessors

setMethod("length", "Assays",
    function(x)
    {
        x <- as(x, "SimpleList")
        callGeneric()
    }
)

setMethod("names", "Assays",
    function(x)
    {
        x <- as(x, "SimpleList")
        callGeneric()
    }
)

setReplaceMethod("names", "Assays",
    function(x, value)
    {
        ans_class <- class(x)
        x <- as(x, "SimpleList")
        as(callGeneric(), ans_class)
    }
)

setMethod("getListElement", "Assays",
    function(x, i, exact=TRUE)
    {
        x <- as(x, "SimpleList")
        callGeneric()
    }
)

setMethod("setListElement", "Assays",
    function(x, i, value)
    {
        ans_class <- class(x)
        x <- as(x, "SimpleList")
        ans <- as(callGeneric(), ans_class)
        validObject(ans)
        ans
    }
)

setMethod("dim", "Assays",
    function(x)
    {
        if (length(x) == 0L)
            return(c(0L, 0L))
        dim(getListElement(x, 1L))[1:2]
    }
)

### 2D-Subsetting

### Subset each assay in Assays object 'x' along its first 2 dimensions.
### If not missing, 'i' and/or 'j' are assumed to be valid subscripts that
### can be used to subset an ordinary matrix or array.
.extract_Assays_subset <- function(x, i, j)
{
    subscripts12 <- list(if (missing(i)) quote(expr=) else i,
                         if (missing(j)) quote(expr=) else j)
    extract_assay_subset <- function(a) {
        ndim <- length(dim(a))
        stopifnot(ndim >= 2L)  # should never happen
        more_subscripts <- rep.int(list(quote(expr=)), ndim - 2L)
        args <- c(list(a), subscripts12, more_subscripts, list(drop=FALSE))
        do.call(`[`, args)
    }
    assays <- as(x, "SimpleList")
    as(endoapply(assays, extract_assay_subset), class(x))
}

setMethod("[", "Assays",
    function(x, i, j, ..., drop=TRUE) .extract_Assays_subset(x, i, j)
)

### Subassign each assay in Assays object 'x' along its first 2 dimensions.
### If not missing, 'i' and/or 'j' are assumed to be valid subscripts that
### can be used to subassign an ordinary matrix or array.
.replace_Assays_subset <- function(x, i, j, value)
{
    subscripts12 <- list(if (missing(i)) quote(expr=) else i,
                         if (missing(j)) quote(expr=) else j)
    replace_assay_subset <- function(a, v) {
        ndim <- length(dim(a))
        stopifnot(ndim >= 2L)  # should never happen
        more_subscripts <- rep.int(list(quote(expr=)), ndim - 2L)
        args <- c(list(a), subscripts12, more_subscripts, list(value=v))
        do.call(`[<-`, args)
    }
    assays <- as(x, "SimpleList")
    values <- as(value, "SimpleList")
    as(mendoapply(replace_assay_subset, assays, values), class(x))
}

setReplaceMethod("[", "Assays",
    function(x, i, j, ..., value) .replace_Assays_subset(x, i, j, value)
)

### rbind/cbind

### 'assays' is assumed to be an unnamed list of length >= 1
.bind_assays <- function(assays, along.cols=FALSE)
{
    if (length(dim(getListElement(assays, 1L))) == 2L) {
        BINDING_FUN <- if (along.cols) "cbind" else "rbind"
    } else {
        BINDING_FUN <- if (along.cols) "acbind" else "arbind"
    }
    do.call(BINDING_FUN, assays)
}

.bind_Assays_objects <- function(objects, along.cols=FALSE)
{
    if (length(objects) == 0L)
        return(Assays())
    lens <- sapply(objects, length)
    if (length(unique(lens)) != 1)
        stop("the objects to bind must have the same number of assays")
    len1 <- lens[1L]
    if (len1 == 0L)
        return(Assays())
    var <- lapply(objects, names)
    uvar <- unique(unlist(var))
    if (is.null(uvar)) {
        ## no names, match by position
        res <- lapply(seq_len(len1), function(index) {
            assays <- lapply(objects, getListElement, index)
            .bind_assays(assays, along.cols=along.cols)
        })
    } else {
        ## match by name
        ok <- all(vapply(var, function(x, y) identical(sort(x), y),
                         logical(1), sort(uvar)))
        if (!ok)
            stop("assays must have the same names()")
        res <- lapply(uvar, function(index) {
            assays <- lapply(objects, getListElement, index)
            .bind_assays(assays, along.cols=along.cols)
        })
        names(res) <- uvar
    }
    as(SimpleList(res), class(getListElement(objects, 1L)))
}

setMethod("rbind", "Assays",
    function(..., deparse.level=1)
    {
        objects <- unname(list(...))
        .bind_Assays_objects(objects, along.cols=FALSE)
    }
)

setMethod("cbind", "Assays",
    function(..., deparse.level=1)
    {
        objects <- unname(list(...))
        .bind_Assays_objects(objects, along.cols=TRUE)
    }
)

### Having "arbind" and "acbind" methods for Matrix objects will make rbind()
### and cbind() work on Assays objects with Matrix list elements.
### Maybe these methods should be defined next to the arbind() and acbind()
### generics (which are defined in the IRanges package) but that would require
### to make IRanges depend on the Matrix package.
setMethod("arbind", "Matrix", function(...) rbind(...))
setMethod("acbind", "Matrix", function(...) cbind(...))


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### SimpleAssays class
###
### Store the Assays in a SimpleList object.
###

### SimpleAssays cannot contain SimpleList because of the conflicting
### semantic of [.
setClass("SimpleAssays",
    contains="Assays",
    representation(data="SimpleList")
)

### We only need to implement the REQUIRED coercions.

setAs("SimpleList", "SimpleAssays",
    function(from) new2("SimpleAssays", data=from, check=FALSE)
)

setAs("SimpleAssays", "SimpleList", function(from) from@data)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### ShallowSimpleListAssays class
###
### WARNING: Looks like reference classes as implemented in the methods
### package are a bit problematic e.g. all.equal() can return false negatives
### after a serialization/deserialization cycle on a ref object as reported
### here https://stat.ethz.ch/pipermail/bioc-devel/2019-May/015112.html
### Anyway their use in the assays slot of a SummarizedExperiment object is
### probably not needed anymore now that R is "shallow copy by default"
### according to this comment by Michael:
###     https://github.com/Bioconductor/SummarizedExperiment/issues/16#issuecomment-455541415
###

.ShallowData <- setRefClass("ShallowData",
    fields = list( data = "ANY" ))

.ShallowSimpleListAssays0 <- setRefClass("ShallowSimpleListAssays",
    fields = list( data = "SimpleList" ),
    contains = c("ShallowData", "Assays"))

### We only need to implement the REQUIRED coercions.

setAs("SimpleList", "ShallowSimpleListAssays",
    function(from) .ShallowSimpleListAssays0(data=from)
)

setAs("ShallowSimpleListAssays", "SimpleList", function(from) from$data)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### AssaysInEnv class
###
### A *broken* alternative to ShallowSimpleListAssays that does NOT respect
### the copy-on-change contract (only provided for illustration purposes).
###
### We implement the REQUIRED coercions plus OPTIONAL methods: length, names,
### names<-, getListElement, and setListElement.
###

setClass("AssaysInEnv",
    contains="Assays",
    representation(envir="environment")
)

.NAMES_SYMBOL <- ".names"  # must begin with a . so is ommitted by ls() 

setMethod("length", "AssaysInEnv", function(x) length(x@envir) - 1L)

setMethod("names", "AssaysInEnv", function(x) x@envir[[.NAMES_SYMBOL]])

### Does NOT respect the copy-on-change contract!
setReplaceMethod("names", "AssaysInEnv",
    function(x, value)
    {
        value <- S4Vectors:::normarg_names(value, class(x), length(x))
        x@envir[[.NAMES_SYMBOL]] <- value
        x
    }
)

setMethod("getListElement", "AssaysInEnv",
    function(x, i, exact=TRUE)
    {
        key <- setNames(ls(x@envir, sorted=TRUE), names(x))[[i]]
        get(key, envir=x@envir)
    }
)

### Does NOT respect the copy-on-change contract!
setMethod("setListElement", "AssaysInEnv",
    function(x, i, value)
    {
        key <- setNames(ls(x@envir, sorted=TRUE), names(x))[[i]]
        assign(key, value, envir=x@envir)
        x
    }
)

setAs("SimpleList", "AssaysInEnv",
    function(from)
    {
        from <- as.list(from)
        from_names <- names(from)
        keys <- paste(sprintf("%09d", seq_along(from)), from_names, sep=":")
        names(from) <- keys
        envir <- list2env(from, parent=emptyenv())
        envir[[.NAMES_SYMBOL]] <- from_names
        new("AssaysInEnv", envir=envir)
    }
)

setAs("AssaysInEnv", "SimpleList",
    function(from)
        SimpleList(setNames(as.list(from@envir, sorted=TRUE), names(from)))
)
Bioconductor/SummarizedExperiment documentation built on Feb. 4, 2024, 11:34 a.m.