# R/h5ad.R

#' Write liger object to H5AD files
#' @description
#' Create an H5AD file from a \linkS4class{liger} object. This function writes
#' only raw counts to \code{adata.X}, while normalized and scaled expression
#' data are not written, because LIGER uses a different normalization and
#' scaling strategy than most other tools that utilize the H5AD format.
#'
#' Support for single sparse matrices or internal \linkS4class{ligerDataset}
#' objects is also provided in case single datasets need to be converted.
#' @param object One of \linkS4class{liger}, \linkS4class{ligerDataset} or
#' \link[Matrix]{dgCMatrix-class} object.
#' @param filename A character string, the path to the H5AD file to be written.
#' @param obs External data.frame containing cell metadata that is not
#' embedded inside the object. Its rownames must be identical to the colnames
#' of \code{object}.
#' @param var External data.frame containing feature metadata that is not
#' embedded inside the object. Its rownames must be identical to the rownames
#' of \code{object}.
#' @param overwrite Logical, whether to overwrite the file if it exists.
#' @param verbose Logical. Whether to show progress information. Default
#' \code{getOption("ligerVerbose")}, which is \code{TRUE} if users have not set it.
#' @param ... Arguments passed down to S3 methods
#' @return No return value, an H5AD file is written to disk with the following
#' specification, assuming the file is loaded to \code{adata} in Python:
#' \itemize{
#'   \item{\code{adata.X} - Raw count CSR matrix, outer-joined from all
#'   datasets}
#'   \item{\code{adata.obs} - Cell metadata, with exactly the same content as
#'   \code{cellMeta(object)}}
#'   \item{\code{adata.var} - Feature metadata containing only the feature names
#'   as the index of \code{pd.DataFrame}.}
#'   \item{\code{adata.obsm['X_inmf_aligned']} - The integrated embedding
#'   (aligned cell factor loading matrix), the primary output of LIGER, if
#'   available.}
#'   \item{\code{adata.obsm['X_inmf']} - The raw cell factor loading matrix, if
#'   available.}
#'   \item{\code{adata.obsm['<dimRedName>']} - The dimensional reduction matrix,
#'   such as UMAP or TSNE, if available.}
#'   \item{\code{adata.uns['inmf']['W']} - The shared factor feature loading
#'   matrix, if available.}
#'   \item{\code{adata.uns['inmf']['V']['<datasetName>']} - The dataset-specific
#'   factor feature loading matrix, if available.}
#'   \item{\code{adata.uns['inmf']['features']} - The variable features used
#'   for factorization, which should match the second dimension of W and V,
#'   if available.}
#'   \item{\code{adata.uns['inmf']['lambda']} - The hyperparameter lambda used,
#'   the regularization parameter for the factorization, if available.}
#'   \item{\code{adata.uns['inmf']['k']} - The number of factors used for the
#'   factorization, if available.}
#' }
#' @export
#' @examples
#' print("The example below works, but causes PDF manual rendering issue for some reason")
#' \dontrun{
#' writeH5AD(pbmc, filename = tempfile(fileext = ".h5ad"))
#' }
writeH5AD <- function(
        object,
        ...
) {
    UseMethod('writeH5AD', object)
}

#' @export
#' @rdname writeH5AD
#' @method writeH5AD dgCMatrix
writeH5AD.dgCMatrix <- function(
        object,
        filename,
        obs = NULL,
        var = NULL,
        overwrite = FALSE,
        verbose = getOption('ligerVerbose', TRUE),
        ...
) {
    if (file.exists(filename)) {
        if (isTRUE(overwrite)) {
            file.remove(filename)
        } else {
            cli::cli_abort("H5AD file exists at {.file {normalizePath(filename)}}")
        }
    }
    if (!is.null(obs) &&
        !identical(rownames(obs), colnames(object))) {
        cli::cli_abort("The rownames of {.field obs} should be identical to the colnames of {.field object}.")
    }
    if (!is.null(var) &&
        !identical(rownames(var), rownames(object))) {
        cli::cli_abort("The rownames of {.field var} should be identical to the rownames of {.field object}.")
    }
    dfile <- hdf5r::H5File$new(filename = filename, mode = 'w')
    # Work on expression matrices
    if (isTRUE(verbose)) cliID <- cli::cli_process_start("Adding `adata.X`")
    .writeMatrixToH5AD(x = object, dfile = dfile, dname = "X")
    if (isTRUE(verbose)) cli::cli_process_done(cliID)

    # Add cell metadata
    if (isTRUE(verbose)) cliID <- cli::cli_process_start("Adding `adata.obs`")
    obsDF <- obs %||% data.frame(row.names = colnames(object))
    .writeDataFrameToH5AD(df = obsDF, dfile = dfile, dname = "obs")
    if (isTRUE(verbose)) cli::cli_process_done(cliID)

    if (isTRUE(verbose)) cliID <- cli::cli_process_start("Adding `adata.var`")
    varDF <- var %||% data.frame(row.names = rownames(object))
    .writeDataFrameToH5AD(df = varDF, dfile = dfile, dname = "var")
    if (isTRUE(verbose)) cli::cli_process_done(cliID)

    dfile$close_all()
    return(invisible(NULL))
}
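
# A minimal usage sketch for the dgCMatrix method (not run); the toy
# features-by-cells matrix and `obs` data.frame here are illustrative
# assumptions, and the file is written to a temporary path.
if (FALSE) {
    mat <- Matrix::rsparsematrix(nrow = 5, ncol = 3, density = 0.5)
    dimnames(mat) <- list(paste0("gene", 1:5), paste0("cell", 1:3))
    obs <- data.frame(group = factor(c("a", "b", "a")),
                      row.names = colnames(mat))
    writeH5AD(mat, filename = tempfile(fileext = ".h5ad"), obs = obs)
}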

#' @export
#' @rdname writeH5AD
#' @method writeH5AD ligerDataset
writeH5AD.ligerDataset <- function(
        object,
        filename,
        obs = NULL,
        overwrite = FALSE,
        verbose = getOption("ligerVerbose", TRUE),
        ...
) {
    if (file.exists(filename)) {
        if (isTRUE(overwrite)) {
            file.remove(filename)
        } else {
            cli::cli_abort("H5AD file exists at {.file {normalizePath(filename)}}")
        }
    }
    if (!is.null(obs) &&
        !identical(rownames(obs), colnames(object))) {
        cli::cli_abort("The rownames of {.field obs} should be identical to the colnames of {.field object}.")
    }
    dfile <- hdf5r::H5File$new(filename = filename, mode = 'w')
    # Work on expression matrices
    if (isTRUE(verbose)) cliID <- cli::cli_process_start("Adding `adata.X`")
    .writeMatrixToH5AD(x = rawData(object), dfile = dfile, dname = "X")
    if (isTRUE(verbose)) cli::cli_process_done(cliID)

    # Add cell metadata
    if (isTRUE(verbose)) cliID <- cli::cli_process_start("Adding `adata.obs`")
    obsDF <- obs %||% data.frame(row.names = colnames(object))
    .writeDataFrameToH5AD(df = obsDF, dfile = dfile, dname = "obs")
    if (isTRUE(verbose)) cli::cli_process_done(cliID)

    if (isTRUE(verbose)) cliID <- cli::cli_process_start("Adding `adata.var`")
    varDF <- as.data.frame(featureMeta(object))
    .writeDataFrameToH5AD(df = varDF, dfile = dfile, dname = "var")
    if (isTRUE(verbose)) cli::cli_process_done(cliID)

    dfile$close_all()
    return(invisible(NULL))
}
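
# Minimal sketch (not run) for writing a single dataset extracted from a liger
# object; assumes the example `pbmc` object and its "ctrl" dataset, accessed
# with the `dataset()` accessor.
if (FALSE) {
    ld <- dataset(pbmc, "ctrl")
    writeH5AD(ld, filename = tempfile(fileext = ".h5ad"))
}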

#' @export
#' @rdname writeH5AD
#' @method writeH5AD liger
writeH5AD.liger <- function(
        object,
        filename,
        overwrite = FALSE,
        verbose = getOption("ligerVerbose", TRUE),
        ...
) {
    if (file.exists(filename)) {
        if (isTRUE(overwrite)) {
            file.remove(filename)
        } else {
            cli::cli_abort("H5AD file exists at {.file {normalizePath(filename)}}")
        }
    }
    dfile <- hdf5r::H5File$new(filename = filename, mode = 'w')

    # Work on expression matrices
    if (isTRUE(verbose)) {
        cliID <- cli::cli_process_start("Adding merged raw counts to `adata.X`")
    }
    X <- mergeSparseAll(rawData(object))
    .writeMatrixToH5AD(x = X, dfile = dfile, dname = "X")
    if (isTRUE(verbose)) cli::cli_process_done(cliID)

    # Add cell metadata
    if (isTRUE(verbose)) cliID <- cli::cli_process_start("Adding `adata.obs`")
    .writeDataFrameToH5AD(
        df = cellMeta(object, as.data.frame = TRUE),
        dfile = dfile,
        dname = "obs"
    )
    if (isTRUE(verbose)) cli::cli_process_done(cliID)

    # Work on var. As per the liger object design, we don't provide feature
    # metadata for the merged dataset for now.
    if (isTRUE(verbose)) cliID <- cli::cli_process_start("Adding empty `adata.var`")
    varDF <- data.frame(
        row.names = rownames(X)
    )
    .writeDataFrameToH5AD(df = varDF, dfile = dfile, dname = "var")
    if (isTRUE(verbose)) cli::cli_process_done(cliID)

    # Add dimensional reduction information
    # Check if any to be written first
    obsmList <- list()
    if (!is.null(getMatrix(object, "H.norm"))) {
        if (isTRUE(verbose)) {
            cli::cli_alert_info("Planning aligned cell factor loading to `adata.obsm['X_inmf_aligned']`")
        }
        # Transposed because H5AD maps column-major storage in HDF5 to
        # row-major NumPy arrays, i.e. the inner array for a cell (obs entry)
        # stays the same.
        obsmList$X_inmf_aligned <- t(getMatrix(object, "H.norm"))
    }

    if (!any(sapply(getMatrix(object, "H"), is.null))) {
        if (isTRUE(verbose)) {
            cli::cli_alert_info("Planning raw cell factor loading to `adata.obsm['X_inmf']`")
        }
        HList <- getMatrix(object, 'H')
        obsmList$X_inmf <- Reduce(cbind, HList)
    }
    if (length(dimReds(object)) > 0) {
        for (i in seq_along(dimReds(object))) {
            dimRedName <- names(dimReds(object))[i]
            if (isTRUE(verbose)) {
                cli::cli_alert_info(
                    "Planning `dimRed(object, {dimRedName})` to `adata.obsm['{dimRedName}']`"
                )
            }
            # Transposed for the same reason as H.norm above.
            obsmList[[dimRedName]] <- t(dimReds(object)[[i]])
        }
    }
    if (length(obsmList) == 0 && isTRUE(verbose))
        cli::cli_alert_info("No low-dimensional representation to be written.")
    if (length(obsmList) > 0) {
        if (isTRUE(verbose))
            cliID <- cli::cli_process_start("Adding obsm")
        .createMapping(dfile, 'obsm')
        for (i in seq_along(obsmList)) {
            obsmName <- names(obsmList)[i]
            dfile[['obsm']]$create_dataset(
                name = obsmName,
                robj = obsmList[[i]],
                dtype = .H5AD.guessDType(obsmList[[i]]),
                chunk_dims = NULL
            )
        }
        if (isTRUE(verbose)) cli::cli_process_done(cliID)
    }


    if (isTRUE(verbose)) cliID <- cli::cli_process_start("Adding uns")
    .createMapping(dfile, 'uns')
    .createMapping(dfile[['uns']], 'inmf')
    unsList <- object@uns$factorization
    if (!is.null(getMatrix(object, 'W'))) {
        unsList$W <- getMatrix(object, 'W')
        unsList$features <- rownames(unsList$W)
    }
    if (length(unsList) > 0) {
        for (i in seq_along(unsList)) {
            unsName <- names(unsList)[i]
            dfile[['uns/inmf']]$create_dataset(
                name = unsName,
                robj = unsList[[i]],
                dtype = .H5AD.guessDType(unsList[[i]]),
                chunk_dims = NULL
            )
        }
    }
    Vlist <- getMatrix(object, 'V')
    Vlist <- Vlist[!sapply(Vlist, is.null)]
    if (length(Vlist) > 0) {
        .createMapping(dfile[['uns/inmf']], 'V')
        for (i in seq_along(Vlist)) {
            VName <- names(Vlist)[i]
            dfile[['uns/inmf/V']]$create_dataset(
                name = VName,
                robj = Vlist[[i]],
                dtype = .H5AD.guessDType(Vlist[[i]]),
                chunk_dims = NULL
            )
        }
    }
    if (isTRUE(verbose)) cli::cli_process_done(cliID)


    dfile$close_all()
    return(invisible(NULL))
}
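
# Illustrative sketch (not run): inspecting the layout written by the liger
# method with hdf5r, assuming the example `pbmc` object from the package.
if (FALSE) {
    path <- tempfile(fileext = ".h5ad")
    writeH5AD(pbmc, filename = path)
    h5 <- hdf5r::H5File$new(filename = path, mode = "r")
    h5$ls(recursive = TRUE)                      # X, obs, var, obsm, uns/inmf, ...
    hdf5r::h5attr(h5[["X"]], "encoding-type")    # "csr_matrix"
    hdf5r::h5attr(h5[["X"]], "shape")            # nCells, nGenes
    h5$close_all()
}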

.writeMatrixToH5AD <- function(
        x,
        dfile,
        dname
) {
    dnamePaths <- unlist(strsplit(dname, '/'))
    # Create nested groups along the path as needed
    for (i in seq_along(dnamePaths)) {
        search <- paste0(dnamePaths[1:i], collapse = '/')
        if (!dfile$exists(name = search)) {
            dfile$create_group(name = search)
        }
    }
    dfile[[dname]]$create_dataset(
        name = 'data',
        robj = x@x,
        dtype = .H5AD.guessDType(x@x)
    )
    dfile[[dname]]$create_dataset(
        name = 'indices',
        robj = x@i,
        dtype = .H5AD.guessDType(x@i)
    )
    dfile[[dname]]$create_dataset(
        name = 'indptr',
        robj = x@p,
        dtype = .H5AD.guessDType(x@p)
    )
    dfile[[dname]]$create_attr(
        attr_name = 'encoding-type',
        robj = 'csr_matrix',
        dtype = .H5AD.guessDType('csr_matrix'),
        space = hdf5r::H5S$new(type = "scalar")
    )
    dfile[[dname]]$create_attr(
        attr_name = 'encoding-version',
        robj = '0.1.0',
        dtype = .H5AD.guessDType('0.1.0'),
        space = hdf5r::H5S$new(type = "scalar")
    )
    dfile[[dname]]$create_attr(
        attr_name = 'shape',
        robj = rev(dim(x)),
        dtype = .H5AD.guessDType(dim(x))
    )

    return(invisible(NULL))
}
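
# Why the shape is reversed: a dgCMatrix stores CSC (column-compressed) data,
# and its @x/@i/@p slots read with a reversed shape are exactly the CSR
# encoding of the transposed matrix, so a features-by-cells matrix in R comes
# out as the cells-by-features `adata.X` expected by AnnData. A small sketch
# of the equivalence (illustrative, not run):
if (FALSE) {
    m  <- Matrix::rsparsematrix(4, 3, density = 0.5)        # CSC, 4 x 3
    tR <- methods::as(Matrix::t(m), "RsparseMatrix")        # CSR of t(m), 3 x 4
    identical(m@x, tR@x)   # TRUE: same 'data'
    identical(m@i, tR@j)   # TRUE: CSC row indices == CSR column indices
    identical(m@p, tR@p)   # TRUE: same 'indptr'
}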

.writeDataFrameToH5AD <- function(
        df,
        dfile,
        dname
) {
    dnamePaths <- unlist(strsplit(dname, '/'))
    # Create nested groups along the path as needed
    for (i in seq_along(dnamePaths)) {
        search <- paste0(dnamePaths[1:i], collapse = '/')
        if (!dfile$exists(name = search)) {
            dfile$create_group(name = search)
        }
    }
    # Add index
    dfile[[dname]]$create_dataset(
        name = '_index',
        robj = rownames(df),
        dtype = .H5AD.guessDType(rownames(df))
    )
    dfile[[dname]]$create_attr(
        attr_name = '_index',
        robj = '_index',
        dtype = .H5AD.guessDType('_index'),
        space = hdf5r::H5S$new(type = "scalar")
    )
    # Add columns
    for (i in colnames(df)) {
        path <- paste0(dname, '/', i)
        if (is.factor(df[[i]])) {
            # Writing categorical
            .writeCategoricalToH5AD(df[[i]], dfile, path)
        } else {
            dfile[[dname]]$create_dataset(
                name = i,
                robj = df[[i]],
                dtype = .H5AD.guessDType(df[[i]])
            )
        }
    }
    dfile[[dname]]$create_attr(
        attr_name = 'column-order',
        robj = colnames(df),
        dtype = .H5AD.guessDType(colnames(df))
    )
    dfile[[dname]]$create_attr(
        attr_name = 'encoding-type',
        robj = 'dataframe',
        dtype = .H5AD.guessDType('dataframe'),
        space = hdf5r::H5S$new(type = "scalar")
    )
    dfile[[dname]]$create_attr(
        attr_name = 'encoding-version',
        robj = '0.2.0',
        dtype = .H5AD.guessDType('0.2.0'),
        space = hdf5r::H5S$new(type = "scalar")
    )
    return(invisible(NULL))
}
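
# Illustrative sketch (not run): using the helper above on a small data.frame
# and checking the AnnData dataframe encoding; the file path and contents are
# made up for demonstration.
if (FALSE) {
    h5 <- hdf5r::H5File$new(filename = tempfile(fileext = ".h5"), mode = "w")
    df <- data.frame(nUMI = c(100L, 200L),
                     dataset = factor(c("ctrl", "stim")),
                     row.names = c("cell1", "cell2"))
    .writeDataFrameToH5AD(df = df, dfile = h5, dname = "obs")
    h5$ls(recursive = TRUE)                        # obs/_index, obs/nUMI, obs/dataset/...
    hdf5r::h5attr(h5[["obs"]], "encoding-type")    # "dataframe"
    h5$close_all()
}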

.writeCategoricalToH5AD <- function(x, dfile, dname) {
    dfile$create_group(name = dname)
    dfile[[dname]]$create_dataset(
        name = 'categories',
        robj = levels(x),
        dtype = .H5AD.guessDType(levels(x)),
        chunk_dims = NULL
    )
    dfile[[dname]]$create_dataset(
        name = "codes",
        robj = as.integer(x) - 1L,
        dtype = .H5AD.guessDType(as.integer(x) - 1L)
    )
    dfile[[dname]]$create_attr(
        attr_name = 'encoding-type',
        robj = 'categorical',
        dtype = .H5AD.guessDType('categorical'),
        space = hdf5r::H5S$new(type = "scalar")
    )
    dfile[[dname]]$create_attr(
        attr_name = 'encoding-version',
        robj = '0.2.0',
        dtype = .H5AD.guessDType('0.2.0'),
        space = hdf5r::H5S$new(type = "scalar")
    )
    dfile[[dname]]$create_attr(
        attr_name = 'ordered',
        robj = FALSE,
        dtype = .H5AD.guessDType(FALSE),
        space = hdf5r::H5S$new(type = "scalar")
    )
    return(invisible(NULL))
}
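
# Note: factor levels become `categories` and the zero-based positions become
# `codes`, matching pandas' Categorical layout. A quick illustration:
#   x <- factor(c("b", "a", "b"))
#   levels(x)           # "a" "b"  -> written to 'categories'
#   as.integer(x) - 1L  # 1 0 1    -> written to 'codes'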

.createMapping <- function(dfile, dname) {
    dfile$create_group(name = dname)
    dfile[[dname]]$create_attr(
        attr_name = 'encoding-type',
        robj = 'dict',
        dtype = .H5AD.guessDType('dict'),
        space = hdf5r::H5S$new(type = "scalar")
    )
    dfile[[dname]]$create_attr(
        attr_name = 'encoding-version',
        robj = '0.1.0',
        dtype = .H5AD.guessDType('0.1.0'),
        space = hdf5r::H5S$new(type = "scalar")
    )
}

.H5AD.guessDType <- function(x, stype = "utf8", ...) {
    dtype <- hdf5r::guess_dtype(x = x, ...)
    if (inherits(dtype, "H5T_STRING")) {
        dtype <- .H5AD.stringType(stype = stype)
    }
    else if (inherits(dtype, "H5T_COMPOUND")) {
        cpd.dtypes <- dtype$get_cpd_types()
        for (i in seq_along(cpd.dtypes)) {
            if (inherits(cpd.dtypes[[i]], "H5T_STRING")) {
                cpd.dtypes[[i]] <- .H5AD.stringType(stype = stype)
            }
        }
        dtype <- hdf5r::H5T_COMPOUND$new(
            labels = dtype$get_cpd_labels(),
            dtypes = cpd.dtypes,
            size = dtype$get_size()
        )
    }
    else if (inherits(dtype, "H5T_LOGICAL")) {
        dtype <- hdf5r::guess_dtype(x = .boolToInt(x = x), ...)
    }
    return(dtype)
}

.boolToInt <- function(x) {
    x <- as.integer(x)
    x[is.na(x)] <- 2L
    return(x)
}
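
# Illustration of the logical-to-integer conversion used when guessing dtypes:
# FALSE/TRUE/NA map to 0L/1L/2L respectively, e.g.
#   .boolToInt(c(TRUE, FALSE, NA))  # 1 0 2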

.H5AD.stringType <- function(stype = c("utf8", "ascii7")) {
    stype <- match.arg(arg = stype)
    switch(
        EXPR = stype,
        utf8 = hdf5r::H5T_STRING$new(size = Inf)$set_cset(
            cset = hdf5r::h5const$H5T_CSET_UTF8
        ),
        ascii7 = hdf5r::H5T_STRING$new(size = 7L)
    )
}
