R/optimus_loom_annotation.R

Defines functions optimus_loom_annotation.LoomExperiment optimus_loom_annotation.character optimus_loom_annotation

Documented in optimus_loom_annotation optimus_loom_annotation.character optimus_loom_annotation.LoomExperiment

#' @rdname optimus_loom_annotation
#'
#' @name optimus_loom_annotation
#'
#' @title HCA loom file annotation
#'
#' @description `optimus_loom_annotation()` takes the file path
#'     location of a .loom file generated by the Optimus pipeline, for
#'     which additional data will be extracted from the appropriate
#'     manifest.  The .loom file will be imported as a LoomExperiment
#'     object, and the additional manifest information will be added
#'     to the object for return.
#'
#' @importFrom dplyr mutate arrange filter .data
#' @importFrom tools file_ext
#'
#' @param loom Either a character(1) file path to a loom file on
#'     user's system, or a loom file obtained from the HCA and
#'     imported into R using `LoomExperiment::import()`.
#'
#' @param catalog character() HCA catalog from which the .loom file
#'     originated.
#'
#' @seealso `manifest()` and related functions for working with data
#'     returned from the `*/manifest/*` HCA API endpoints.
#'
#' @return A 'LoomExperiment' object annotated with additional
#'     `metadata()` and `colData()` derived from the manifest file
#'     describing samples in the object.
#'
#' @export
optimus_loom_annotation <- function(loom, catalog = NULL)
    UseMethod("optimus_loom_annotation")

#' @rdname optimus_loom_annotation
#'
#' @export
optimus_loom_annotation.character <-
    function(loom, catalog = NULL)
{
    if (is.null(catalog))
        catalog <- catalogs()[1]

    stopifnot(
        ## loom must be a character string
        `loom must be a non-null file path` = .is_scalar_character(loom),
        ## loom must be an existing file
        `loom must be an existing file` = file.exists(loom),
        ## loom must be a loom file
        `loom must be an existing file` = file_ext(loom) == 'loom',
        ## catalog validation
        `catalog must be a character scalar returned by catalogs()` =
            .is_catalog(catalog)
    )

    if (!requireNamespace("LoomExperiment", quietly = TRUE)) {
        install_msg <- .wrap_lines(c(
            "Please install the Bioconductor 'LoomExperiment' package",
            "following directions at",
            "https://bioconductor.org/pacakges/LoomExperiment"
        ))
        stop(install_msg)
    }

    loom_exp <- LoomExperiment::import(
        loom,
        type = "SingleCellLoomExperiment"
    )

    optimus_loom_annotation(loom_exp, catalog)

}

#' @rdname optimus_loom_annotation
#'
#' @export
optimus_loom_annotation.LoomExperiment <-
    function(loom, catalog = NULL)
{
    ## need manifest rows corresponding to each input_id of this file
    ## projectId facet is equal to the metadata$project.provenance.document_id

    project_id <- S4Vectors::metadata(loom)$project.provenance.document_id
    if (is.null(project_id)) {
        msg <- .wrap_lines(c(
            "'project.provenance.document_id' not present in loom file ",
            "metadata, cannot annotate loom file from manifest."
        ))
        stop(msg)
    }

    loom_filter <- hca::filters(
        projectId = list(is = project_id)
    )

    loom_manifest_tbl <- hca::manifest(
        filters = loom_filter,
        catalog = catalog
    )

    ## taking only the loom file entries to avoid multiplicity
    input_ids <- unique(SummarizedExperiment::colData(loom)$input_id)
    loom_manifest_subset_tbl <-
        loom_manifest_tbl |>
        filter(
            .data$sequencing_process.provenance.document_id %in% input_ids
        ) |>
        filter(.data$file_format == "loom")

    ## before merging the additional manifest information with the colData
    ## we must make note of the original order of the colData rows
    original_coldata_order <- SummarizedExperiment::colData(loom)$cell_names

    joined_coldata_merge <-
        SummarizedExperiment::colData(loom) |>
        merge(
            loom_manifest_subset_tbl,
            by.x = "input_id",
            by.y = "sequencing_process.provenance.document_id",
            all.x = TRUE
        )

    reorder_idx <- match(
        original_coldata_order,
        joined_coldata_merge$cell_names
    )
    new_coldata <-
        joined_coldata_merge |>
        arrange(reorder_idx)
    casted_coldata <- methods::as(new_coldata, "DataFrame")

    ## add manifest as a metadata field
    extended_metadata <- c(
        S4Vectors::metadata(loom),
        list("manifest" = loom_manifest_subset_tbl)
    )

    ## constructor w/ copy (S4 class generic)
    ## this avoids copying static information like rowData
    ## while modifying object
    methods::initialize(
        loom,
        colData = casted_coldata,
        metadata = extended_metadata
    )

}
Bioconductor/hca documentation built on March 27, 2024, 3:15 a.m.