R/BachMammaryData.R

Defines functions BachMammaryData

Documented in BachMammaryData

#' Obtain the Bach mammary data
#'
#' Obtain the mouse mammary gland single-cell RNA-seq data from Bach et al. (2017).
#'
#' @param samples A character vector with at least one element, specifying which sample(s) to retrieve.
#' @param location Logical scalar indicating whether genomic coordinates should be returned.
#' @param legacy Logical scalar indicating whether to pull data from ExperimentHub.
#' By default, we use data from the gypsum backend.
#' 
#' @details
#' Column metadata is extracted from the sample annotation in GSE106273,
#' and refers to the developmental stage of the mammary gland.
#'
#' If multiple samples are specified in \code{samples}, the count matrices will be \code{cbind}ed together.
#' Cells originating from different samples are identifiable by the \code{"Sample"} field in the column metadata.
#'
#' If \code{location=TRUE}, the coordinates of the Ensembl gene models are stored in the \code{\link{rowRanges}} of the output.
#'
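#' By default, the complete dataset is obtained from the gypsum backend.
#' If \code{legacy=TRUE} or only a subset of the samples is requested, data are instead downloaded from ExperimentHub and cached for local re-use;
#' specific ExperimentHub resources can be retrieved by searching for \code{scRNAseq/bach-mammary}.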
#' 
#' @return A \linkS4class{SingleCellExperiment} object with a single matrix of UMI counts.
#'
#' @author Aaron Lun
#'
#' @references
#' Bach K et al. (2017).
#' Differentiation dynamics of mammary epithelial cells revealed by single-cell RNA sequencing. 
#' \emph{Nat Commun.} 8(1), 2128
#'
#' @examples
#' sce <- BachMammaryData(samples="NP_1")
#' 
#' @export
#' @importFrom SummarizedExperiment rowData<- rowData
#' @importFrom ExperimentHub ExperimentHub
#' @importFrom BiocGenerics cbind
BachMammaryData <- function(samples=c("NP_1", "NP_2", "G_1", "G_2", "L_1", "L_2", "PI_1", "PI_2"), location=TRUE, legacy = FALSE) {
    # Validate the request before choosing a backend, so that both code paths
    # see the same resolved sample names. match.arg() expands abbreviations,
    # discards entries that match no known sample, and raises an error if
    # nothing matches at all. Previously this was only done on the
    # ExperimentHub path, so eight arbitrary unique strings would silently
    # return the full dataset.
    samples <- match.arg(samples, several.ok=TRUE)

    # After validation every entry is one of the eight known samples, so eight
    # unique entries means that the complete dataset was requested.
    if (!legacy && length(unique(samples)) == 8L) {
        sce <- fetchDataset("bach-mammary-2017", "2023-12-14", realize.assays=TRUE)

    } else {
        # Either the legacy backend was requested explicitly, or only a subset
        # of samples is wanted; in both cases assemble from ExperimentHub.
        version <- "2.0.0"
        host <- file.path("bach-mammary", version)

        # Build one object per requested sample (in the requested order) and
        # combine them by column.
        collected <- lapply(samples, function(s) {
            .create_sce(host, has.rowdata=FALSE, suffix=s)
        })
        sce <- do.call(cbind, collected)

        # The per-sample objects are created without row data; the gene
        # annotation is a separate resource that is attached after combining.
        ehub <- ExperimentHub()
        rowData(sce) <- ehub[ehub$rdatapath==file.path("scRNAseq", host, "rowdata.rds")][[1]]
        rownames(sce) <- rowData(sce)$Ensembl
    }

    # Final step on both paths; 'location' is forwarded unchanged to the helper.
    .define_location_from_ensembl(sce, species="Mm", location=location)
}
LTLA/scRNAseq documentation built on June 28, 2024, 7:31 p.m.