# R/read_steinbock.R
#
# Defines functions: read_steinbock
#
# Documented in: read_steinbock

#' @title Reads in single-cell data generated by the steinbock pipeline
#'
#' @description Reader function to generate a
#' \code{\linkS4class{SpatialExperiment}} or
#' \code{\linkS4class{SingleCellExperiment}} object from single-cell data
#' obtained by the 
#' \href{https://github.com/BodenmillerGroup/steinbock}{steinbock}
#' pipeline.
#'
#' @param path full path to the steinbock output folder
#' @param intensities_folder name of the folder containing the intensity 
#' measurements per image
#' @param regionprops_folder name of the folder containing the cell-specific
#' morphology and spatial measurements per image. Can be set to \code{NULL} to 
#' exclude reading in morphology measures.
#' @param graphs_folder name of the folder containing the spatial connectivity
#' graphs per image. Can be set to \code{NULL} to exclude reading in graphs.
#' @param pattern regular expression specifying a subset of files that should 
#' be read in.
#' @param extract_cellid_from single character indicating which column entry in 
#' the intensity files contains the integer cell id.
#' @param extract_coords_from character vector indicating which column entries 
#' in the regionprops files contain the x (first entry) and y (second entry) 
#' coordinates.
#' @param image_file single character indicating the file name storing meta data
#' per image (can be \code{NULL}).
#' @param extract_imagemetadata_from character vector indicating which
#' additional image specific metadata to extract from the \code{image_file}.
#' These will be stored in the \code{colData(x)} slot as object/cell-specific
#' entries.
#' @param panel_file single character containing the name of the panel file. 
#' This can either be inside the steinbock path (recommended) or located 
#' somewhere else.
#' @param extract_names_from single character indicating the column of the panel
#' file containing the channel names.
#' @param return_as should the object be returned as 
#' \code{\linkS4class{SpatialExperiment}} (\code{return_as = "spe"}) or
#' \code{\linkS4class{SingleCellExperiment}} (\code{return_as = "sce"}). 
#' @param BPPARAM parameters for parallelised processing. 
#' 
#' @return returns a \code{SpatialExperiment} or \code{SingleCellExperiment}
#' object with markers in rows and cells in columns. 
#'
#' @section The returned data container:
#' In the case of both containers \code{x}, intensity features are stored in
#' the \code{counts(x)} slot. Morphological features are stored in the
#' \code{colData(x)} slot. The graphs are stored as a
#' \code{\link[S4Vectors]{SelfHits}} object in the 
#' \code{colPair(x, "neighborhood")} slot.
#' 
#' In the case of a returned \code{SpatialExperiment} object, the cell 
#' coordinates are stored in the \code{spatialCoords(x)} slot.
#' 
#' In the case of a returned \code{SingleCellExperiment} object, the cell 
#' coordinates are stored in the \code{colData(x)} slot named as \code{Pos_X}
#' and \code{Pos_Y}.
#'
#' @examples
#' path <- system.file("extdata/mockData/steinbock", package = "imcRtools")
#' 
#' # Read in as SpatialExperiment object
#' x <- read_steinbock(path)
#' x
#' 
#' # Read in as SingleCellExperiment object
#' x <- read_steinbock(path, return_as = "sce")
#' x
#' 
#' # Read in a subset of files
#' x <- read_steinbock(path, pattern = "mockData1")
#' x
#' 
#' # Only read in intensities
#' x <- read_steinbock(path, graphs_folder = NULL, regionprops_folder = NULL)
#' x
#' 
#' # Parallelisation
#' x <- read_steinbock(path, BPPARAM = BiocParallel::bpparam())
#' 
#' @seealso 
#' \url{https://github.com/BodenmillerGroup/steinbock} for the pipeline
#' 
#' \code{\link{read_cpout}} for reading in single-cell data as produced by the
#' ImcSegmentationPipeline
#' 
#' \code{\link[SingleCellExperiment]{SingleCellExperiment}} and 
#' \code{\link[SpatialExperiment]{SpatialExperiment}} for the constructor 
#' functions.
#' 
#' \code{\link[SingleCellExperiment]{colPair}} for information on how to work
#' with the cell-cell interaction graphs
#' 
#' \code{\link[BiocParallel]{bpparam}} for the parallelised backend
#' 
#' @author Nils Eling (\email{nils.eling@@dqbm.uzh.ch})
#' 
#' @importFrom SpatialExperiment SpatialExperiment
#' @export
read_steinbock <- function(path,
                            intensities_folder = "intensities",
                            regionprops_folder = "regionprops",
                            graphs_folder = "neighbors",
                            pattern = NULL,
                            extract_cellid_from = "Object",
                            extract_coords_from = c("centroid-1", "centroid-0"),
                            image_file = "images.csv",
                            extract_imagemetadata_from = c("width_px", 
                                                    "height_px"),
                            panel_file = "panel.csv",
                            extract_names_from = "name",
                            return_as = c("spe", "sce"),
                            BPPARAM = SerialParam()){

    # Argument checking is delegated to the package-internal validator; it
    # runs before anything else so invalid input fails before files are read.
    .valid.read_steinbock.input(path, intensities_folder, graphs_folder,
                                regionprops_folder, extract_cellid_from,
                                extract_coords_from, image_file,
                                extract_imagemetadata_from, panel_file,
                                extract_names_from, pattern)

    # Resolve the c("spe", "sce") default (or the user's choice) to a single
    # container type.
    return_as <- match.arg(return_as)

    # Step 1: intensities. Only files matching 'pattern' are picked up; the
    # helper returns a list-like collection that is merged with cbind below.
    intensities_dir <- file.path(path, intensities_folder)
    intensity_files <- list.files(intensities_dir,
                                    pattern = pattern,
                                    full.names = TRUE)
    sample_objects <- .steinbock_read_intensities(
        x = intensity_files,
        cell_id = extract_cellid_from,
        return_as = return_as,
        BPPARAM = BPPARAM
    )

    # Step 2 (optional): regionprops, skipped when the folder is NULL.
    if (!is.null(regionprops_folder)) {
        regionprops_dir <- file.path(path, regionprops_folder)
        sample_objects <- .steinbock_read_regionprops(
            x = sample_objects,
            cur_path = regionprops_dir,
            cell_id = extract_cellid_from,
            coords = extract_coords_from,
            return_as = return_as,
            BPPARAM = BPPARAM
        )
    }

    # Step 3 (optional): neighbor graphs, skipped when the folder is NULL.
    if (!is.null(graphs_folder)) {
        graphs_dir <- file.path(path, graphs_folder)
        sample_objects <- .steinbock_read_graphs(
            x = sample_objects,
            cur_path = graphs_dir,
            return_as = return_as,
            BPPARAM = BPPARAM
        )
    }

    # Step 4: column-bind all elements into one object. The internal metadata
    # of the first element is captured beforehand and written back onto the
    # merged result afterwards.
    first_int_metadata <- int_metadata(sample_objects[[1]])
    merged <- do.call("cbind", sample_objects)
    int_metadata(merged) <- first_int_metadata

    # Step 5 (optional): image-level metadata, skipped when image_file is NULL.
    if (!is.null(image_file)) {
        image_file_path <- file.path(path, image_file)
        merged <- .steinbock_add_image_metadata(
            merged,
            image_file = image_file_path,
            extract_imagemetadata_from = extract_imagemetadata_from
        )
    }

    # Step 6: panel information (always attempted; the helper receives the
    # base path and the panel file name separately).
    merged <- .add_panel(merged, path, panel_file, extract_names_from)

    # Step 7: name each column "<sample_id>_<ObjectNumber>".
    cell_names <- paste0(merged$sample_id, "_", merged$ObjectNumber)
    colnames(merged) <- cell_names

    merged
}
# BodenmillerGroup/imcRtools documentation built on July 1, 2024, 5:15 p.m.