#' Create a SpatialExperiment object
#'
#' Create a \linkS4class{SpatialExperiment} object (used in the same manner as \linkS4class{SingleCellExperiment}) that will store all of the project data.
#'
#' @param x A data frame or a path to a file (can be gzipped).
#' @param expression_cols Column names of markers/antibody expression as a vector or a grep pattern. Auto-detected if not specified.
#' @param metadata_cols Column names of cell metadata (not markers/antibodies) as a vector or a grep pattern. Auto-detected if not specified.
#' @param skip_cols Column names to ignore (such as bad antibodies) as a vector or a grep pattern.
#' @param clean_names A logical scalar. Clean the data frame column names to remove problematic characters and make them unique.
#' @param transformation A character string indicating which transformation method should be used. See \code{\link[=transform]{transform()}}.
#' @param out_dir Name of the output analysis directory. If specified, the object and the corresponding plots will be saved there.
#'
#' @return A \linkS4class{SpatialExperiment} object.
#'
#' @import SpatialExperiment stringr
#' @importFrom data.table fread
#' @importFrom dplyr select
#' @importFrom glue glue
#' @importFrom janitor clean_names
#' @importFrom methods is
#' @importFrom stats median
#' @importFrom tibble column_to_rownames
#'
#' @export
#'
#' @examples
#' tonsil_csv <- system.file("extdata", "tonsil-akoya-2018-500.csv", package = "phenomenalist")
#' tonsil_spe <- create_object(tonsil_csv, skip_cols = "DAPI|Blank", transformation = "z")
create_object <- function(x, expression_cols = NULL, metadata_cols = NULL, skip_cols = NULL, clean_names = TRUE, transformation = NULL, out_dir = NULL) {
# check if the input is valid
if (is.character(x)) {
if (file.exists(x)) {
x <- data.table::fread(x, stringsAsFactors = FALSE, data.table = FALSE)
} else {
stop("input is not a file or a data frame")
}
}
if (!is.data.frame(x)) {
stop("input is not a file or a data frame")
}
if (!is.null(out_dir)) {
if (dir.exists(out_dir)) {
stop("output directory `", out_dir, "` already exists")
}
}
# check if the data frame dimensions make sense
if (nrow(x) < 500) {
stop("data frame has too few rows/cells")
}
if (ncol(x) < 10) {
stop("data frame has too few columns")
}
message("number of input table rows: ", nrow(x))
message("number of input table columns: ", ncol(x))
message("")
# fix duplicate columns (further cleanup later with clean_col_names())
if (anyDuplicated(names(x))) {
dup_logical <- duplicated(names(x))
message("duplicate column names detected: ", toString(names(x)[dup_logical]))
names(x) <- make.unique(names(x), sep = "_")
message("duplicate columns renamed to: ", toString(names(x)[dup_logical]))
message("")
}
# remove columns that should be ignored from the table
if (!is.null(skip_cols)) {
# treating as a pattern if length of 1
if (length(skip_cols) == 1) {
# get column names matching the pattern
skip_cols <- str_subset(names(x), pattern = skip_cols)
} else {
# subset the array to the real column names (for the message)
skip_cols <- intersect(names(x), skip_cols)
}
x <- x[, setdiff(names(x), skip_cols)]
message("skipped columns: ", toString(skip_cols), "\n")
}
# create the expression data frame
if (is.null(expression_cols)) {
expression_cols <- detect_exprs_cols(x)
}
# treating as a pattern to get matching column names if length of 1
if (length(expression_cols) == 1) {
expression_cols <- str_subset(names(x), pattern = expression_cols)
}
# check that specified column names are valid
if (length(setdiff(expression_cols, names(x))) > 0) {
stop("missing markers: ", toString(setdiff(expression_cols, names(x))))
}
exprs <- x[, expression_cols]
# use the non-expression columns for the metadata data frame if not specified
if (is.null(metadata_cols)) {
metadata_cols <- setdiff(names(x), expression_cols)
}
# treating as a pattern to get matching column names if length of 1
if (length(metadata_cols) == 1) {
metadata_cols <- str_subset(names(x), pattern = metadata_cols)
}
# check that specified column names are valid
if (length(setdiff(metadata_cols, names(x))) > 0) {
stop("missing metadata columns: ", toString(setdiff(metadata_cols, names(x))))
}
x <- x[, metadata_cols, drop = FALSE]
# clean column names
if (clean_names) {
# run generic column name cleanup
exprs <- clean_col_names(exprs)
x <- clean_col_names(x)
# identify cell IDs
if (!"cell_id" %in% names(x)) {
names(x)[names(x) == "CellID"] <- "cell_id"
names(x)[names(x) == "label"] <- "cell_id"
names(x)[names(x) == "Object_Id"] <- "cell_id"
}
if (!"cell_id" %in% names(x)) {
x$cell_id <- rownames(x)
}
# force cell IDs to be strings to avoid any downstream issues
if (is.numeric(x$cell_id)) {
x$cell_id <- str_pad(as.character(x$cell_id), width = 7, pad = "0")
x$cell_id <- str_c("C", x$cell_id)
}
# make sure cell IDs are unique
x$cell_id <- make.names(x$cell_id, unique = TRUE)
}
# confirm that all the necessary metadata is present
if (!"cell_id" %in% names(x)) {
stop("data frame must contain `cell_id` column")
}
if (!"x" %in% names(x)) {
stop("data frame must contain `x` column")
}
if (!"y" %in% names(x)) {
stop("data frame must contain `y` column")
}
# set cell IDs as rownames
rownames(x) <- x$cell_id
# create the expression matrix
exprs <- as.matrix(exprs)
exprs <- exprs[, sort(colnames(exprs))]
# set cell IDs as rownames
rownames(exprs) <- rownames(x)
# check for expression columns with a high fraction of identical values
for (col_name in colnames(exprs)) {
col_vals <- exprs[, col_name]
col_med_val <- median(col_vals)
med_vals <- length(col_vals[col_vals == col_med_val])
med_vals_pct <- round((med_vals / nrow(exprs)) * 100, 1)
if (med_vals_pct > 90) {
warning(glue("{med_vals_pct}% of {col_name} values are {col_med_val} "), call. = FALSE, immediate. = TRUE)
}
}
message("number of expression columns: ", ncol(exprs))
message("number of metadata columns: ", ncol(x))
message("")
message("expression columns: ", toString(colnames(exprs)), "\n")
message("metadata columns: ", toString(colnames(x)), "\n")
# create a SpatialExperiment object
# in v1.5.2 (1/2022) spatialData slot was deprecated (move the contents to colData)
s <-
SpatialExperiment::SpatialExperiment(
assay = list(counts = t(exprs)),
colData = x,
spatialCoordsNames = c("x", "y")
)
# create output directory if specified
if (!is.null(out_dir)) {
dir.create(out_dir)
}
# apply transformation if specified
if (!is.null(transformation)) {
s <- transform(s, method = transformation, out_dir = out_dir)
s <- run_umap(s, n_threads = 4)
}
# save object if output directory is specified
if (!is.null(out_dir)) {
message("saving object")
saveRDS(s, paste0(out_dir, "/spe.rds"))
}
return(s)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.