#' @title Join barcodes with additional variables
#'
#' @description Each member of the \code{joinWith()}-family takes a data.frame as
#' input that contains at least the variables \emph{barcodes} and \emph{sample}.
#' (Easily obtained with the \code{get*()}-family.) It extracts the specified
#' variables and joins them over the barcode variable with the provided data.frame.
#'
#' @param object A valid spata-object.
#' @inherit check_spata_df params
#' @inherit check_features params
#' @inherit check_gene_sets params
#' @inherit check_genes params
#' @inherit check_smooth params
#' @inherit check_method params
#' @inherit verbose params
#' @inherit check_variables params
#' @inherit normalize params
#' @inherit check_uniform_genes params
#' @param average_genes Logical. If set to TRUE the average expression of the
#' specified genes is calculated and saved under one variable named 'mean_genes'.
#'
#' @details Hint: Variables of the specified data.frame \code{spata_df} that have equal names as
#' the specified features, genes and gene-sets are overwritten!
#'
#' @return The input data.frame of \code{spata_df} joined with all the
#' specified genes, gene-sets and/or features by the key-variable \emph{barcodes}.
#'
#' @export
joinWith <- function(object,
spata_df,
features = NULL,
gene_sets = NULL,
method_gs = "mean",
genes = NULL,
average_genes = FALSE,
uniform_genes = "discard",
smooth = FALSE,
smooth_span = 0.02,
verbose = TRUE,
normalize = TRUE){
# 1. Control --------------------------------------------------------------
check_object(object)
check_uniform_genes(uniform_genes)
check_smooth(df = spata_df, smooth = smooth, smooth_span = smooth_span)
confuns::check_data_frame(
df = spata_df,
var.class = list(
"barcodes" = "character",
"sample" = "character"),
ref = "spata_df")
input_list <- list("gene_sets" = gene_sets,
"genes" = genes,
"features" = features)
input_list <-
purrr::map2(.x = input_list,
.y = c("gene_sets", "genes", "features"),
.f = function(input, type){
if(!base::is.null(input)){
confuns::is_vec(x = input, mode = "character", ref = type)
fn <- stringr::str_c("check", type, sep = "_")
input <- base::do.call(fn, list(object, input))
base::return(input)
}
}) %>% purrr::discard(.p = base::is.null)
# -----
output_df <-
joinWithVariables(
object = object,
spata_df = spata_df,
variables = input_list,
method_gs = method_gs,
average_genes = average_genes,
uniform_genes = uniform_genes,
smooth = smooth,
smooth_span = smooth_span,
verbose = verbose,
normalize = normalize
)
base::return(output_df)
}
#' @rdname joinWith
#' @export
joinWithFeatures <- function(object,
spata_df,
features,
smooth = FALSE,
smooth_span = 0.02,
verbose = TRUE){
# 1. Control --------------------------------------------------------------
# lazy check
check_object(object)
check_spata_df(spata_df)
check_smooth(df = spata_df, smooth = smooth, smooth_span = smooth_span)
# adjusting check
features <- check_features(object, features = features)
smooth_ref <- base::ifelse(test = base::isTRUE(smooth), yes = " and smoothing ", no = " " )
if(base::isTRUE(verbose)){
feature_ref <- base::ifelse(base::length(features) == 1, "feature", "features")
base::message(glue::glue("Joining{smooth_ref}{base::length(features)} {feature_ref}."))
}
# overwrite check
discard <- features[features %in% base::colnames(spata_df)]
n_discard <- base::length(discard)
if(n_discard > 0){
var_ref <- base::ifelse(n_discard == 1, "variable", "variables")
base::message(glue::glue("Overwriting {n_discard} feature-{var_ref}."))
spata_df <- dplyr::select(.data = spata_df, -dplyr::all_of(discard))
}
# -----
# 2. Join data ------------------------------------------------------------
fdata <-
featureData(object, of_sample = base::unique(spata_df$sample)) %>%
dplyr::select(dplyr::all_of(x = c("barcodes", features)))
joined_df <-
dplyr::left_join(x = spata_df, y = fdata, by = "barcodes")
# -----
# 3. Smooth if specified -------------------------------------------------
if(base::isTRUE(smooth)){
joined_df <-
purrr::imap_dfr(.x = joined_df,
.f = hlpr_smooth,
coords_df = joined_df,
smooth_span = smooth_span,
aspect = "feature",
subset = features)
}
if(base::isTRUE(verbose)){base::message("Done.")}
# -----
base::return(joined_df)
}
#' @rdname joinWith
#' @export
joinWithGenes <- function(object,
spata_df,
genes,
average_genes = FALSE,
uniform_genes = "keep",
smooth = FALSE,
smooth_span = 0.02,
normalize = TRUE,
verbose = TRUE){
# 1. Control --------------------------------------------------------------
# lazy check
check_object(object)
check_spata_df(spata_df)
check_smooth(df = spata_df, smooth = smooth, smooth_span = smooth_span)
check_uniform_genes(uniform_genes)
# adjusting check
rna_assay <- exprMtr(object, of_sample = base::unique(spata_df$sample))
genes <- check_genes(object, genes = genes, rna_assay = rna_assay)
# -----
barcodes <- spata_df$barcodes
rna_assay <- base::as.matrix(rna_assay[genes, barcodes])
# 2. Discard uniformly expressed genes ------------------------------------
n_genes <- base::length(genes)
if(uniform_genes == "discard"){
ref <- base::ifelse(n_genes > 1, "genes", "gene")
if(base::isTRUE(verbose)){
base::message(glue::glue("Checking {n_genes} {ref} for uniform expression across all barcode-spots."))
pb <-
progress::progress_bar$new(
format = "Progress: [:bar] :percent eta: :eta",
total = n_genes, clear = FALSE, width = 100)
} else {
pb <- NULL
}
if(n_genes == 1){
uniformly_expressed <- hlpr_one_distinct(x = 1, base::t(rna_assay), pb = NULL, verbose = FALSE)
} else {
uniformly_expressed <-
purrr::map_lgl(.x = 1:n_genes,
.f = hlpr_one_distinct,
rna_assay = rna_assay,
pb = pb,
verbose = verbose)
}
n_uniformly_expressed <- base::sum(uniformly_expressed)
if(n_uniformly_expressed >= 1){
genes <- genes[!uniformly_expressed]
n_genes <- base::length(genes)
if(base::isTRUE(verbose)){
base::message(glue::glue("Discarded {n_uniformly_expressed} genes."))
}
if(n_genes < 1){
base::stop("All genes have been discarded due to uniform expression.")
}
} else if(base::isTRUE(verbose)){
base::message("No uniformly expressed genes found.")
}
}
# -----
# 3. Extract genes and join values with spata_df -------------------------
ref <- base::ifelse(n_genes == 1, "gene", "genes")
if(n_genes > 1 && average_genes){
if(base::isTRUE(verbose) && base::isTRUE(smooth)){
base::message(glue::glue("Averaging, joining and smoothing {n_genes} {ref}."))
} else if(base::isTRUE(verbose)){
base::message(glue::glue("Averaging and joining {n_genes} {ref}."))
}
rna_assay <- base::colMeans(rna_assay[genes, barcodes])
col_names <- "mean_genes"
n_genes <- "averaged"
} else if(n_genes > 1){
if(base::isTRUE(verbose) && base::isTRUE(smooth)){
base::message(glue::glue("Joining and smoothing {n_genes} {ref}."))
} else if(base::isTRUE(verbose)){
base::message(glue::glue("Joining {n_genes} {ref}."))
}
rna_assay <- base::t(rna_assay[genes, barcodes])
col_names <- genes
} else if(n_genes == 1){
if(base::isTRUE(average_genes)){
col_names <- "mean_genes"
n_genes <- "averaged"
} else {
col_names <- genes
}
}
# convert results to data frame with appropriate column names
gene_vls <-
base::as.data.frame(rna_assay, row.names = NULL) %>%
magrittr::set_colnames(value = col_names) %>%
dplyr::mutate(barcodes = spata_df$barcodes)
# overwrite check
discard <- col_names[col_names %in% base::colnames(spata_df)]
n_discard <- base::length(discard)
if(n_discard > 0){
ref <- base::ifelse(n_discard == 1, "variable", "variables")
base::message(glue::glue("Overwriting {n_discard} gene-{ref}."))
spata_df <- dplyr::select(.data = spata_df, -dplyr::all_of(discard))
}
# join both
joined_df <-
dplyr::left_join(x = spata_df, y = gene_vls, by = "barcodes")
# -----
# 4. Smooth and normalize if specified ------------------------------------
if(base::isTRUE(smooth)){
if(base::isTRUE(verbose)){
pb <- progress::progress_bar$new(
format = "Progress: [:bar] :percent eta: :eta",
total = base::ncol(joined_df), clear = FALSE, width = 100)
} else {
pb <- NULL
}
joined_df <-
purrr::imap_dfr(.x = joined_df,
.f = hlpr_smooth,
coords_df = joined_df,
smooth_span = smooth_span,
aspect = "gene",
subset = col_names,
pb = pb)
}
if(base::isTRUE(normalize)){
if(base::isTRUE(verbose)){base::message(glue::glue("Normalizing values."))}
joined_df <-
purrr::imap_dfr(.x = joined_df,
.f = hlpr_normalize_imap,
aspect = "Gene",
subset = col_names
)
}
if(base::isTRUE(verbose)){
base::message("Done.")
}
# -----
base::return(joined_df)
}
#' @rdname joinWith
#' @export
joinWithGeneSets <- function(object,
spata_df,
gene_sets,
method_gs = "mean",
smooth = FALSE,
smooth_span = 0.02,
normalize = TRUE,
verbose = TRUE){
# 1. Control --------------------------------------------------------------
# lazy check
check_object(object)
check_spata_df(spata_df)
check_smooth(df = spata_df, smooth = smooth, smooth_span = smooth_span)
check_method(method_gs = method_gs)
# adjusting check
gene_sets <- check_gene_sets(object, gene_sets = gene_sets)
# overwrite check
discard <- gene_sets[gene_sets %in% base::colnames(spata_df)]
n_discard <- base::length(discard)
if(n_discard > 0){
ref <- base::ifelse(n_discard == 1, "variable", "variables")
base::message(glue::glue("Overwriting {n_discard} gene-set-{ref}."))
spata_df <- dplyr::select(.data = spata_df, -dplyr::all_of(discard))
}
# -----
# 2. Extract gene set data and join with spata_df ------------------------
rna_assay <- exprMtr(object = object, of_sample = base::unique(spata_df$sample))
gene_set_df <- object@used_genesets
joined_df <- spata_df
if(base::isTRUE(smooth)){
x <- dplyr::pull(spata_df, var = x)
y <- dplyr::pull(spata_df, var = y)
smooth_ref <- glue::glue(" and smoothing ")
} else {
smooth_ref <- " "
}
#feedback vectors
filter_gs <- 0.25
ignored_gs <- glue::glue("\nIgnored gene-sets due to insufficient gene representation (less then {filter_gs*100}%) in expression matrix:")
skipped_gs <- base::character()
num_gs <- base::length(gene_sets)
if(base::isTRUE(verbose)){
ref <- base::ifelse(num_gs == 1, "gene-set", "gene-sets")
base::message(glue::glue("Calculating{smooth_ref}expression score for {base::length(gene_sets)} {ref} according to method '{method_gs}'."))
}
if(base::isTRUE(verbose)){
pb <- progress::progress_bar$new(
format = "Progress: [:bar] :percent eta: :eta",
total = num_gs, clear = FALSE, width = 100)
}
for(i in base::seq_along(gene_sets)){
if(base::isTRUE(verbose)){pb$tick()}
# get genes of gene set
gs_df <- dplyr::filter(gene_set_df, ont %in% gene_sets[i])
n_genes <- base::nrow(gs_df)
# get genes found in expression matrix
genes <-
dplyr::filter(gs_df, gene %in% base::rownames(rna_assay)) %>%
dplyr::pull(gene)
n_found_genes <- base::length(genes)
# calculate percentage of genes found
p_found_genes <- base::round(n_found_genes/n_genes, digits = 2)
# make sure that percentage is equal to or higher than the threshold
if(p_found_genes >= filter_gs){
# apply specified method to handle gene sets
if(method_gs == "mean"){
geneset_vls <-
base::colMeans(rna_assay[genes, ]) %>%
base::as.data.frame() %>%
magrittr::set_colnames(value = gene_sets[i]) %>%
tibble::rownames_to_column(var = "barcodes")
} else if(method_gs %in% c("gsva", "ssgsea", "zscore", "plage")) {
geneset_vls <-
GSVA::gsva(expr = rna_assay[genes,],
gset.idx.list = gene_set_df,
mx.diff = 1,
parallel.sz = 2,
method = method_gs,
verbose = FALSE) %>%
t() %>%
as.data.frame() %>%
magrittr::set_colnames(value = gene_sets[i]) %>%
tibble::rownames_to_column(var = "barcodes")
}
# smoothing
if(base::isTRUE(smooth)){
variable <- dplyr::pull(.data = geneset_vls, var = gene_sets[i])
model <- stats::loess(formula = variable ~ x*y, span = smooth_span)
geneset_vls[, gene_sets[i]] <- stats::predict(model)
}
# gradually add gene_set columns to joined_df
joined_df <-
dplyr::left_join(x = joined_df, y = geneset_vls, by = "barcodes")
} else {
skipped_gs <- base::append(x = skipped_gs, values = gene_sets[i])
ignored_gs <-
base::append(x = ignored_gs,
values = glue::glue("\n- '{gene_sets[i]}'. Percentage of genes found: {p_found_genes}"))
}
}
# -----
# 3. Normalize if specified -----------------------------------------------
if(base::isTRUE(normalize)){
if(base::isTRUE(verbose)){base::message(glue::glue("Normalizing values."))}
# normalize
joined_df <-
purrr::imap_dfr(.x = joined_df,
.f = hlpr_normalize_imap,
aspect = "Gene set",
subset = gene_sets)
}
if(base::isTRUE(verbose)){base::message("Done.")}
if(base::length(ignored_gs) > 1){
base::message("Warning:")
base::append(x = ignored_gs,
values = "\n(Run 'adjustGeneSetDf()' in order to avoid this warning message.)") %>%
base::writeLines()
}
# -----
base::return(joined_df)
}
#' @rdname joinWith
#' @export
joinWithVariables <- function(object,
spata_df,
variables,
method_gs = "mean",
average_genes = FALSE,
uniform_genes = "discard",
smooth = FALSE,
smooth_span = 0.02,
normalize = TRUE,
verbose = TRUE){
stopifnot(base::is.list(variables))
stopifnot(base::any(c("features", "genes", "gene_sets") %in% base::names(variables)))
if("features" %in% base::names(variables)){
spata_df <-
joinWithFeatures(object = object,
features = variables$features,
spata_df = spata_df,
smooth = smooth,
smooth_span = smooth_span,
verbose = verbose)
}
if("genes" %in% base::names(variables)){
spata_df <-
joinWithGenes(object = object,
spata_df = spata_df,
genes = variables$genes,
average_genes = average_genes,
uniform_genes = uniform_genes,
smooth = smooth,
smooth_span = smooth_span,
normalize = normalize,
verbose = verbose)
}
if("gene_sets" %in% base::names(variables)){
spata_df <-
joinWithGeneSets(object = object,
spata_df = spata_df,
gene_sets = variables$gene_sets,
method_gs = method_gs,
smooth = smooth,
smooth_span = smooth_span,
normalize = normalize,
verbose = verbose)
}
base::return(spata_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.