# runA --------------------------------------------------------------------
#' @title Assessment of Neural Network Set Up
#'
#' @description Assesses different neural network set ups regarding
#' the activation function and the number of bottleneck neurons.
#'
#' @inherit check_object params
#' @param expr_mtr The expression matrix that is to be used as input for the neural network.
#' @param activations Character vector. Denotes the activation functions to be assessed.
#' @param bottlenecks Numeric vector. Denotes the different numbers of bottleneck neurons to be assessed.
#' @inherit runAutoencoderDenoising params
#'
#' @return
#'
#' \itemize{
#' \item{\code{runAutoencoderAssessment()}: The spata object containing the list that holds the total variance measured by \code{irlba::prcomp_irlba()} after each
#' combination of activations/bottlenecks as well as the additional set up.}
#' \item{\code{assessAutoencoderOptions()}:
#' The list that holds the total variance measured by \code{irlba::prcomp_irlba()} after each combination
#' of activations/bottlenecks as well as the additional set up.}
#' }
#'
#' @export
runAutoencoderAssessment <- function(object,
activations,
bottlenecks,
layers = c(128, 64, 32),
dropout = 0.1,
epochs = 20,
verbose = TRUE){
check_object(object)
assessment_list <-
assessAutoencoderOptions(expr_mtr = getExpressionMatrix(object, of_sample = "", mtr_name = "scaled"),
activations = activations,
bottlenecks = bottlenecks,
layers = layers,
dropout = dropout,
epochs = epochs,
verbose = verbose)
object <- setAutoencoderAssessment(object = object, assessment_list = assessment_list)
return(object)
}
#' @title Denoise expression matrix
#'
#' @description This function constructs and uses a neural network to denoise
#' expression levels spatially.
#'
#' @inherit check_object params
#' @param layers Numeric vector of length 3. Denotes the number of neurons in the three hidden layers.
#' (default = c(128, 64, 32))
#' @param bottleneck Numeric value. Denotes the number of bottleneck neurons.
#' @param mtr_name_output Character value. Denotes the name under which the denoised matrix is stored
#' in the data slot.
#' @param dropout Numeric value. Denotes the dropout. (defaults to 0.1)
#' @param activation Character value. Denotes the activation function. (defaults to \emph{'relu'})
#' @param epochs Numeric value. Denotes the epochs of the neural network. (defaults to 20)
#' @param display_plot Logical. If set to TRUE a scatter plot of the result is displayed in the viewer pane.
#' See documentation for \code{plotAutoencoderResults()} for more information.
#' @param genes Character vector of length two. Denotes the genes to be used for the validation plot.
#' @param set_as_active Logical. If set to TRUE the denoised matrix is set as the active matrix via
#' \code{setActiveExpressionMatrix()}.
#'
#' @return A spata-object containing the denoised expression matrix in slot @@data$denoised. This matrix
#' is then denoted as the active matrix.
#'
#' @importFrom Seurat ScaleData
#'
#' @export
runAutoencoderDenoising <- function(object,
activation,
bottleneck,
mtr_name_output = "denoised",
layers = c(128, 64, 32),
dropout = 0.1,
epochs = 20,
display_plot = FALSE,
genes,
set_as_active = FALSE,
verbose = TRUE,
of_sample = NA){
# 1. Control --------------------------------------------------------------
confuns::give_feedback(
msg = base::message("Checking input for validity."),
verbose = verbose
)
check_object(object)
confuns::are_values(c("dropout", "epochs"), mode = "numeric")
confuns::are_vectors(c("activation"), mode = "character")
confuns::are_values(c("display_plot", "set_as_active", "verbose"), mode = "logical")
confuns::is_vec(x = layers, mode = "numeric", of.length = 3)
of_sample <- check_sample(object = object, of_sample = of_sample, of.length = 1)
if(base::isTRUE(display_plot)){
# check validation genes
val_genes <- check_genes(object, genes = genes, max_length = 2, of.length = 2)
base::stopifnot(base::length(val_genes) == 2)
}
x_train <- getExpressionMatrix(object, mtr_name = "scaled" , of_sample = of_sample)
# assess optimum if any of the two inputs are vectors
if(base::any(purrr::map_int(.x = list(bottleneck, activation), .f = base::length) > 1)){
assessment_list <-
assessAutoencoderOptions2(expr_mtr = x_train,
dropout = dropout,
epochs = epochs,
layers = layers,
bottlenecks = bottleneck,
activations = activation,
verbose = FALSE)
assessment_df <- assessment_list$df
max_df <- dplyr::filter(assessment_df, total_var == base::max(total_var))
activation <- max_df$activation[1]
bottleneck <- base::as.character(max_df$bottleneck[1]) %>% base::as.numeric()
msg <- glue::glue("Assessment done. Running autoencoder with: \nActivation function: '{activation}'\nBottleneck neurons: {bottleneck} ")
confuns::give_feedback(
msg = msg,
verbose = verbose
)
} else {
assessment_list <- base::tryCatch({
getAutoencoderAssessment(object, of_sample = of_sample)
}, error = function(error){
return(list())
})
}
# -----
# 2. Create network --------------------------------------------------------
input_layer <-
keras::layer_input(shape = c(ncol(x_train)))
encoder <-
input_layer %>%
keras::layer_dense(units = layers[1], activation = activation) %>%
keras::layer_batch_normalization() %>%
keras::layer_dropout(rate = dropout) %>%
keras::layer_dense(units = layers[2], activation = activation) %>%
keras::layer_dropout(rate = dropout) %>%
keras::layer_dense(units = layers[3], activation = activation) %>%
keras::layer_dense(units = bottleneck)
decoder <-
encoder %>%
keras::layer_dense(units = layers[3], activation = activation) %>%
keras::layer_dropout(rate = dropout) %>%
keras::layer_dense(units = layers[2], activation = activation) %>%
keras::layer_dropout(rate = dropout) %>%
keras::layer_dense(units = layers[1], activation = activation) %>%
keras::layer_dense(units = c(ncol(x_train)))
autoencoder_model <- keras::keras_model(inputs = input_layer, outputs = decoder)
autoencoder_model %>% keras::compile(
loss = 'mean_squared_error',
optimizer = 'adam',
metrics = c('accuracy')
)
history <-
autoencoder_model %>%
keras::fit(x_train, x_train, epochs = epochs, shuffle = TRUE,
validation_data = list(x_train, x_train), verbose = verbose)
reconstructed_points <-
autoencoder_model %>%
keras::predict_on_batch(x = x_train)
base::rownames(reconstructed_points) <- base::rownames(x_train)
base::colnames(reconstructed_points) <- base::colnames(x_train)
if(base::isTRUE(display_plot)){
plot_df <-
base::rbind(
data.frame(base::t(reconstructed_points[val_genes, ]), type = "Denoised"),
data.frame(base::t(x_train[val_genes, ]), type = "Scaled")
) %>%
dplyr::mutate(type = base::factor(x = type, levels = c("Scaled", "Denoised")))
val_plot <-
ggplot2::ggplot(data = plot_df, ggplot2::aes(x = .data[[val_genes[1]]], y = .data[[val_genes[2]]], color = type)) +
ggplot2::geom_point(alpha = 0.75) +
ggplot2::geom_smooth(method = "lm", formula = y ~ x) +
ggplot2::facet_wrap(. ~ type, scales = "free") +
ggplot2::theme_classic() +
ggplot2::theme(
strip.background = ggplot2::element_blank(),
legend.position = "none"
) +
scale_color_add_on(variable = "discrete", clrp = "milo")
plot(val_plot)
}
# 3. Return updated object ------------------------------------------------
set_up <- list("activation" = activation,
"bottleneck" = bottleneck,
"dropout" = dropout,
"epochs" = epochs,
"input_mtr" = "scaled",
"output_mtr" = mtr_name_output,
"layers" = layers)
object <- addAutoencoderSetUp(object = object,
mtr_name = mtr_name_output,
set_up_list = set_up,
of_sample = of_sample)
object <- addExpressionMatrix(object = object,
mtr_name = mtr_name_output,
expr_mtr = reconstructed_points,
of_sample = of_sample)
object <- computeGeneMetaData(object,
mtr_name = mtr_name_output,
of_sample = of_sample)
object <-
setActiveExpressionMatrix(object = object, mtr_name = mtr_name_output)
confuns::give_feedback(
msg = "Done.",
verbose = verbose
)
return(object)
}
# runB --------------------------------------------------------------------
#' @title Clustering with BayesSpace
#'
#' @description A wrapper around the BayesSpace clustering pipeline introduced
#' by \emph{Zhao et al. 2021}.
#'
#' @param q_force Numeric value or `FALSE`. If numeric, it forces the number
#' of output clusters with input value. If `FALSE`, the optimal number
#' of clusters is chosen for `q` determined by the elbow point of `BayesSpace::qTune()`.
#' @param name Character value. The name the cluster variable has in
#' the feature data of the \code{SPATA2} object. Defaults to \emph{bayes_space}.
#' @param prefix Character value. Prefix of the cluster groups.
#' @param overwrite Logical value. If TRUE, \code{name} overwrites features
#' in feature data of the \code{SPATA2} object.
#' @param assign_sce Character value or `NULL`. If character, specifies the
#' name under which the bayes space output (object of class `SingleCellExperiment`)
#' is assigned to the global environment. This makes the whole output
#' of the bayes space pipeline available instead of only adding the clustering
#' output as a grouping variable to the `SPATA2` object.
#' @param qs The values of q to evaluate. If `qs` is only one value exactly
#' that is given to `q` of `BayesSpace::spatialCluster()`. Else the optimal
#' `q` from all provided values is identified using `BayesSpace::qTune()`.
#'
#' @inherit BayesSpace::readVisium params
#' @inherit BayesSpace::qTune params
#' @inherit BayesSpace::spatialPreprocess params
#' @inherit BayesSpace::spatialCluster params
#'
#' @param ... Additional arguments given to `BayesSpace::spatialCluster()`. Exception:
#' `sce`, `q` are specified within the function.
#'
#' @inherit argument_dummy params
#'
#' @details This function is a wrapper around \code{readVisium()},
#' \code{spatialPreprocess()}, \code{qTune()} and \code{spatialCluster()}
#' of the `BayesSpace` package. The results are stored in form of a grouping
#' variable in the feature data.frame of the returned \code{SPATA2} object.
#'
#' @author Zhao E, Stone MR, Ren X, Guenthoer J, Smythe KS, Pulliam T,
#' Williams SR, Uytingco CR, Taylor SEB, Nghiem P, Bielas JH, Gottardo R.
#' Spatial transcriptomics at subspot resolution with BayesSpace.
#' Nat Biotechnol. 2021 Nov;39(11):1375-1384. doi: 10.1038/s41587-021-00935-2.
#' Epub 2021 Jun 3. PMID: 34083791; PMCID: PMC8763026.
#'
#' @return An updated \code{SPATA2} object.
#'
#' @export
#'
runBayesSpaceClustering <- function(object,
name = "bayes_space",
# given to spatialPreprocess()
n.Pcs = 15,
n.HVGs = 2000,
skip.PCA = FALSE,
log.normalize = TRUE,
assay.type = "logcounts",
BSPARAM = BiocSingular::ExactParam(),
# given to qTune()
qs = 3:10,
burn.in = c(100, 1000),
nrep = c(1000, 50000),
# given to spatialCluster()
use.dimred = "PCA",
d = 15,
init.method = "mclust",
model = "t",
gamma = 3,
mu0 = NULL,
lambda0 = NULL,
alpha = 1,
beta = 0.01,
save.chain = FALSE,
chain.fname = NULL,
# miscellaneous
prefix = "",
return_model = TRUE,
empty_remove = FALSE,
overwrite = FALSE,
assign_sce = NULL,
assign_envir = .GlobalEnv,
seed = NULL,
verbose = NULL,
...){
deprecated(...)
hlpr_assign_arguments(object)
confuns::is_vec(x = burn.in, mode = "numeric", of.length = 2)
confuns::is_vec(x = nrep, mode = "numeric", of.length = 2)
platform <- getSpatialMethod(object)@name
confuns::check_none_of(
input = name,
against = getFeatureNames(object),
ref.against = "feature names",
overwrite = overwrite
)
directory_10X <- object@information$initiation$input$directory_10X
#if(base::is.character(directory_10X) & base::dir.exists(directory_10X)){
if(FALSE){
confuns::give_feedback(
msg = glue::glue("Reading from {directory_10X}."),
verbose = verbose
)
sce <- BayesSpace::readVisium(dirname = directory_10X)
barcodes <- getBarcodes(object)
if(!base::is.character(barcodes)){
barcodes <- barcodes[[1]]
}
sce <- sce[,barcodes]
} else {
# use asSingleCellExperiment
sce <- asSingleCellExperiment(object)
}
if(base::isTRUE(empty_remove)){
require(SingleCellExperiment)
sce <- sce[, colSums(SingleCellExperiment::counts(sce)) > 0]
} else {
spots_no_read <-
SingleCellExperiment::counts(sce) %>%
base::as.matrix() %>%
base::colSums() %>%
base::as.data.frame() %>%
tibble::rownames_to_column("barcodes") %>%
dplyr::filter(.==0) %>%
dplyr::pull(barcodes)
if(base::length(spots_no_read) > 0){
confuns::give_feedback(
msg = " --- Size factor was optimized ---- ",
verbose = verbose
)
row_sub <-
stats::runif(
n = base::length(spots_no_read),
min = 1,
max = base::nrow(sce@assays@data$counts)
) %>%
base::round()
sce@assays@data$counts[row_sub, spots.no.read] <- 1
}
}
if(base::is.numeric(seed)){ base::set.seed(seed) }
confuns::give_feedback(
msg = "Running BayesSpace::spatialPreprocess().",
verbose = verbose
)
bayes_space_out <-
BayesSpace::spatialPreprocess(
sce = sce,
platform = platform,
n.PCs = n.Pcs,
n.HVGs = n.HVGs,
skip.PCA = skip.PCA,
log.normalize = log.normalize,
assay.type = assay.type,
BSPARAM = BSPARAM
)
confuns::give_feedback(
msg = "Running BayesSpace::qTune().",
verbose = verbose
)
if(base::length(qs) >= 2){
bayes_space_out <-
BayesSpace::qTune(
sce = bayes_space_out,
qs = qs,
burn.in = burn.in[1],
nrep = nrep[1]
)
logliks <- base::attr(bayes_space_out, "q.logliks")
optimal_cluster <- find_elbow_point(logliks)
confuns::give_feedback(
msg = glue::glue("Calculated optimal input for `q`: {optimal_cluster}."),
verbose = verbose
)
} else {
optimal_cluster <- base::as.integer(qs[1])
confuns::give_feedback(
msg = glue::glue("Using input for `q`: {optimal_cluster}."),
verbose = verbose
)
}
confuns::give_feedback(
msg = "Running BayesSpace::spatialCluster().",
verbose = verbose
)
bayes_space_out <-
BayesSpace::spatialCluster(
sce = bayes_space_out,
q = optimal_cluster,
use.dimred = use.dimred,
d = d,
platform = platform,
init.method = init.method,
model = model,
nrep = nrep[2],
burn.in = burn.in[2],
gamma = gamma,
mu0 = mu0,
lambda0 = lambda0,
alpha = alpha,
beta = beta,
save.chain = save.chain,
chain.fname = chain.fname
)
cluster_df <-
bayes_space_out@colData %>%
base::as.data.frame() %>%
tibble::as_tibble() %>%
dplyr::select(spot, spatial.cluster) %>%
dplyr::rename(barcodes = spot, !!rlang::sym(name) := spatial.cluster) %>%
dplyr::mutate(!!rlang::sym(name) := base::as.factor(!!rlang::sym(name)))
cluster_levels <-
base::levels(cluster_df[[name]]) %>%
stringr::str_c(prefix, .)
cluster_df <-
dplyr::mutate(
.data = cluster_df,
!!rlang::sym(name) := stringr::str_c(prefix, !!rlang::sym(name)),
!!rlang::sym(name) := base::factor(!!rlang::sym(name), levels = cluster_levels)
)
if(base::is.character(assign_sce)){
message(glue::glue("Assigning SingleCell Experiment object in global environment under {assign_sce}."))
base::assign(x = assign_sce, value = bayes_space_out, envir = assign_envir)
}
object <- SPATA2::addFeatures(object, feature_df = cluster_df, overwrite = overwrite)
return(object)
}
# runC --------------------------------------------------------------------
#' @title Identify large-scale chromosomal copy number variations
#'
#' @description This functions integrates large-scale copy number variations analysis
#' using the inferncnv-package. For more detailed information about infercnv works
#' visit \emph{https://github.com/broadinstitute/inferCNV/wiki}.
#'
#' @inherit argument_dummy params
#' @inherit check_sample params
#'
#' @param ref_annotation A data.frame in which the row names refer to the barcodes of
#' the reference matrix provided in argument \code{ref_mtr} and
#' and a column named \emph{sample} that refers to the reference group names.
#'
#' Defaults to the data.frame stored in slot \code{$annotation} of list \code{SPATA2::cnv_ref}.
#'
#' If you provide your own reference, make sure that barcodes of the reference
#' input do not overlap with barcodes of the spata-object. (e.g. by suffixing as
#' exemplified in the default list \code{SPATA2::cnv_ref}.)
#'
#' @param ref_mtr The count matrix that is supposed to be used as the reference.
#' Row names must refer to the gene names and column names must refer to
#' the barcodes. Barcodes must be identical to the row names of the data.frame
#' provided in argument \code{ref_annotation.}
#'
#' Defaults to the count matrix stored in slot \code{$mtr} of list \code{SPATA2::cnv_ref}.
#'
#' If you provide your own reference, make sure that barcodes of the reference
#' input do not overlap with barcodes of the spata-object. (e.g. by suffixing as
#' exemplified in the default list \code{SPATA2::cnv_ref}.)
#'
#' @param ref_regions A data.frame that contains information about chromosome positions.
#'
#' Defaults to the data.frame stored in slot \code{$regions} of list \code{SPATA2::cnv_ref}.
#'
#' If you provide your own regions reference, make sure that the data.frame has equal column names
#' and row names as the default input.
#'
#' @param directory_cnv_folder Character value. A directory that leads to the folder
#' in which to store temporary files, the infercnv-object as well as the output
#' heatmap.
#'
#' @param gene_pos_df Either NULL or a data.frame. If data.frame, it replaces
#' the output of \code{CONICsmat::getGenePositions()}. Must contain three
#' character variables \emph{ensembl_gene_id}, \emph{hgnc_symbol}, \emph{chromosome_name}
#' and two numeric variables \emph{start_position} and \emph{end_position.}.
#'
#' If NULL the data.frame is created via \code{CONICsmat::getGenePositions()} using
#' all gene names that appear in the count matrix and in the reference matrix.
#'
#' Defaults to the SPATA2 intern data.frame \code{SPATA2::gene_pos_df}.
#'
#' @param cnv_prefix Character value. Denotes the string with which the
#' the feature variables in which the information about the chromosomal gains and
#' losses are stored are prefixed.
#' @param save_infercnv_object Logical value. If set to TRUE the infercnv-object
#' is stored in the folder denoted in argument \code{directory_cnv_folder} under
#' \emph{'infercnv-object.RDS}.
#' @param CreateInfercnvObject A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param require_above_min_mean_expr_cutoff A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param require_above_min_cells_ref A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param normalize_counts_by_seq_depth A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param anscombe_transform A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param log2xplus1 A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param apply_max_threshold_bounds A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param smooth_by_chromosome A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param center_cell_expr_across_chromosome A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param subtract_ref_expr_from_obs A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param invert_log2 A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param clear_noise_via_ref_mean_sd A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param remove_outliers_norm A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param define_signif_tumor_subclusters A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param plot_cnv A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} and must not be specified. Input for argument
#' \code{out_dir} is taken from argument \code{directory_cnv_folder}.
#'
#' @details \code{runCnvAnalysis()} is a wrapper around all functions the infercnv-pipeline
#' is composed of. Argument \code{directory_cnv_folder} should lead to an empty folder as
#' temporary files as well as the output heatmap and the infercnv-object are stored
#' there without asking for permission which can lead to overwriting due to naming issues.
#'
#' Results (including a PCA) are stored in the slot @@cnv of the spata-object
#' which can be obtained via \code{getCnvResults()}. Additionally, the variables
#' that store the copy-number-variations for each barcode-spot are added to
#' the spata-object's feature data. The corresponding feature variables are named according
#' to the chromosome's number and the prefix denoted with the argument \code{cnv_prefix.}
#'
#' Regarding the reference data:
#' In the list \code{SPATA2::cnv_ref} we offer reference data including a count matrix
#' that results from stRNA-seq of healthy human brain tissue, an annotation data.frame as
#' well as a data.frame containing information regarding the chromosome positions.
#' You can choose to provide your own reference data by specifying the \code{ref_*}-arguments.
#' Check out the content of list \code{SPATA2::cnv_ref} and make sure that your own
#' reference input is of similiar structure regarding column names, rownames, etc.
#'
#' @return An updated spata-object containg the results in the respective slot.
#' @export
#'
runCnvAnalysis <- function(object,
ref_annotation = cnv_ref[["annotation"]], # data.frame denoting reference data as reference
ref_mtr = cnv_ref[["mtr"]], # reference data set of healthy tissue
ref_regions = cnv_ref[["regions"]], # chromosome positions
gene_pos_df = SPATA2::gene_pos_df,
directory_cnv_folder = "data-development/cnv-results", # output folder
directory_regions_df = NA, # deprecated (chromosome positions)
n_pcs = 30,
cnv_prefix = "Chr",
save_infercnv_object = TRUE,
verbose = NULL,
of_sample = NA,
CreateInfercnvObject = list(ref_group_names = "ref"),
require_above_min_mean_expr_cutoff = list(min_mean_expr_cutoff = 0.1),
require_above_min_cells_ref = list(min_cells_per_gene = 3),
normalize_counts_by_seq_depth = list(),
anscombe_transform = list(),
log2xplus1 = list(),
apply_max_threshold_bounds = list(),
smooth_by_chromosome = list(window_length = 101, smooth_ends = TRUE),
center_cell_expr_across_chromosome = list(method = "median"),
subtract_ref_expr_from_obs = list(inv_log = TRUE),
invert_log2 = list(),
clear_noise_via_ref_mean_sd = list(sd_amplifier = 1.5),
remove_outliers_norm = list(),
define_signif_tumor_subclusters = list(p_val = 0.05, hclust_method = "ward.D2", cluster_by_groups = TRUE, partition_method = "qnorm"),
plot_cnv = list(k_obs_groups = 5, cluster_by_groups = TRUE, output_filename = "infercnv.outliers_removed", color_safe_pal = FALSE,
x.range = "auto", x.center = 1, output_format = "pdf", title = "Outliers Removed")
){
# 1. Control --------------------------------------------------------------
hlpr_assign_arguments(object)
of_sample <- check_sample(object = object, of_sample = of_sample, of.lenght = 1)
confuns::are_values(c("save_infercnv_object"), mode = "logical")
confuns::check_directories(directories = directory_cnv_folder, type = "folders")
if(!base::is.na(directory_regions_df)){
base::message(
"Redirecting input for argument 'directory_regions_df' (deprecated) to ",
"argument 'ref_regions'. Please use 'ref_regions' instead."
)
ref_regions <- directory_regions_df
base::warning(
"The argument 'directory_regions_df' is deprecated in favor of 'ref_regions'. ",
"See documentation for more details."
)
}
# -----
# 2. Data extraction ------------------------------------------------------
# preparing object derived data
count_mtr <- getCountMatrix(object = object, of_sample = of_sample)
obj_anno <-
getFeatureDf(object = object, of_sample = of_sample) %>%
dplyr::select(barcodes, sample) %>%
tibble::column_to_rownames(var = "barcodes")
# reading and preparing reference data
confuns::give_feedback(msg = "Checking input for reference data.", verbose = verbose)
if(base::is.character(ref_mtr) && stringr::str_detect(ref_mtr, pattern = "\\.RDS$")){
confuns::give_feedback(
msg = glue::glue("Reading in reference matrix from directory '{ref_mtr}'."),
verbose = verbose
)
ref_mtr <- base::readRDS(file = ref_mtr)
confuns::give_feedback(msg = "Done.", verbose = verbose)
} else if(base::is.matrix(ref_mtr)){
ref_mtr <- ref_mtr
} else {
base::stop("Input for argument 'ref_mtr' must either be a directory leading to an .RDS-file or a matrix.")
}
if(base::is.character(ref_annotation) && stringr::str_detect(ref_annotation, pattern = "\\.RDS$")){
confuns::give_feedback(
msg = glue::glue("Reading in reference annotation from directory '{ref_annotation}'."),
verbose = verbose
)
ref_anno <- base::readRDS(file = ref_annotation)
confuns::give_feedback(msg = "Done.", verbose = verbose)
} else if(base::is.data.frame(ref_annotation)){
ref_anno <- ref_annotation
} else {
base::stop("Input for argument 'ref_annotation' must either be a directory leading to an .RDS-file or a data.frame.")
}
# combine data sets
confuns::give_feedback(msg = "Combining input and reference data.", verbose = verbose)
genes_inter <-
base::intersect(x = base::rownames(count_mtr), y = base::rownames(ref_mtr)) %>% base::unique()
if(base::length(genes_inter) < 500){
msg <- "Less than 500 genes match ref and count matrix."
confuns::give_feedback(msg = msg, fdb.fn = "stop", with.time = FALSE)
}
expr_inter <- base::cbind(count_mtr[genes_inter, ], ref_mtr[genes_inter, ])
anno_inter <- base::rbind(obj_anno, ref_anno)
# read and process gene positions
confuns::give_feedback(msg = "Getting gene positions.", verbose = verbose)
if(base::is.character(ref_regions) && stringr::str_detect(ref_regions, pattern = "\\.RDS$")){
regions_df <- base::readRDS(file = ref_regions)
} else if(base::is.data.frame(ref_regions)) {
regions_df <- ref_regions
} else {
base::stop("Input for argument 'ref_regions' must either be a directory leading to an .RDS-file or a data.frame")
}
if(base::is.data.frame(gene_pos_df)){
confuns::check_data_frame(
df = gene_pos_df,
var.class = list(
ensembl_gene_id = "character",
hgnc_symbol = "character",
chromosome_name = "character",
start_position = "integer",
end_position = "integer"
)
)
} else {
gene_pos_df <-
CONICSmat::getGenePositions(gene_names = base::rownames(expr_inter))
}
# -----
# 3. Analysis pipeline ----------------------------------------------------
gene_order_df <-
dplyr::select(gene_pos_df, chromosome_name, start_position, end_position, hgnc_symbol) %>%
magrittr::set_rownames(value = gene_pos_df$hgnc_symbol)
confuns::give_feedback(msg = "Starting analysis pipeline.", verbose = verbose)
infercnv_obj <-
confuns::call_flexibly(
fn = "CreateInfercnvObject",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("raw_counts_matrix" = expr_inter,
"annotations_file" = anno_inter,
"gene_order_file" = gene_order_df
)
)
confuns::give_feedback(msg = glue::glue("Removing genes from matrix with mean expression below threshold."), verbose = verbose)
infercnv_obj <-
confuns::call_flexibly(
fn = "require_above_min_mean_expr_cutoff",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
confuns::give_feedback(msg = "Removing low quality barcode spots.", verbose = verbose)
infercnv_obj <-
confuns::call_flexibly(
fn = "require_above_min_cells_ref",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
confuns::give_feedback(msg = "Normalizing counts by sequencing depth.", verbose = verbose)
infercnv_obj <-
confuns::call_flexibly(
fn = "normalize_counts_by_seq_depth",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
confuns::give_feedback(msg = "Conducting anscombe and logarithmic transformation.", verbose = verbose)
infercnv_obj <-
confuns::call_flexibly(
fn = "anscombe_transform",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
infercnv_obj <-
confuns::call_flexibly(
fn = "log2xplus1",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
confuns::give_feedback(msg = "Applying maximal threshold bounds.", verbose = verbose)
threshold <-
base::mean(base::abs(infercnv:::get_average_bounds(infercnv_obj))) %>%
base::round(digits = 2)
infercnv_obj <-
confuns::call_flexibly(
fn = "apply_max_threshold_bounds",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj, "threshold" = threshold),
v.fail = infercnv_obj
)
confuns::give_feedback(msg = "Smoothing by chromosome.", verbose = verbose)
infercnv_obj <-
confuns::call_flexibly(
fn = "smooth_by_chromosome",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
confuns::give_feedback(msg = "Centering cell expression across chromosome.", verbose = verbose)
infercnv_obj <-
confuns::call_flexibly(
fn = "center_cell_expr_across_chromosome",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
confuns::give_feedback(msg = "Subtracting reference expression from observed expression.", verbose = verbose)
infercnv_obj <-
confuns::call_flexibly(
fn = "subtract_ref_expr_from_obs",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
confuns::give_feedback(msg = "Clearing noise via reference mean standard deviation.", verbose = verbose)
infercnv_obj <-
confuns::call_flexibly(
fn = "invert_log2",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
infercnv_obj <-
confuns::call_flexibly(
fn = "clear_noise_via_ref_mean_sd",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
confuns::give_feedback(msg = "Removing outliers.", verbose = verbose)
infercnv_obj <-
confuns::call_flexibly(
fn = "remove_outliers_norm",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
confuns::give_feedback(msg = "Defining significant tumor subclusters.", verbose = verbose)
infercnv_obj <-
confuns::call_flexibly(
fn = "define_signif_tumor_subclusters",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj),
v.fail = infercnv_obj
)
confuns::give_feedback(msg = "Copy number variation pipeline completed.", verbose = verbose)
if(base::isTRUE(save_infercnv_object)){
save_dir <- stringr::str_c(directory_cnv_folder, "infercnv-obj.RDS", sep = "/")
msg <- glue::glue("Saving infercnv-object under '{save_dir}'.")
confuns::give_feedback(msg = msg, verbose = verbose)
if(class(infercnv_obj)!="infercnv"){infercnv_obj <- infercnv_obj[[1]]}
base::saveRDS(infercnv_obj, file = save_dir)
}
confuns::give_feedback(msg = "Plotting results.", verbose = verbose)
if(class(infercnv_obj)!="infercnv"){infercnv_obj <- infercnv_obj[[1]]}
plot_results <-
confuns::call_flexibly(
fn = "plot_cnv",
fn.ns = "infercnv",
fn.ns.sep = ":::",
default = list("infercnv_obj" = infercnv_obj, "out_dir" = directory_cnv_folder, "write_expr_matrix"=T),
v.fail = NULL
)
# work around a weird error in plot_cnv()
if(base::is.null(plot_results)){
confuns::give_feedback(
msg = "infercnv:::plot_cnv() failed. Attempting to plot with default setting.",
verbose = TRUE
)
plot_results <-
base::tryCatch({
infercnv::plot_cnv(infercnv_obj = infercnv_obj, out_dir = directory_cnv_folder, write_expr_matrix=T)
}, error = function(error){
NULL
})
if(base::is.null(plot_results)){
confuns::give_feedback(
msg = "inferncnv::plot_cnv() failed with default setting, too.",
verbose = TRUE
)
}
if(base::isTRUE(save_infercnv_object)){
msg <-
glue::glue(
"The infercnv-object has been saved under '{save_dir}'.",
"Please try to plot the heatmap manually."
)
confuns::give_feedback(msg = msg, verbose = TRUE)
} else {
msg <-
glue::glue(
"Please consider to run runCnvAnalysis() again with argument 'save_infercnv_object' set to TRUE.",
"This way you can plot the results manually."
)
confuns::give_feedback(msg = msg, verbose = TRUE)
}
msg <- "If the error in infercnv:::plot_cnv() persists, consider to open an issue at https://github.com/theMILOlab/SPATA2/issues."
confuns::give_feedback(msg = msg, verbose = TRUE)
}
# ----
# 4. Storing results ------------------------------------------------------
result_dir <-
stringr::str_c(directory_cnv_folder, "/", plot_cnv$output_filename, ".observations.txt")
results <- utils::read.table(result_dir)
barcodes <- base::colnames(results)
confuns::give_feedback(msg = "Summarizing cnv-results by chromosome.", verbose = verbose)
# join cnv results (per gene) with chromosome positions and summarize by chromosome
ordered_cnv_df <-
base::as.data.frame(results) %>%
tibble::rownames_to_column("hgnc_symbol") %>%
dplyr::left_join(., gene_pos_df, by = "hgnc_symbol") %>%
dplyr::group_by(chromosome_name) %>%
dplyr::select(chromosome_name, dplyr::any_of(barcodes)) %>%
dplyr::summarise_all(base::mean) %>%
dplyr::mutate(Chr = stringr::str_c(cnv_prefix, chromosome_name)) %>%
dplyr::select(Chr, dplyr::everything()) %>%
dplyr::ungroup() %>%
dplyr::select(-chromosome_name)
cnames <- c("barcodes", ordered_cnv_df$Chr)
ordered_cnv_df2 <-
dplyr::select(ordered_cnv_df, -Chr) %>%
base::t() %>%
base::as.data.frame() %>%
tibble::rownames_to_column(var = "barcodes") %>%
magrittr::set_colnames(value = cnames) %>%
dplyr::mutate(barcodes = stringr::str_replace_all(string = barcodes, pattern = "\\.", replacement = "-")) %>%
dplyr::mutate(dplyr::across(dplyr::starts_with(match = cnv_prefix), .fns = base::as.numeric)) %>%
tibble::as_tibble()
# add results to spata object
confuns::give_feedback(msg = "Adding results to the spata-object's feature data.", verbose = verbose)
# feature variables
object <-
addFeatures(
object = object,
feature_df = ordered_cnv_df2,
overwrite = TRUE
)
# cnv matrix
base::colnames(results) <-
stringr::str_replace_all(
string = base::colnames(results),
pattern = "\\.",
replacement = "-"
)
cnv_mtr <- base::as.matrix(results)
# cnv list
cnv_res <-
list(
prefix = cnv_prefix,
cnv_df = ordered_cnv_df2,
cnv_mtr = cnv_mtr,
gene_pos_df = gene_pos_df,
regions_df = regions_df
)
# post processing of data structure
# mainly renamining
cnv_res$regions_df <-
tibble::rownames_to_column(cnv_res$regions_df, var = "chrom_arm") %>%
dplyr::mutate(
chrom_arm = base::factor(chrom_arm, levels = chrom_arm_levels),
chrom = base::factor(Chrom, levels = chrom_levels),
arm =
stringr::str_extract(string = chrom_arm, pattern = "p|q") %>%
base::factor(levels = c("p", "q")),
start = Start,
end = End,
length = Length
) %>%
dplyr::select(chrom_arm, chrom, arm, start, end, length) %>%
tibble::as_tibble()
# create wide format with observational unit = chromosome instead of = chromosome arm
regions_df_wide <-
dplyr::select(cnv_res$regions_df, -length, -chrom_arm) %>%
tidyr::pivot_wider(
names_from = arm,
values_from = c(start, end),
names_sep = "_"
) %>%
dplyr::select(chrom, start_p, end_p, start_q, end_q)
gene_pos_df <-
tibble::as_tibble(cnv_res$gene_pos_df) %>%
dplyr::rename(chrom = chromosome_name) %>%
dplyr::filter(chrom %in% {{chrom_levels}}) %>% # remove not annotated genes
dplyr::mutate(
chrom = base::factor(chrom, levels = chrom_levels),
genes = hgnc_symbol
) %>%
# join wide format to compute gene wise arm location
dplyr::left_join(
x = .,
y = regions_df_wide,
by = "chrom"
) %>%
dplyr::mutate(
arm = dplyr::case_when(
# if gene starts at position bigger than end of arm p it must be located
# on arm q
start_position > end_p ~ "q",
# else it's located on arm p
TRUE ~ "p"
),
arm = base::factor(x = arm, levels = c("p", "q")),
chrom_arm = stringr::str_c(chrom, arm, sep = ""),
chrom_arm = base::factor(chrom_arm, levels = chrom_arm_levels)
) %>%
dplyr::select(-start_p, -end_p, -start_q, -end_q) %>%
dplyr::select(genes, chrom_arm, chrom, arm, start_position, end_position, dplyr::everything())
cnv_res$gene_pos_df <- gene_pos_df
# remove genes that are not annotated by chromosome
cnv_res$cnv_mtr <-
cnv_res$cnv_mtr[base::rownames(cnv_res$cnv_mtr) %in% gene_pos_df$genes,]
object <-
setCnvResults(
object = object,
cnv_list = cnv_res,
of_sample = of_sample
)
object <- computeCnvByChrArm(object, overwrite = TRUE)
# -----
confuns::give_feedback(msg = "Done.", verbose = verbose)
return(object)
}
# runD --------------------------------------------------------------------
#' @title Find differently expressed genes
#'
#' @description This function makes use of \code{Seurat::FindAllMarkers()} to compute
#' the differently expressed genes across the groups of
#' the grouping variable denoted in the argument \code{across}.
#'
#' See details for more.
#'
#' @inherit across_dummy params
#' @inherit check_sample params
#' @inherit check_method params
#' @param fc_name,base Given to corresponding arguments of \code{Seurat::FindAllMarkers()}.
#' @param ... Additional arguments given to \code{Seurat::FindAllMarkers()}
#'
#' @details This function is a wrapper around the DEA pipeline from the `Seurat`
#' package. It creates a temporary `Seurat` object via `Seurat::CreateSeuratObject()`,
#' and `Seurat::SCTransform()`. Then, `Seurat::FindAllMarkers()` is run. The output data.frame
#' is stored in the `SPATA2` object which is returned at the end.
#'
#' If \code{across} and/or \code{method_de} are vectors instead of single
#' values \code{runDEA()} iterates over all combinations in a for-loop and
#' stores the results in the respective slots. (e.g.: If \code{across} = \emph{'seurat_clusters'}
#' and \code{method_de} = \emph{c('wilcox', 'bimod')} the function computes the differently expressed genes
#' across all groups found in the feature variable \emph{seurat_clusters} according to method \emph{wilcox} and
#' stores the results in the respective slot. Then it does the same according to method \emph{bimod}.)
#'
#' The results are obtainable via \code{getDeaResults()}, `getDeaResultsDf()` and \code{getDeaGenes()}.
#'
#' @inherit update_dummy return
#'
#' @export
runDEA <- function(object,
across,
method_de = NULL,
verbose = NULL,
base = 2,
variable.features.n = 3000,
...){
hlpr_assign_arguments(object)
purrr::walk(.x = method_de, .f = ~ check_method(method_de = .x))
valid_across <- check_features(object = object, valid_classes = c("factor"), features = across)
# prepare seurat object
seurat_object <-
Seurat::CreateSeuratObject(
counts = getCountMatrix(object)
)
seurat_object <-
Seurat::SCTransform(
object = seurat_object,
variable.features.n = variable.features.n
)
seurat_object@meta.data <-
getFeatureDf(object) %>%
tibble::column_to_rownames(var = "barcodes") %>%
base::as.data.frame()
for(across in valid_across){
for(method in method_de){
if(base::isTRUE(verbose)){base::message(glue::glue("Calculating differently expressed genes across '{across}' with method '{method}'."))}
object <-
base::tryCatch({
# set the grouping based on which DEA is conducted
groups <-
purrr::set_names(
x = seurat_object@meta.data[[across]],
nm = base::rownames(seurat_object@meta.data) # set barcodes as names
)
seurat_object@meta.data$orig.ident <- base::unname(groups)
seurat_object@active.ident <- groups
n_groups <- dplyr::n_distinct(groups)
if(n_groups >= 20){
base::stop(glue::glue("The number of different groups is to high for DE-analysis. Is currently {n_groups}. Must be lower than 20. "))
} else if(n_groups < 2){
base::stop(glue::glue("There is only one unique group in the object's '{across}'-variable. runDeAnalysis() needs a minimum of two different groups."))
} else {
base::message(glue::glue("Number of groups/clusters: {n_groups}"))
}
# De analysis ----------------------------------------------------------
# perform analysis and remove seurat object afterwards
dea_results <-
Seurat::FindAllMarkers(
object = seurat_object,
test.use = method_de,
base = base,
...
)
# save results in spata object
object <-
setDeaResultsDf(
object = object,
grouping_variable = across,
dea_results = dea_results,
across = across,
method_de = method_de,
variable.features.n = variable.features.n,
...
)
object
},
error = function(error){
base::message(glue::glue("Skipping de-analysis on across-input '{across}' with method '{method}' as it resulted in the following error message: {error}"))
return(object)
}
)
}
}
return(object)
}
#' @rdname runDEA
#' @export
runDeAnalysis <- function(...){
deprecated(fn = TRUE)
object <- runDEA(...)
return(object)
}
# runG --------------------------------------------------------------------
#' @title Compute gene set enrichment
#'
#' @description Computes gene set enrichment based on the results of
#' \code{runDeAnalysis()}. See details for more.
#'
#' @param across Character vector. All grouping variables of interest.
#' @param methods_de Character vector. All differential expression methods
#' of interest.
#' @inherit runDeAnalysis params
#' @inherit getDeaResultsDf params
#' @inherit argument_dummy params
#' @inherit hypeR::hypeR params
#' @param gene_set_list A named list of character vectors. Names of slots correspond to the
#' gene set names. The slot contains the genes of the gene sets.Holds priority over
#' \code{gene_set_names}.
#' @param gene_set_names Character vector of gene set names that are taken
#' from the object's gene set data.frame.
#' @param reduce Logical value. If set to TRUE (the default) the return value
#' of \code{hypeR::hypeR()} is reduced to what is necessary for \code{SPATA2}s
#' function to work. If FALSE, the complete objects are stored. This will
#' grow the spata-objects size quickly!
#'
#' @details Computes gene set enrichment analysis using \code{hypeR::hypeR()}.
#' It does so by iterating about all possible combinations of \code{across} and
#' \code{methods_de}. Combinations for which no DE-results are found are silently
#' skipped.
#'
#' If gene sets are provided via \code{gene_set_list} argument \code{gene_set_names}
#' is ignored. Else the latter determines the gene sets used which are then taken
#' from the spata-objects gene set data.frame.
#'
#' @return An updated spata-object.
#'
#' @export
#'
runGSEA <- function(object,
across,
methods_de = "wilcox",
max_adj_pval = 0.05,
min_lfc = 0,
n_highest_lfc = NULL,
n_lowest_pval = NULL,
gene_set_list = NULL,
gene_set_names = NULL,
test = c("hypergeometric", "kstest"),
background = nGenes(object),
absolute = FALSE,
power = 1,
pval = 1,
fdr = 1,
reduce = TRUE,
quiet = TRUE,
chr_to_fct = TRUE,
verbose = NULL){
check_object(object)
hlpr_assign_arguments(object)
of_sample <- check_sample(object)
dea_overview <- getDeaOverview(object)
across <- base::unique(across)
check_one_of(
input = across,
against = base::names(dea_overview),
fdb.opt = 2,
ref.opt.2 = "grouping options across which de-analysis has been computed"
)
methods_de <- base::unique(methods_de)
check_one_of(
input = methods_de,
against = validDeAnalysisMethods()
)
# prepare gene set list
if(base::is.list(gene_set_list) && confuns::is_named(gene_set_list)){
give_feedback(msg = "Using input gene set list.", verbose = verbose)
} else {
if(base::is.character(gene_set_names)){
give_feedback(msg = "Using subset of default gene set list.", verbose = verbose)
check_one_of(
input = gene_set_names,
against = getGeneSets(object)
)
} else {
give_feedback(msg = "Using default gene set list.", verbose = verbose)
gene_set_names <- getGeneSets(object)
}
gene_set_list <- getGeneSetList(object)
gene_set_list <- gene_set_list[gene_set_names]
}
for(across_value in across){
for(method_de in methods_de){
dea_df <-
getDeaResultsDf(
object = object,
across = across_value,
method_de = method_de,
max_adj_pval = max_adj_pval,
min_lfc = min_lfc,
n_highest_lfc = n_highest_lfc,
n_lowest_pval = n_lowest_pval
)
if(!base::is.null(dea_df)){
group_names <- getGroupNames(object, discrete_feature = across_value)
n_groups <- base::length(group_names)
msg <-
glue::glue(
"Calculating enrichment of signatures across '{across_value}' (n = {n_groups}). ",
"Based on results of method '{method_de}'."
)
give_feedback(msg = msg, verbose = verbose)
object@dea[[of_sample]][[across_value]][[method_de]][["hypeR_gsea"]] <-
purrr::map2(
.x = group_names,
.y = base::seq_along(group_names),
.f = function(group, index){
signature <-
dplyr::filter(dea_df, !!rlang::sym(across_value) == {{group}}) %>%
dplyr::pull(var = "gene")
give_feedback(
msg = glue::glue("Working on group: '{group}' ({index}/{n_groups})"),
verbose = verbose
)
out <-
base::tryCatch({
hypeR::hypeR(
signature = signature,
genesets = gene_set_list,
test = test,
background = background,
power = power,
absolute = absolute,
fdr = fdr,
pval = pval,
quiet = quiet
)
}, error = function(error){
msg <-
glue::glue(
"Computing enrichment for group '{group}' resulted in an error: {error}."
) %>%
base::as.character()
})
if(base::is.character(out)){
give_feedback(msg = out, fdb.fn = "warning")
out <- NA
} else {
if(base::isTRUE(reduce)){
out <- confuns::lselect(lst = base::as.list(out), any_of(c("args", "info")), data)
}
out$data <-
dplyr::mutate(
.data = out$data,
overlap_perc = overlap/geneset,
label = base::as.factor(label)
)
}
return(out)
}
) %>%
purrr::set_names(nm = group_names) %>%
purrr::discard(.p = ~ base::any(base::is.na(.x)))
} else {
give_feedback(msg = "GSEA results already present.", verbose = verbose)
}
}
}
give_feedback(msg = "Done.", verbose = verbose)
return(object)
}
# runP --------------------------------------------------------------------
#' @title Run Principal Component Analysis
#'
#' @description Takes the expression matrix of choice and passes it to
#' \code{irlba::prcomp_irlba()}.
#'
#' @inherit check_sample params
#' @inherit getExpressionMatrix params
#' @param n_pcs Numeric value. Denotes the number of principal components to be computed.
#' @param ... Additional arguments given to \code{irlba::prcomp_irlba()}.
#'
#' @return
#'
#' \itemize{
#' \item{\code{runPca()}:}{ An updated spata-object containing the reduction variables in the pca data.frame.}
#' \item{\code{runPca2()}:}{ The direct output-object of \code{irlba::prcomp_irlba()}}.
#' }
#'
#' @export
runPca <- function(object, n_pcs = 30, mtr_name = NULL, ...){
check_object(object)
s <- object@samples
pca_res <- runPca2(object = object,
n_pcs = n_pcs,
mtr_name = mtr_name,
...)
expr_mtr <- getExpressionMatrix(object, mtr_name = mtr_name)
pca_df <-
base::as.data.frame(x = pca_res[["x"]]) %>%
dplyr::mutate(barcodes = base::colnames(expr_mtr), sample = {{s}}) %>%
dplyr::select(barcodes, sample, dplyr::everything())
object <- setPcaDf(object = object, pca_df = pca_df)
return(object)
}
#' @rdname runPca
#' @export
runPca2 <- function(object, n_pcs = 30, mtr_name = NULL, ...){
check_object(object)
expr_mtr <- getExpressionMatrix(object, mtr_name = mtr_name)
pca_res <- irlba::prcomp_irlba(x = base::t(expr_mtr), n = n_pcs, ...)
return(pca_res)
}
# runS --------------------------------------------------------------------
#' @title Clustering with Seurat
#'
#' @description A wrapper around the Seurat clustering pipeline suggested by
#' *Hao and Hao et al., 2021*.
#'
#' @param FindVariableFeatures,RunPCA,FindNeighbors,FindClusters Each argument
#' takes a list of arguments that is given to the equivalent function.
#'
#' @inherit update_dummy return
#' @export
#'
#' @examples
#'
#' object <- SPATAData::downloadSpataObject("275_T")
#'
#' object <- runSeuratClustering(object, name = "seurat_clusters")
#'
#' plotSurface(object, color_by = "seurat_clusters")
#'
runSeuratClustering <- function(object,
name = "seurat_clusters",
mtr_name = getActiveMatrixName(object),
FindVariableFeatures = list(selection.method = "vst", nfeatures = 2000),
RunPCA = list(npcs = 60),
FindNeighbors = list(dims = 1:30),
FindClusters = list(resolution = 0.8)){
confuns::check_none_of(
input = name,
against = getFeatureNames(object),
ref.against = "feature names",
)
cluster_df <-
findSeuratClusters(
object = object,
mtr_name = mtr_name,
FindVariableFeatures = FindVariableFeatures,
RunPCA = RunPCA,
FindNeighbors = FindNeighbors,
FindClusters = FindClusters
) %>%
dplyr::select(barcodes, !!rlang::sym(name) := seurat_clusters)
object <- addFeatures(object, feature_df = cluster_df)
return(object)
}
#' @title Identify genes of interest with SPARKX
#'
#' @description A wrapper around the algorithm introduced by \emph{Zhu et al. 2021}
#' to identify genes with spatial expression pattern with SPARK-X.
#'
#' @inherit SPARK::sparkx params
#' @inherit argument_dummy params
#'
#' @author Zhu, J., Sun, S. & Zhou, X. SPARK-X: non-parametric modeling enables
#' scalable and robust detection of spatial expression patterns for large spatial
#' transcriptomic studies. Genome Biol 22, 184 (2021). https://doi.org/10.1186/s13059-021-02404-0
#'
#' @return An updated spata object.
#' @export
#'
runSparkx <- function(object, numCores = 1, option = "mixture", verbose = NULL){
hlpr_assign_arguments(object)
coords_mtr <-
getCoordsDf(object) %>%
tibble::column_to_rownames(var = "barcodes") %>%
dplyr::select(x, y, -sample) %>%
base::as.matrix()
count_mtr <- getCountMatrix(object)
sparkx_out <-
SPARK::sparkx(
count_in = count_mtr,
locus_in = coords_mtr,
numCores = numCores,
option = option,
verbose = verbose
)
object@spatial[[object@samples]][["sparkx"]] <- sparkx_out
return(object)
}
# runT --------------------------------------------------------------------
#' @title Run t-Stochastic Neighbour Embedding
#'
#' @description Takes the pca-data of the object up to the principal component denoted
#' in argument \code{n_pcs} and performs tSNE with it.
#'
#' @inherit check_sample params
#' @param n_pcs Numeric value. Denotes the number of principal components used. Must be
#' smaller or equal to the number of principal components the pca data.frame contains.
#' @param tsne_perplexity Numeric value. Given to argument \code{perplexity} of
#' \code{Rtsne::Rtsne()}.
#' @param ... Additional arguments given to \code{Rtsne::Rtsne()}.
#'
#' @return
#'
#' \itemize{
#' \item{\code{runTsne()}:}{ An updated spata-object containing the reduction variables in the tsne data.frame.}
#' \item{\code{runTsne2()}:}{ The direct output-object of \code{Rtsne::Rtsne()}}
#' }
#'
#' @export
runTsne <- function(object, n_pcs = 20, tsne_perplexity = 30, of_sample = NA, ...){
check_object(object)
of_sample <- check_sample(object = object, of_sample = of_sample, of.length = 1)
confuns::are_values(c("n_pcs", "tsne_perplexity"), mode = "numeric")
tsne_res <- runTsne2(object = object,
of_sample = of_sample,
tsne_perplexity = tsne_perplexity,
...)
pca_mtr <- getPcaMtr(object = object, of_sample = of_sample, n_pcs = n_pcs)
tsne_df <-
base::data.frame(barcodes = base::rownames(pca_mtr),
sample = of_sample,
tsne1 = tsne_res$Y[,1],
tsne2 = tsne_res$Y[,2])
object <- setTsneDf(object = object, tsne_df = tsne_df, of_sample = of_sample)
}
#' @rdname runTsne
#' @export
runTsne2 <- function(object, n_pcs = 20, tsne_perplexity = 30, of_sample = NA, ...){
check_object(object)
of_sample <- check_sample(object = object, of_sample = of_sample, of.length = 1)
pca_mtr <- getPcaMtr(object = object, of_sample = of_sample, n_pcs = n_pcs)
tsne_res <- Rtsne::Rtsne(pca_mtr, perplexity = tsne_perplexity, ...)
return(tsne_res)
}
# runU --------------------------------------------------------------------
#' @title Run UMAP-Analysis
#'
#' @description Takes the pca-data of the object up to the principal component denoted
#' in argument \code{n_pcs} and performs UMAP with it.
#'
#' @inherit check_sample params
#' @inherit runTsne params
#' @param ... Additional arguments given to \code{umap::umap()}.
#'
#' @return
#'
#' \itemize{
#' \item{\code{runUmap()}:}{ An updated spata-object containing the reduction variables in the umap data.frame.}
#' \item{\code{runUmap2()}:}{ The direct output-object of \code{umap::umap()}}
#' }
#'
#' @export
runUmap <- function(object, n_pcs = 20, of_sample = NA, ...){
check_object(object)
of_sample <-
check_sample(object = object, of_sample = of_sample, of.length = 1)
umap_res <-
runUmap2(object = object, of_sample = of_sample, n_pcs = n_pcs, ...)
pca_mtr <-
getPcaMtr(object = object, of_sample = of_sample)
umap_df <-
base::data.frame(
barcodes = base::rownames(pca_mtr),
sample = of_sample,
umap1 = umap_res$layout[,1],
umap2 = umap_res$layout[,2]
)
object <- setUmapDf(object = object, umap_df = umap_df, of_sample = of_sample)
return(object)
}
#' @rdname runUmap
#' @export
runUmap2 <- function(object, n_pcs = 20, of_sample = NA, ...){
check_object(object)
of_sample <- check_sample(object = object, of_sample = of_sample, of.length = 1)
pca_mtr <- getPcaMtr(object = object, of_sample = of_sample)
umap_res <- umap::umap(d = pca_mtr, ...)
return(umap_res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.