#' @rdname runAutoencoderAssessment
#' @keywords internal
assessAutoencoderOptions <- function(expr_mtr,
                                     activations,
                                     bottlenecks,
                                     layers = c(128, 64, 32),
                                     dropout = 0.1,
                                     epochs = 20,
                                     verbose = TRUE){
  # 1. Control --------------------------------------------------------------

  confuns::check_one_of(input = activations, against = activation_fns)
  confuns::are_values(c("dropout", "epochs"), mode = "numeric")
  confuns::is_vec(x = layers, mode = "numeric", of.length = 3)
  confuns::is_vec(x = bottlenecks, mode = "numeric")

  # 2. Assess all combinations in for loop ----------------------------------

  activations_list <-
    base::vector(mode = "list", length = base::length(activations)) %>%
    purrr::set_names(nm = activations)

  for(a in base::seq_along(activations)){

    activation <- activations[a]

    bottlenecks_list <-
      base::vector(mode = "list", length = base::length(bottlenecks)) %>%
      purrr::set_names(nm = stringr::str_c("bn", bottlenecks, sep = "_"))

    for(b in base::seq_along(bottlenecks)){

      bottleneck <- bottlenecks[b]

      base::message(Sys.time())
      base::message(glue::glue("Assessing activation option {a}/{base::length(activations)}: '{activation}' and bottleneck option {b}/{base::length(bottlenecks)}: {bottleneck}"))
      # Neural network ----------------------------------------------------------

      input_layer <-
        keras::layer_input(shape = c(base::ncol(expr_mtr)))

      encoder <-
        input_layer %>%
        keras::layer_dense(units = layers[1], activation = activation) %>%
        keras::layer_batch_normalization() %>%
        keras::layer_dropout(rate = dropout) %>%
        keras::layer_dense(units = layers[2], activation = activation) %>%
        keras::layer_dropout(rate = dropout) %>%
        keras::layer_dense(units = layers[3], activation = activation) %>%
        keras::layer_dense(units = bottleneck)

      decoder <-
        encoder %>%
        keras::layer_dense(units = layers[3], activation = activation) %>%
        keras::layer_dropout(rate = dropout) %>%
        keras::layer_dense(units = layers[2], activation = activation) %>%
        keras::layer_dropout(rate = dropout) %>%
        keras::layer_dense(units = layers[1], activation = activation) %>%
        keras::layer_dense(units = base::ncol(expr_mtr))

      autoencoder_model <- keras::keras_model(inputs = input_layer, outputs = decoder)

      autoencoder_model %>%
        keras::compile(
          loss = 'mean_squared_error',
          optimizer = 'adam',
          metrics = c('accuracy')
        )
      history <-
        autoencoder_model %>%
        keras::fit(expr_mtr, expr_mtr, epochs = epochs, shuffle = TRUE,
                   validation_data = list(expr_mtr, expr_mtr), verbose = verbose)

      reconstructed_points <-
        autoencoder_model %>%
        keras::predict_on_batch(x = expr_mtr)

      base::rownames(reconstructed_points) <- base::rownames(expr_mtr)
      base::colnames(reconstructed_points) <- base::colnames(expr_mtr)

      # PCA afterwards ----------------------------------------------------------

      bottlenecks_list[[b]] <- irlba::prcomp_irlba(base::t(reconstructed_points), n = 30)

    }

    activations_list[[a]] <- bottlenecks_list

  }
  # 3. Summarize in data.frame ----------------------------------------------

  res_df <-
    purrr::imap_dfr(.x = activations_list, .f = function(.list, .name){

      data.frame(
        activation = .name,
        bottleneck = stringr::str_remove(string = base::names(.list), pattern = "^bn_"),
        total_var = purrr::map_dbl(.x = .list, .f = "totalvar")
      )

    }) %>% tibble::remove_rownames()

  res_df$bottleneck <- base::factor(res_df$bottleneck, levels = base::unique(res_df$bottleneck))

  pca_scaled <- irlba::prcomp_irlba(x = base::t(expr_mtr), n = 30)

  assessment_list <- list("df" = res_df,
                          "set_up" = list("epochs" = epochs, "dropout" = dropout, "layers" = layers),
                          "scaled_var" = pca_scaled$totalvar)

  return(assessment_list)

}
#' @title Print autoencoder summary
#'
#' @description Prints a human readable summary about the set up of the neural network that
#' was constructed to generate the denoised expression matrix denoted in \code{mtr_name}.
#'
#' @inherit check_sample params
#'
#' @inherit print_family return
#' @export
#' @keywords internal
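#' @examples
#' \dontrun{
#' # A minimal usage sketch; `spata_obj` is a hypothetical spata object on which
#' # runAutoencoderDenoising() has already been called.
#' printAutoencoderSummary(spata_obj, mtr_name = "denoised")
#' }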
printAutoencoderSummary <- function(object, mtr_name = "denoised", of_sample = ""){

  check_object(object)

  of_sample <- check_sample(object = object, of_sample = of_sample, of.length = 1)

  info_list <- getAutoencoderSetUp(object = object, of_sample = of_sample, mtr_name = mtr_name)

  if(base::is.null(info_list)){
    base::stop("Could not find any information. It seems as if function 'runAutoencoderDenoising()' has not been run yet.")
  }

  feedback <- glue::glue("{introduction}: \n\nActivation function: {activation}\nBottleneck neurons: {bn}\nDropout: {do}\nEpochs: {epochs}\nLayers: {layers}",
                         introduction = glue::glue("The neural network that generated matrix '{mtr_name}' was constructed with the following adjustments"),
                         activation = info_list$activation,
                         bn = info_list$bottleneck,
                         do = info_list$dropout,
                         epochs = info_list$epochs,
                         layers = glue::glue_collapse(x = info_list$layers, sep = ", ", last = " and "))

  base::return(feedback)

}
#' @title Plot total variance of different neural networks
#'
#' @description Visualizes the results of \code{assessAutoencoderOptions()} by displaying the
#' total variance of each combination of an activation function (such as \emph{relu, sigmoid})
#' and the number of bottleneck neurons.
#'
#' The results depend on further adjustments like number of layers, dropout and number of epochs.
#'
#' @inherit argument_dummy params
#'
#' @inherit ggplot_family return
#' @export
#' @keywords internal
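#' @examples
#' \dontrun{
#' # A minimal usage sketch; `spata_obj` is a hypothetical spata object on which
#' # runAutoencoderAssessment() has already been called.
#' plotAutoencoderAssessment(spata_obj, activation_subset = c("relu", "sigmoid"))
#' }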
plotAutoencoderAssessment <- function(object, activation_subset = NULL, clrp = NULL, verbose = NULL){

  hlpr_assign_arguments(object)

  assessment_list <- getAutoencoderAssessment(object)

  plot_df <- assessment_list$df

  if(base::is.character(activation_subset)){

    confuns::check_vector(input = activation_subset,
                          against = activation_fns,
                          ref.input = "input for argument 'activation_subset'",
                          ref.against = "valid activation functions")

    plot_df <- dplyr::filter(plot_df, activation %in% activation_subset)

  }

  if(base::isTRUE(verbose)){

    msg <- glue::glue("Additional set up of neural network: \n\nEpochs: {epochs}\nDropout: {dropout}\nLayers: {layers}",
                      epochs = assessment_list$set_up$epochs,
                      dropout = assessment_list$set_up$dropout,
                      layers = glue::glue_collapse(x = assessment_list$set_up$layers, sep = ", ", last = " and "))

    base::writeLines(text = msg)

  }

  ggplot2::ggplot(data = plot_df, mapping = ggplot2::aes(x = bottleneck, y = total_var)) +
    ggplot2::geom_point(mapping = ggplot2::aes(color = activation), size = 3) +
    ggplot2::geom_line(mapping = ggplot2::aes(group = activation, color = activation), size = 1.5) +
    ggplot2::facet_wrap(facets = . ~ activation) +
    scale_color_add_on(aes = "color", clrp = clrp, variable = plot_df$activation) +
    ggplot2::theme_bw() +
    ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "Number of Bottleneck Neurons", y = "Total Variance")

}
#' @title Plot scaled vs. denoised expression
#'
#' @description Compares the distribution of the expression levels of \code{genes}
#' between the scaled matrix and the denoised matrix.
#'
#' @inherit check_sample params
#' @inherit runAutoencoderDenoising params
#' @inherit check_pt params
#' @inherit ggplot_family return
#'
#' @details This function requires a denoised matrix in slot @@data generated
#' by \code{runAutoencoderDenoising()} as well as a scaled matrix.
#'
#' @export
#' @keywords internal
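#' @examples
#' \dontrun{
#' # A minimal usage sketch; `spata_obj` is a hypothetical spata object that
#' # contains both a scaled and a denoised matrix. Gene names are placeholders.
#' plotAutoencoderResults(spata_obj, genes = c("GFAP", "METRN"))
#' }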
plotAutoencoderResults <- function(object,
                                   genes,
                                   mtr_name = "denoised",
                                   normalize = NULL,
                                   scales = NULL,
                                   pt_alpha = NULL,
                                   pt_clrp = NULL,
                                   pt_size = NULL,
                                   verbose = NULL,
                                   of_sample = NA,
                                   ...){

  # 1. Control --------------------------------------------------------------

  hlpr_assign_arguments(object)

  check_pt(pt_size = pt_size, pt_alpha = pt_alpha, pt_clrp = pt_clrp)

  of_sample <- check_sample(object, of_sample = of_sample, of.length = 1)

  genes <- check_genes(object, genes = genes, of.length = 2, fdb_fn = "stop")

  denoised <- getExpressionMatrix(object, of_sample = of_sample, mtr_name = mtr_name)
  scaled <- getExpressionMatrix(object, of_sample = of_sample, mtr_name = "scaled")

  # 2. Join data ------------------------------------------------------------

  plot_df <-
    base::rbind(
      data.frame(base::t(denoised[genes, ]), type = "Denoised"),
      data.frame(base::t(scaled[genes, ]), type = "Scaled")
    ) %>%
    dplyr::mutate(type = base::factor(x = type, levels = c("Scaled", "Denoised")))

  if(base::isTRUE(normalize)){

    plot_df <-
      dplyr::group_by(plot_df, type) %>%
      dplyr::mutate(dplyr::across(.cols = dplyr::all_of(genes), .fns = confuns::normalize))

  }

  # 3. Plot -----------------------------------------------------------------

  ggplot2::ggplot(data = plot_df, ggplot2::aes(x = .data[[genes[1]]], y = .data[[genes[2]]], color = type)) +
    ggplot2::geom_point(alpha = pt_alpha, size = pt_size) +
    ggplot2::geom_smooth(method = "lm", formula = y ~ x) +
    confuns::call_flexibly(fn = "facet_wrap", fn.ns = "ggplot2", default = list(facets = . ~ type, scales = scales)) +
    ggplot2::theme_classic() +
    ggplot2::theme(
      strip.background = ggplot2::element_blank(),
      legend.position = "none"
    ) +
    scale_color_add_on(aes = "color", variable = "discrete", clrp = pt_clrp)

}
#' @title Assessment of Neural Network Set Up
#'
#' @description Assesses different neural network set ups regarding
#' the activation function and the number of bottleneck neurons.
#'
#' @inherit argument_dummy params
#' @param expr_mtr The expression matrix that is to be used as input for the neural network.
#' @param activations Character vector. Denotes the activation functions to be assessed.
#' @param bottlenecks Numeric vector. Denotes the different numbers of bottleneck neurons to be assessed.
#' @inherit runAutoencoderDenoising params
#'
#' @return
#'
#' \itemize{
#' \item{\code{runAutoencoderAssessment()}: The spata object containing the list that holds the total variance measured by \code{irlba::prcomp_irlba()} after each
#' combination of activations/bottlenecks as well as the additional set up.}
#' \item{\code{assessAutoencoderOptions()}:
#' The list that holds the total variance measured by \code{irlba::prcomp_irlba()} after each combination
#' of activations/bottlenecks as well as the additional set up.}
#' }
#'
#' @keywords internal
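#' @examples
#' \dontrun{
#' # A minimal usage sketch; `spata_obj` is a hypothetical spata object that
#' # contains a scaled expression matrix. The activation functions and
#' # bottleneck sizes below are example choices, not defaults.
#' spata_obj <-
#'   runAutoencoderAssessment(
#'     object = spata_obj,
#'     activations = c("relu", "selu", "sigmoid"),
#'     bottlenecks = c(32, 56, 64)
#'   )
#'
#' plotAutoencoderAssessment(spata_obj)
#' }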
runAutoencoderAssessment <- function(object,
                                     activations,
                                     bottlenecks,
                                     layers = c(128, 64, 32),
                                     dropout = 0.1,
                                     epochs = 20,
                                     verbose = TRUE){

  check_object(object)

  assessment_list <-
    assessAutoencoderOptions(expr_mtr = getExpressionMatrix(object, of_sample = "", mtr_name = "scaled"),
                             activations = activations,
                             bottlenecks = bottlenecks,
                             layers = layers,
                             dropout = dropout,
                             epochs = epochs,
                             verbose = verbose)

  object <- setAutoencoderAssessment(object = object, assessment_list = assessment_list)

  return(object)

}
#' @title Denoise expression matrix
#'
#' @description This function constructs and uses a neural network to denoise
#' expression levels spatially.
#'
#' @inherit argument_dummy params
#' @param layers Numeric vector of length 3. Denotes the number of neurons in the three hidden layers.
#' (default = c(128, 64, 32))
#' @param bottleneck Numeric value. Denotes the number of bottleneck neurons.
#' @param mtr_name_output Character value. Denotes the name under which the denoised matrix is stored
#' in the data slot.
#' @param dropout Numeric value. Denotes the dropout rate. (defaults to 0.1)
#' @param activation Character value. Denotes the activation function. (defaults to \emph{'relu'})
#' @param epochs Numeric value. Denotes the epochs of the neural network. (defaults to 20)
#' @param display_plot Logical. If set to TRUE a scatter plot of the result is displayed in the viewer pane.
#' See documentation for \code{plotAutoencoderResults()} for more information.
#' @param genes Character vector of length two. Denotes the genes to be used for the validation plot.
#' @param set_as_active Logical. If set to TRUE the denoised matrix is set as the active matrix via
#' \code{setActiveExpressionMatrix()}.
#'
#' @return A spata-object containing the denoised expression matrix in slot @@data$denoised. If
#' \code{set_as_active} is set to TRUE, this matrix is denoted as the active matrix.
#'
#' @importFrom Seurat ScaleData
#'
#' @export
#' @keywords internal
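#' @examples
#' \dontrun{
#' # A minimal usage sketch; `spata_obj` is a hypothetical spata object that
#' # contains a scaled expression matrix. Gene names are placeholders for the
#' # two genes used in the validation plot.
#' spata_obj <-
#'   runAutoencoderDenoising(
#'     object = spata_obj,
#'     activation = "relu",
#'     bottleneck = 56,
#'     display_plot = TRUE,
#'     genes = c("GFAP", "METRN"),
#'     set_as_active = TRUE
#'   )
#' }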
runAutoencoderDenoising <- function(object,
                                    activation = "relu",
                                    bottleneck = 56,
                                    mtr_name_output = "denoised",
                                    layers = c(128, 64, 32),
                                    dropout = 0.1,
                                    epochs = 20,
                                    display_plot = FALSE,
                                    genes = NULL,
                                    set_as_active = FALSE,
                                    verbose = TRUE,
                                    of_sample = NA){

  # 1. Control --------------------------------------------------------------

  confuns::give_feedback(
    msg = "Checking input for validity.",
    verbose = verbose
  )

  check_object(object)

  confuns::are_values(c("dropout", "epochs"), mode = "numeric")
  confuns::are_vectors(c("activation"), mode = "character")
  confuns::are_values(c("display_plot", "set_as_active", "verbose"), mode = "logical")
  confuns::is_vec(x = layers, mode = "numeric", of.length = 3)

  of_sample <- check_sample(object = object, of_sample = of_sample, of.length = 1)

  if(base::isTRUE(display_plot)){

    # check validation genes
    val_genes <- check_genes(object, genes = genes, max_length = 2, of.length = 2)

    base::stopifnot(base::length(val_genes) == 2)

  }
  x_train <- getExpressionMatrix(object, mtr_name = "scaled", of_sample = of_sample)

  # assess optimum if any of the two inputs are vectors
  if(base::any(purrr::map_int(.x = list(bottleneck, activation), .f = base::length) > 1)){

    assessment_list <-
      assessAutoencoderOptions(
        expr_mtr = x_train,
        dropout = dropout,
        epochs = epochs,
        layers = layers,
        bottlenecks = bottleneck,
        activations = activation,
        verbose = FALSE
      )

    assessment_df <- assessment_list$df

    max_df <- dplyr::filter(assessment_df, total_var == base::max(total_var))

    activation <- max_df$activation[1]
    bottleneck <- base::as.character(max_df$bottleneck[1]) %>% base::as.numeric()

    msg <- glue::glue("Assessment done. Running autoencoder with: \nActivation function: '{activation}'\nBottleneck neurons: {bottleneck}")

    confuns::give_feedback(
      msg = msg,
      verbose = verbose
    )

  } else {

    assessment_list <- base::tryCatch({

      getAutoencoderAssessment(object, of_sample = of_sample)

    }, error = function(error){

      return(list())

    })

  }
  # -----

  # 2. Create network --------------------------------------------------------

  input_layer <-
    keras::layer_input(shape = c(base::ncol(x_train)))

  encoder <-
    input_layer %>%
    keras::layer_dense(units = layers[1], activation = activation) %>%
    keras::layer_batch_normalization() %>%
    keras::layer_dropout(rate = dropout) %>%
    keras::layer_dense(units = layers[2], activation = activation) %>%
    keras::layer_dropout(rate = dropout) %>%
    keras::layer_dense(units = layers[3], activation = activation) %>%
    keras::layer_dense(units = bottleneck)

  decoder <-
    encoder %>%
    keras::layer_dense(units = layers[3], activation = activation) %>%
    keras::layer_dropout(rate = dropout) %>%
    keras::layer_dense(units = layers[2], activation = activation) %>%
    keras::layer_dropout(rate = dropout) %>%
    keras::layer_dense(units = layers[1], activation = activation) %>%
    keras::layer_dense(units = base::ncol(x_train))

  autoencoder_model <- keras::keras_model(inputs = input_layer, outputs = decoder)

  autoencoder_model %>%
    keras::compile(
      loss = 'mean_squared_error',
      optimizer = 'adam',
      metrics = c('accuracy')
    )

  history <-
    autoencoder_model %>%
    keras::fit(x_train, x_train, epochs = epochs, shuffle = TRUE,
               validation_data = list(x_train, x_train), verbose = verbose)

  reconstructed_points <-
    autoencoder_model %>%
    keras::predict_on_batch(x = x_train)

  base::rownames(reconstructed_points) <- base::rownames(x_train)
  base::colnames(reconstructed_points) <- base::colnames(x_train)
  if(base::isTRUE(display_plot)){

    plot_df <-
      base::rbind(
        data.frame(base::t(reconstructed_points[val_genes, ]), type = "Denoised"),
        data.frame(base::t(x_train[val_genes, ]), type = "Scaled")
      ) %>%
      dplyr::mutate(type = base::factor(x = type, levels = c("Scaled", "Denoised")))

    val_plot <-
      ggplot2::ggplot(data = plot_df, ggplot2::aes(x = .data[[val_genes[1]]], y = .data[[val_genes[2]]], color = type)) +
      ggplot2::geom_point(alpha = 0.75) +
      ggplot2::geom_smooth(method = "lm", formula = y ~ x) +
      ggplot2::facet_wrap(. ~ type, scales = "free") +
      ggplot2::theme_classic() +
      ggplot2::theme(
        strip.background = ggplot2::element_blank(),
        legend.position = "none"
      ) +
      scale_color_add_on(variable = "discrete", clrp = "milo")

    base::plot(val_plot)

  }
  # 3. Return updated object ------------------------------------------------

  set_up <- list("activation" = activation,
                 "bottleneck" = bottleneck,
                 "dropout" = dropout,
                 "epochs" = epochs,
                 "input_mtr" = "scaled",
                 "output_mtr" = mtr_name_output,
                 "layers" = layers)

  object <- addAutoencoderSetUp(object = object,
                                mtr_name = mtr_name_output,
                                set_up_list = set_up,
                                of_sample = of_sample)

  object <- addExpressionMatrix(object = object,
                                mtr_name = mtr_name_output,
                                expr_mtr = reconstructed_points,
                                of_sample = of_sample)

  if(base::isTRUE(set_as_active)){

    object <- setActiveMatrix(object = object, mtr_name = mtr_name_output)

  }

  confuns::give_feedback(
    msg = "Done.",
    verbose = verbose
  )

  return(object)

}
#' @title Obtain information about the optimal neural network set up
#'
#' @description Extracts the results of \code{runAutoencoderAssessment()}.
#'
#' @inherit argument_dummy params
#'
#' @return A named list containing, among additional set up information, a data.frame that holds
#' the total variance measured by \code{irlba::prcomp_irlba()} for each
#' combination of activations/bottlenecks.
#' @export
#' @keywords internal
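#' @examples
#' \dontrun{
#' # A minimal usage sketch; `spata_obj` is a hypothetical spata object on which
#' # runAutoencoderAssessment() has already been called.
#' assessment <- getAutoencoderAssessment(spata_obj)
#'
#' assessment$df
#' }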
getAutoencoderAssessment <- function(object, of_sample = NA, ...){

  check_object(object)

  of_sample <- check_sample(object = object, of_sample = of_sample, of.length = 1)

  assessment <- object@autoencoder[[of_sample]]$assessment

  test <- !(base::identical(assessment, list()) | base::is.null(assessment))

  ref_x <- "autoencoder assessment information"
  ref_fns <- "function runAutoencoderAssessment() first"

  check_availability(
    test = test,
    ref_x = ref_x,
    ref_fns = ref_fns
  )

  base::return(assessment)

}
#' @title Obtain information on neural network
#'
#' @description Returns the argument input that was chosen to construct the
#' neural network that generated the matrix denoted in \code{mtr_name}.
#'
#' @inherit getExpressionMatrix params
#'
#' @return A named list.
#' @export
#' @keywords internal
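#' @examples
#' \dontrun{
#' # A minimal usage sketch; `spata_obj` is a hypothetical spata object that
#' # contains a matrix named 'denoised'.
#' getAutoencoderSetUp(spata_obj, mtr_name = "denoised")
#' }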
getAutoencoderSetUp <- function(object, mtr_name, of_sample = NA){

  check_object(object)

  of_sample <- check_sample(object = object, of_sample = of_sample, of.length = 1)

  nn_set_up <-
    object@autoencoder[[of_sample]][["nn_set_ups"]][[mtr_name]]

  if(base::is.null(nn_set_up)){
    base::stop(glue::glue("Could not find any autoencoder information for matrix '{mtr_name}' of sample '{of_sample}'."))
  }

  base::return(nn_set_up)

}
#' @title Set results of autoencoder assessment
#'
#' @inherit argument_dummy params
#' @param assessment_list Named list with slots \code{$df} and \code{$set_up}.
#'
#' @return A spata-object.
#' @keywords internal
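#' @examples
#' \dontrun{
#' # A minimal sketch of the expected input structure; all values are
#' # placeholders, `spata_obj` is a hypothetical spata object.
#' assessment_list <- list(
#'   df = data.frame(
#'     activation = c("relu", "relu"),
#'     bottleneck = c("32", "56"),
#'     total_var = c(250.3, 261.7)
#'   ),
#'   set_up = list(epochs = 20, dropout = 0.1, layers = c(128, 64, 32))
#' )
#'
#' spata_obj <- setAutoencoderAssessment(spata_obj, assessment_list = assessment_list)
#' }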
setAutoencoderAssessment <- function(object, assessment_list, of_sample = ""){

  check_object(object)

  of_sample <- check_sample(object = object, of_sample = of_sample, of.length = 1)

  confuns::check_data_frame(
    df = assessment_list$df,
    var.class = list("activation" = c("character", "factor"),
                     "bottleneck" = c("character", "factor"),
                     "total_var" = c("numeric", "integer", "double")),
    ref = "assessment_list$df"
  )

  object@autoencoder[[of_sample]][["assessment"]] <- assessment_list

  return(object)

}