#' Calculates the signature model with AutoGeneS.
#' This function can be used in case that the signature should be examined. The final signature is stored as a pickle file.
#'
#' @param single_cell_object A matrix with the single-cell data. Rows are genes, columns are
#' samples. Row and column names need to be set.
#' @param cell_type_annotations A vector of the cell type annotations. Has to be in the same order
#' as the samples in single_cell_object.
#' @param bulk_gene_expression OPTIONAL: A matrix of bulk data. Rows are genes, columns are samples.
#' Row and column names need to be set. If the bulk data is supplied, the single cell object
#' loses all gene rows not contained in the bulk data as not to create solution sets with them.
#' @param ngen Number of generations. The higher, the longer it takes.
#' @param mode In standard mode, the number of genes of a selection is allowed to vary arbitrarily.
#' In fixed mode, the number of selected genes is fixed (using nfeatures).
#' @param nfeatures Number of genes to be selected in fixed mode.
#' @param weights Weights applied to the objectives. For the optimization, only the sign is
#' relevant: 1 means to maximize the respective objective, -1 to minimize it and 0 means to ignore
#' it. The weight supplied here will be the default weight for selection. There must be as many
#' weights as there are objectives.
#' @param objectives The objectives to maximize or minimize. Must have the same length as weights.
#' The default objectives (correlation, distance) can be referred to using strings. For custom
#' objectives, a function has to be passed.
#' @param seed Seed for random number generators.
#' @param population_size Size of every generation (mu parameter).
#' @param offspring_size Number of individuals created in every generation (lambda parameter).
#' @param crossover_pb Crossover probability.
#' @param mutation_pb Mutation probability.
#' @param mutate_flip_pb Mutation flipping probability (fixed mode).
#' @param crossover_thres Crossover threshold (standard mode).
#' @param ind_standard_pb Probability used to generate initial population in standard mode.
#' @param plot_weights Plotting: Weights with which to weight the objective values. For example,
#' (-1,2) will minimize the first objective and maximize the the second (with higher weight).
#' @param plot_objectives Plotting: The objectives to be plotted. Contains indices of objectives.
#' The first index refers to the objective that is plotted on the x-axis. For example, (2,1) will
#' plot the third objective on the x-axis and the second on the y-axis.
#' @param index Plotting: If one int is passed, return pareto\[index\] If two ints are passed, the
#' first is an objective (0 for the first). The second is the nth element if the solutions have
#' been sorted by the objective in ascending order. For example, (0,1) will return the solution
#' that has the second-lowest value in the first objective. (1,-1) will return the solution with
#' the highest value in the second objective.
#' @param close_to Plotting: Select the solution whose objective value is closest to a certain
#' value. Assumes (objective,value). For example, (0,100) will select the solution whose value
#' for the first objective is closest to 100.
#' @param plot Whether to produce a plot at all. This just hands over the reticulate plot and
#' has some visualization problems. To get a normal plot, use the pickle file, open it in python
#' and use the plot method there.
#' @param output_dir path to directory where the picke output file will be saved. Default is tempdir().
#' @param verbose Whether to produce an output on the console.
#'
#' @return The path to the pickle file needed for the deconvolution with AutoGeneS.
#' @export
#'
#'
build_model_autogenes <- function(single_cell_object, cell_type_annotations,
bulk_gene_expression = NULL, ngen = 5000,
mode = c("fixed", "standard"), nfeatures = 500,
weights = list(-1, 1), objectives = list("correlation", "distance"),
seed = 0, population_size = 100, offspring_size = 100,
crossover_pb = 0.7, mutation_pb = 0.3, mutate_flip_pb = 1E-3,
crossover_thres = 1000, ind_standard_pb = 0.1,
plot_weights = NULL, plot_objectives = c(0, 1), index = NULL,
close_to = NULL, plot = FALSE, output_dir = tempdir(),
verbose = FALSE) {
message(
"The deconvolution with AutoGeneS is done in only two steps, however due to runtime reasons we chose to implement
it as a one-step method. Please just use the deconvolute method.
If you only want to calculate the signature, without deconvolution, please use this function."
)
if (is.null(single_cell_object)) {
stop("Parameter 'single_cell_object' is missing or null, but it is required.")
}
if (is.null(cell_type_annotations)) {
stop("Parameter 'cell_type_annotations' is missing or null, but it is required.")
}
if (!is.null(bulk_gene_expression)) {
single_cell_object <- single_cell_object[intersect(
rownames(single_cell_object),
rownames(bulk_gene_expression)
), ]
}
if (length(mode) > 1) {
mode <- mode[1]
message(paste0(mode, " was chosen because multiple values were supplied for \"mode\""))
}
# anndata_checkload()
sce <- matrix_to_singlecellexperiment(single_cell_object, cell_type_annotations)
ad <- singlecellexperiment_to_anndata(sce)
# autogenes_checkload()
ag <- reticulate::import("autogenes")
ag$init(ad, celltype_key = "label")
params <- list(
ngen = as.integer(ngen), weights = weights, objectives = objectives,
verbose = verbose, seed = as.integer(seed), mode = mode,
population_size = as.integer(population_size),
offspring_size = as.integer(offspring_size), crossover_pb = crossover_pb,
mutation_pb = mutation_pb
)
if (mode == "standard") {
params <- c(params,
crossover_thres = as.integer(crossover_thres),
ind_standard_pb = ind_standard_pb
)
} else if (mode == "fixed") {
params <- c(params, nfeatures = as.integer(nfeatures), mutate_flip_pb = mutate_flip_pb)
} else {
stop(paste0("Mode ", mode, " not recognized. Please try 'standard' or 'fixed'"))
}
do.call(ag$optimize, params)
if (plot) {
if (sum(!sapply(list(weights, index, close_to), is.null)) > 1) {
stop(
"When selecting a solution in autogenes only one of the parameters 'plot_weights', ",
"'index' and 'close_to' can be used. It is also possible to use none of them"
)
}
plot_objectives <- as.integer(plot_objectives)
if (!is.null(plot_weights)) {
ag$plot(objectives = plot_objectives, weights = as.integer(plot_weights))
} else if (!is.null(index)) {
ag$plot(objectives = plot_objectives, index = as.integer(index))
} else if (!is.null(close_to)) {
ag$plot(objectives = plot_objectives, close_to = as.integer(close_to))
} else {
ag$plot(objectives = plot_objectives)
}
}
filename <- tempfile(fileext = ".pickle", tmpdir = output_dir)
ag$save(filename)
if (verbose) {
message("Successfully saved")
}
return(filename)
}
#' Extraction of the signature matrix from autogenes .pickle files
#'
#' @param autogenes_pickle_path The path of the .pickle file generated
#' by autogenes, exactly as returned by the `build_model_autogenes` function
#' @param single_cell_object The matrix with the single-cell data used to build the signature.
#' Rows are genes, columns are samples. Row and column names need to be set.
#' @param cell_type_annotations The vector of the cell type annotations used to
#' build the signature. Has to be in the same order as the samples in single_cell_object.
#'
#' @return The signature matrix.
#' @export
#'
#'
extract_signature_autogenes <- function(autogenes_pickle_path,
single_cell_object, cell_type_annotations) {
sce <- matrix_to_singlecellexperiment(
as.matrix(single_cell_object),
cell_type_annotations
)
ad <- singlecellexperiment_to_anndata(sce)
ag <- reticulate::import("autogenes")
ag$init(ad, celltype_key = "label")
expr.median <- ag$init(ad, celltype_key = "label")
median.gene.expr <- expr.median$X
ag$load(autogenes_pickle_path)
genes <- ag$adata()$var_names
selection <- ag$select()
genes.used <- genes[selection]
signature <- median.gene.expr[, genes.used]
signature <- t(signature)
return(signature)
}
#' Deconvolution Analysis using AutoGeneS.
#' One-step function that performs signature building and deconvolution in one step without saving the signature in between.
#' A signature that has been created with @seealso [build_model_autogenes()] can be supplied as input to this function with the
#' signature parameter, although it is not mandatory.
#'
#' @param single_cell_object A matrix with the single-cell data. Rows are genes, columns are
#' samples. Row and column names need to be set.
#' @param bulk_gene_expression A matrix of bulk data. Rows are genes, columns are samples.
#' Row and column names need to be set.
#' @param cell_type_annotations A vector of the cell type annotations. Has to be in the same order
#' as the samples in single_cell_object.
#' @param signature OPTIONAL: Path to a .pickle file, created with the build_model method of AutoGeneS.
#' @param ngen (SIGNATURE) Number of generations. The higher, the longer it takes.
#' @param mode (SIGNATURE) In standard mode, the number of genes of a selection is allowed to vary arbitrarily.
#' In fixed mode, the number of selected genes is fixed (using nfeatures).
#' @param nfeatures (SIGNATURE) Number of genes to be selected in fixed mode.
#' @param weights_signature (SIGNATURE) Weights applied to the objectives. For the optimization, only the sign is
#' relevant: 1 means to maximize the respective objective, -1 to minimize it and 0 means to ignore
#' it. The weight supplied here will be the default weight for selection. There must be as many
#' weights as there are objectives.
#' @param objectives (SIGNATURE) The objectives to maximize or minimize. Must have the same length as weights.
#' The default objectives (correlation, distance) can be referred to using strings. For custom
#' objectives, a function has to be passed.
#' @param seed (SIGNATURE) Seed for random number generators.
#' @param population_size (SIGNATURE) Size of every generation (mu parameter).
#' @param offspring_size (SIGNATURE) Number of individuals created in every generation (lambda parameter).
#' @param crossover_pb (SIGNATURE) Crossover probability.
#' @param mutation_pb (SIGNATURE) Mutation probability.
#' @param mutate_flip_pb (SIGNATURE) Mutation flipping probability (fixed mode).
#' @param crossover_thres (SIGNATURE) Crossover threshold (standard mode).
#' @param ind_standard_pb (SIGNATURE) Probability used to generate initial population in standard mode.
#' @param plot_weights (SIGNATURE) Plotting: Weights with which to weight the objective values. For example,
#' (-1,2) will minimize the first objective and maximize the the second (with higher weight).
#' @param plot_objectives (SIGNATURE) Plotting: The objectives to be plotted. Contains indices of objectives.
#' The first index refers to the objective that is plotted on the x-axis. For example, (2,1) will
#' plot the third objective on the x-axis and the second on the y-axis.
#' @param plot (SIGNATURE) Whether to produce a plot at all. This just hands over the reticulate plot and
#' has some visualization problems. To get a normal plot, use the pickle file, open it in python
#' and use the plot method there.
#' @param output_dir (SIGNATURE) path to directory where the picke output file will be saved. Default is tempdir().
#' @param model (DECONVOLUTION) Regression model. Available options: NuSVR ("nusvr"), non-negative least
#' squares("nnls") and linear model ("linear").
#' @param nu (DECONVOLUTION) Nu parameter for NuSVR.
#' @param C (DECONVOLUTION) C parameter for NuSVR.
#' @param normalize_results (DECONVOLUTION) wether to normalize results according to the regression model used.
#' Default is TRUE
#' @param kernel (DECONVOLUTION) Kernel parameter for NuSVR.
#' @param degree (DECONVOLUTION) Degree parameter for NuSVR.
#' @param gamma (DECONVOLUTION) Gamma parameter for NuSVR.
#' @param coef0 (DECONVOLUTION) Coef0 parameter for NuSVR.
#' @param shrinking (DECONVOLUTION) Shrinking parameter for NuSVR.
#' @param tol (DECONVOLUTION) Tol parameter for NuSVR.
#' @param cache_size (DECONVOLUTION) Cache_size parameter for NuSVR.
#' @param max_iter (DECONVOLUTION) Max_iter parameter for NuSVR.
#' @param weights_deconvolution (DECONVOLUTION) Select Solution: Weights with which to weight the objective values. For example,
#' (-1,1) will minimize the first objective and maximize the the second (with more weight).
#' @param close_to Select Solution: Select the solution whose objective value is close to a certain
#' value. Assumes (objective,value). For example, (0,100) will select the solution whose value
#' for the first objective is closest to 100.
#' @param index Select Solution: If one int is passed, return pareto\[index\] If two ints are passed,
#' the first is an objective (0 for the first). The second is the nth element if the solutions
#' have been sorted by the objective in ascending order. For example, (0,1) will return the
#' solution that has the second-lowest value in the first objective. (1,-1) will return the
#' solution with the highest value in the second objective.
#' @param verbose Whether to produce an output on the console.
#'
#' @return A list with two elements: 'proportions' is the matrix of cell proportions and
#' 'genes_used' is a vector containing the names of the genes used for the deconvolution, what
#' is called "solution" by AutoGeneS.
#' @export
#'
deconvolute_autogenes <- function(single_cell_object, bulk_gene_expression, cell_type_annotations,
signature = NULL, ngen = 5000,
mode = c("fixed", "standard"), nfeatures = 500,
weights_signature = list(-1, 1), objectives = list("correlation", "distance"),
seed = 0, population_size = 100, offspring_size = 100,
crossover_pb = 0.7, mutation_pb = 0.3, mutate_flip_pb = 1E-3,
crossover_thres = 1000, ind_standard_pb = 0.1,
plot_weights = NULL, plot_objectives = c(0, 1),
plot = FALSE, output_dir = tempdir(),
model = c("nusvr", "nnls", "linear"), nu = 0.5, C = 0.5,
normalize_results = TRUE, kernel = "linear", degree = 3,
gamma = "scale", coef0 = 0.0, shrinking = TRUE,
tol = 1E-3, cache_size = 200, max_iter = -1,
weights_deconvolution = list(1, 0), index = NULL, close_to = NULL,
verbose = FALSE) {
if (is.null(single_cell_object)) {
stop("Parameter 'single_cell_object' is missing or null, but it is required.")
}
if (is.null(cell_type_annotations)) {
stop("Parameter 'cell_type_annotations' is missing or null, but it is required.")
}
if (is.null(bulk_gene_expression)) {
stop("Parameter 'bulk_gene_expression' is missing or null, but it is required.")
}
if (!is.null(signature)) {
if ("matrix" %in% class(signature)) {
stop(
"Parameter 'signature' requires the .pickle file created in the AutoGeneS build model ",
"method, not a matrix of values."
)
}
if (!file.exists(signature)) {
stop(
"The signature parameter has to be a file path to a .pickle file, created with the ",
"build_model method. This file was not found."
)
}
}
if (length(mode) > 1) {
mode <- mode[1]
message(paste0(mode, " was chosen because multiple values were supplied for \"mode\""))
}
if (length(model) > 1) {
model <- model[1]
message(paste0(model, " was chosen because multiple values were supplied for \"model\""))
}
if (sum(!sapply(list(weights_deconvolution, index, close_to), is.null)) > 1) {
stop(
"When selecting a solution in autogenes only one of the parameters 'weights_deconvolution', ",
"'index' and 'close_to' can be used"
)
}
sce <- matrix_to_singlecellexperiment(single_cell_object, cell_type_annotations)
ad <- singlecellexperiment_to_anndata(sce)
## Start building the signature ##
ag <- reticulate::import("autogenes")
ag$init(ad, celltype_key = "label")
params <- list(
ngen = as.integer(ngen), weights = weights_signature, objectives = objectives,
verbose = verbose, seed = as.integer(seed), mode = mode,
population_size = as.integer(population_size),
offspring_size = as.integer(offspring_size), crossover_pb = crossover_pb,
mutation_pb = mutation_pb
)
if (mode == "standard") {
params <- c(params,
crossover_thres = as.integer(crossover_thres),
ind_standard_pb = ind_standard_pb
)
} else if (mode == "fixed") {
params <- c(params, nfeatures = as.integer(nfeatures), mutate_flip_pb = mutate_flip_pb)
} else {
stop(paste0("Mode ", mode, " not recognized. Please try 'standard' or 'fixed'"))
}
do.call(ag$optimize, params)
if (plot) {
if (sum(!sapply(list(plot_weights, index, close_to), is.null)) > 1) {
stop(
"When selecting a solution in autogenes only one of the parameters 'plot_weights', ",
"'index' and 'close_to' can be used. It is also possible to use none of them"
)
}
plot_objectives <- as.integer(plot_objectives)
if (!is.null(plot_weights)) {
ag$plot(objectives = plot_objectives, weights = as.integer(plot_weights))
} else if (!is.null(index)) {
ag$plot(objectives = plot_objectives, index = as.integer(index))
} else if (!is.null(close_to)) {
ag$plot(objectives = plot_objectives, close_to = as.integer(close_to))
} else {
ag$plot(objectives = plot_objectives)
}
}
## Start deconvolution ##
if (!is.null(weights_deconvolution)) {
ag$select(weights = weights_deconvolution)
} else if (!is.null(index)) {
ag$select(index = index)
} else if (!is.null(close_to)) {
ag$select(close_to = close_to)
} else {
ag$select()
}
selection <- ag$selection()
genes <- ag$adata()$var_names
genes_used <- genes[selection]
if (verbose) {
message(length(genes_used), " genes are used for the deconvolution")
}
bulk_data <- data.frame(t(bulk_gene_expression))
result <- ag$deconvolve(bulk_data,
model = model, nu = nu, C = C,
kernel = kernel, degree = as.integer(degree), gamma = gamma, coef0 = coef0,
shrinking = shrinking, tol = tol, cache_size = as.integer(cache_size),
verbose = verbose, max_iter = as.integer(max_iter)
)
# Addition: this is needed to adjust the format of column names
names <- ag$adata()$obs_names
if (is.atomic(names)) {
colnames(result) <- names
} else {
col_names <- names$to_frame()
col_names_vectorized <- as.vector(col_names[, 1])
colnames(result) <- col_names_vectorized
}
rownames(result) <- rownames(bulk_data)
if (normalize_results) {
celltypes <- colnames(result)
if (model == "nusvr") {
result[result < 0] <- 0
}
result <- t(apply(result, 1, function(row) row / sum(row)))
if (length(celltypes) == 1) {
result <- t(result)
colnames(result) <- celltypes
}
}
return(list(proportions = result, genes_used = genes_used))
}
#' Install AutoGeneS
#'
#' Install AutoGeneS into a previously defined python environment.
#' A python environment can be defined by set_virtualenv() or set_python() command.
#' Alternatively a new environment can be created via create_virtualenv() method.
#'
install_autogenes <- function() {
# reticulate::py_install("autogenes", pip = TRUE)
system("pip install git+https://github.com/omnideconv/AutoGeneS.git")
}
#' Checks if python and the autogenes module are available and installs them if they are not.
#' @param python the python env
#'
autogenes_checkload <- function(python = NULL) {
if (!python_available()) {
message("Setting up python environment..")
init_python(python)
if (!python_available()) {
stop(
"Could not initiate miniconda python environment. Please set up manually with ",
"init_python(python=your/python/version)"
)
}
}
anndata_checkload()
if (!reticulate::py_module_available("autogenes")) {
install_autogenes()
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.