#' Preprocess COSMOS Inputs For Signaling to Metabolism
#'
#' Runs checks on the input data and simplifies the prior knowledge network.
#' Simplification includes the removal of (1) nodes that are not reachable from
#' signaling nodes and (2) interactions between transcription factors and target
#' genes if the target gene does not respond or the response is contradictory
#' with the change in the transcription factor activity.
#' Optionally, further TF activities are estimated via network optimization via
#' CARNIVAL and the interactions between TF and genes are filtered again.
#'
#' @param meta_network prior knowledge network (PKN). A PKN released with COSMOS
#' and derived from Omnipath, STITCHdb and Recon3D can be used. See details on
#' the data \code{\link{meta_network}}.
#' @param tf_regulon collection of transcription factor - target interactions.
#' A default collection from dorothea can be obtained by the
#' \code{\link{load_tf_regulon_dorothea}} function.
#' @param signaling_data numerical vector, where names are signaling nodes
#' in the PKN and values are from \{1, 0, -1\}. Continuous data will be
#' discretized using the \code{\link{sign}} function.
#' @param metabolic_data numerical vector, where names are metabolic nodes
#' in the PKN and values are continuous values that represents log2 fold change
#' or t-values from a differential analysis. These values are compared to
#' the simulation results (simulated nodes can take value -1, 0 or 1)
#' @param diff_expression_data (optional) numerical vector that represents the
#' results of a differential gene expression analysis. Names are gene
#' names using gene symbole and values are log fold change or
#' t-values. We use the \dQuote{\code{diff_exp_threshold}} parameter to decide
#' which genes changed significantly. Genes with NA values are considered none
#' expressed and they will be removed from the TF-gene expression interactions.
#' @param diff_exp_threshold threshold parameter (default 1) used to binarize
#' the values of \dQuote{\code{diff_expression_data}}.
#' @param maximum_network_depth integer > 0 (default: 8). Nodes that are further
#' than \dQuote{\code{maximum_network_depth}} steps from the signaling nodes on
#' the directed graph of the PKN are considered non-reachable and are removed.
#' @param remove_unexpressed_nodes if TRUE (default) removes nodes from the PKN
#' that are not expressed, see input \dQuote{\code{expressed_genes}}.
#' @param expressed_genes character vector. Names of nodes that are expressed. By
#' default we consider all the nodes that appear in \code{diff_expression_data} with
#' a numeric value (i.e. nodes with NA are removed)
#' @param filter_tf_gene_interaction_by_optimization (default:TRUE), if TRUE then runs
#' a network optimization that estimates TF activity not included in the inputs
#' and checks the consistency between the estimated activity and change in gene
#' expression. Removes interactions where TF and gene expression are inconsistent
#' @param CARNIVAL_options list that controls the options of CARNIVAL. See details
#' in \code{\link{default_CARNIVAL_options}}.
#' @importFrom utils data
#' @export
#' @return cosmos_data object with the following fields:
#' \describe{
#' \item{\code{meta_network}}{Filtered PKN}
#' \item{\code{tf_regulon}}{TF - target regulatory network}
#' \item{\code{signaling_data_bin}}{Binarised signaling data}
#' \item{\code{metabolic_data}}{Metabolomics data}
#' \item{\code{diff_expression_data_bin}}{Binarized gene expression data}
#' \item{\code{optimized_network}}{Initial optimized network if
#' \code{filter_tf_gene_interaction_by_optimization is TRUE}}
#' }
#' @seealso \code{\link{meta_network}} for meta PKN,
#' \code{\link{load_tf_regulon_dorothea}} for tf regulon,
#' \code{\link[CARNIVAL]{runCARNIVAL}}.
#'
#' @examples
#' data(toy_network)
#' data(toy_signaling_input)
#' data(toy_metabolic_input)
#' data(toy_RNA)
#' test_for <- preprocess_COSMOS_signaling_to_metabolism(meta_network = toy_network,
#' signaling_data = toy_signaling_input,
#' metabolic_data = toy_metabolic_input,
#' diff_expression_data = toy_RNA,
#' maximum_network_depth = 15,
#' remove_unexpressed_nodes = TRUE,
#' CARNIVAL_options = default_CARNIVAL_options("lpSolve"))
preprocess_COSMOS_signaling_to_metabolism <- function(meta_network = meta_network,
tf_regulon = load_tf_regulon_dorothea(),
signaling_data,
metabolic_data,
diff_expression_data = NULL,
diff_exp_threshold = 1,
maximum_network_depth = 8,
expressed_genes = NULL,
remove_unexpressed_nodes = TRUE,
filter_tf_gene_interaction_by_optimization = TRUE,
CARNIVAL_options = default_CARNIVAL_options("lpSolve")){
if(!is.null(diff_expression_data))
{
expressed_genes <- names(diff_expression_data)[!is.na(diff_expression_data)]
} else
{
expressed_genes <- NULL
remove_unexpressed_nodes <- FALSE
}
out_data <- preprocess_COSMOS_core(meta_network = meta_network,
tf_regulon = tf_regulon,
signaling_data=signaling_data,
metabolic_data=metabolic_data,
input_layer = "signaling_data",
output_layer = "metabolic_data",
diff_expression_data=diff_expression_data,
diff_exp_threshold = diff_exp_threshold,
maximum_network_depth = maximum_network_depth,
expressed_genes = expressed_genes,
remove_unexpressed_nodes = remove_unexpressed_nodes,
filter_tf_gene_interaction_by_optimization = filter_tf_gene_interaction_by_optimization,
CARNIVAL_options = CARNIVAL_options)
out_data$history = c(out_data$history,"created by preprocess_COSMOS_signaling_to_metaboism\n")
return(out_data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.