#' Rename genes in the differential network analysis results
#'
#' @param results A 'dnapath_list' or 'dnapath' object.
#' @param gene_mat A matrix of key value pairs. The first column should contain
#' current gene names, and the second column the new names. Any genes that are
#' not in this matrix will retain their current names.
#' @return A 'dnapath_list' object containing only those pathways with differential
#' connectivity p-values below `alpha`.
#' @export
rename_genes <- function(results, gene_mat) {
if(is.vector(gene_mat) && length(gene_mat) == 2) {
gene_mat <- matrix(gene_mat, ncol = 2)
} else if(ncol(gene_mat) != 2) {
stop("'gene_mat' must be a 2 column matrix or vector of length 2.")
}
if(class(results) == "dnapath_list") {
new_gene_names <- results[[1]]$genes
# Subset gene_mat onto genes contained in the dnapath results.
new_gene_names <- sapply(new_gene_names, function(x) {
index <- which(gene_mat[, 1] == x)
if(length(index) == 0)
return(x) # No new name found, keep original.
if(length(index) > 1)
warning("Duplicate names found for ", x, ".")
return(gene_mat[index, 2])
})
for(i in 1:length(results)) {
results[[i]]$genes <- new_gene_names
}
} else if (class(results) == "dnapath") {
new_gene_names <- results$genes
# Subset gene_mat onto genes contained in the dnapath results.
gene_mat <- gene_mat[gene_mat[, 1] %in% results$genes, ]
# Replace each gene with new gene names.
new_gene_names[sapply(gene_mat[, 1],
function(x) which(new_gene_names == x))] <-
gene_mat[, 2]
results$genes <- new_gene_names
} else {
stop("'results' must be a ", '"dnapath_list" or "dnapath" object.')
}
return(results)
}
#' Remove pathways with non-significant DC scores.
#'
#' @param x A 'dnapath_list' object.
#' @param alpha_pathway Threshold for pathway p-values to determine significance.
#' If NULL, defaults to 0.05 or the minimum possible threshold (based on the
#' number of permutatiosn that were run).
#' @return A 'dnapath_list' object containing only those pathways with differential
#' connectivity p-values below `alpha`.
#' @export
filter_pathways <- function(results, alpha_pathway = NULL) {
if(class(results) != "dnapath_list")
stop("'results' must be a 'dnapath_list' object.")
if(is.null(alpha_pathway)) {
alpha_pathway <- max(0.05, get_min_alpha(results))
}
if(alpha_pathway < get_min_alpha(results)) {
warning("alpha_pathway = ", alpha_pathway, " cannot be met with the number of ",
"permutations. Setting to ", get_min_alpha(results))
alpha_pathway <- get_min_alpha(results)
}
index <- which(sapply(results, function(x) x$p_value_path <= alpha_pathway))
if(length(index) > 0) {
results <- subset(results, pathways = index)
} else {
return(NULL)
}
results <- sort(results)
return(results)
}
#' Summarize differential connections for a pathway
#'
#' @param results A 'dnapath' object.
#' @param alpha_edge Threshold for p-values of edge DC scores. If NULL,
#' defaults to 0.05 or the minimum possible threshold (based on the
#' number of permutatiosn that were run).
#' @return A tibble summarizing the differential connections in
#' the pathway.
#' @export
summarize_edges_for_pathway <- function(results, alpha_edge = NULL) {
if(class(results) != "dnapath")
stop("'results' must be a 'dnapath' object.")
if(is.null(alpha_edge)) {
alpha_edge <- max(0.05, get_min_alpha(results))
}
if(alpha_edge < get_min_alpha(results)) {
warning("alpha_edge = ", alpha_edge, " cannot be met with the number of ",
"permutations. Setting to ", get_min_alpha(results))
alpha_edge <- get_min_alpha(results)
}
# Note, d_edges, p_value_edges, nw1, nw2, etc. are lower tri of matrix,
# and these fill the matrix by column.
genes <- results$genes[results$pathway_genes]
edges <- sapply(combn(length(genes), 2, simplify = FALSE),
function(x) paste(genes[x[1]], genes[x[2]], sep = " - "))
# Subset on results with p-values below alpha_edge
index <- which(results$p_value_edges <= alpha_edge)
if(length(index) == 0)
# If no edges meet the threshold, return an empty tibble.
return(tibble(pathway = character(),
edges = character(),
dc_score = numeric(),
p_value = numeric(),
p_value_m = numeric(),
nw1 = numeric(),
nw2 = numeric()))
if(is.null(results$pathway)) results$pathway <- "unnamed"
df <- tibble(pathway = results$pathway,
edges = edges[index],
dc_score = results$d_edges[index],
p_value = results$p_value_edges[index],
p_value_m = results$p_value_edges_mono[index],
nw1 = results$nw1[index],
nw2 = results$nw2[index])
return(df)
}
#' Summarize differential connectivity of genes in a pathway
#'
#' @param results A 'dnapath' object on a single pathway.
#' @param alpha_gene Threshold for p-values of gene DC scores, used to
#' subset the results. If NULL (or 1), results for all genes are shown.
#' @return A tibble summarizing the differential connectivity of genes in
#' the pathway.
#' @export
summarize_genes_for_pathway <- function(results, alpha_gene = NULL) {
if(class(results) != "dnapath")
stop("'results' must be a 'dnapath' object.")
if(is.null(alpha_gene)) {
alpha_gene <- 1
}
if(alpha_gene < get_min_alpha(results)) {
warning("alpha_gene = ", alpha_gene, " cannot be met with the number of ",
"permutations. Setting to ", get_min_alpha(results))
alpha_gene <- get_min_alpha(results)
}
genes <- results$genes[results$pathway_genes]
if(is.null(results$pathway)) results$pathway <- "unnamed"
index <- which(results$p_value_genes <= alpha_gene)
if(length(index) == 0)
# If no genes meet the threshold, return an empty tibble.
return(tibble(pathway = character(),
genes = character(),
dc_score = numeric(),
p_value = numeric(),
p_value_m = numeric(),
mean_expr1 = numeric(),
mean_expr2 = numeric()))
df <- tibble(pathway = results$pathway,
genes = genes[index],
dc_score = results$d_genes[index],
p_value = results$p_value_genes[index],
p_value_m = results$p_value_genes_mono[index],
mean_expr1 = results$mean_expr[1, index],
mean_expr2 = results$mean_expr[2, index])
return(df)
}
#' Summarize the differential connectivity of pathways.
#'
#' @param results A 'dnapath_list' object.
#' @param alpha_pathway Threshold for p-values of pathway DC scores.
#' If NULL (or 1), results for all pathways are shown.
#' @param alpha_gene Threshold for p-values of gene DC scores. Used to determine
#' the number of genes that are differentially connected within each pathway.
#' If NULL,
#' defaults to 0.05 or the minimum possible threshold (based on the
#' number of permutatiosn that were run).
#' @return A tibble summarizing the differential connectivity of genes in
#' the pathway.
#' @export
summarize_pathways <- function(results, alpha_pathway = NULL, alpha_gene = NULL) {
if(class(results) == "dnapath") {
# If a single pathway is provided, return a summary of the edges.
return(summarize_edges_for_pathway(results,))
} else if(class(results) != "dnapath_list"){
stop("'results' must be a 'dnapath_list' or 'dnapath' object.")
}
if(is.null(alpha_gene)) {
alpha_gene <- max(0.05, get_min_alpha(results))
}
if(alpha_gene < get_min_alpha(results)) {
warning("alpha_gene = ", alpha_gene, " cannot be met with the number of ",
"permutations. Setting to ", get_min_alpha(results))
alpha_gene <- get_min_alpha(results)
}
if(is.null(alpha_pathway)) {
alpha_pathway <- 1
}
if(alpha_pathway < get_min_alpha(results)) {
warning("alpha_pathway = ", alpha_pathway, " cannot be met with the number of ",
"permutations. Setting to ", get_min_alpha(results))
alpha_pathway <- get_min_alpha(results)
}
p_value <- sapply(results, function(x) x$p_value_path)
index <- which(p_value <= alpha_pathway)
if(length(index) == 0)
# If no genes meet the threshold, return an empty tibble.
return(tibble(pathway = character(),
dc_score = numeric(),
p_value = numeric(),
n_genes = numeric(),
n_dc = numeric(),
mean_expr1 = numeric(),
mean_expr2 = numeric()))
results <- results[index]
# Remove any NULL results from the list.
pathway <- sapply(results, function(x) x$pathway)
d_pathway <- sapply(results, function(x) x$d_pathway)
p_value <- p_value[index]
n_genes <- sapply(results, function(x) length(x$pathway_genes))
n_genes_dc <- sapply(results, function(x) sum(x$p_value_genes <= alpha_gene))
mean_expr1 <- sapply(results, function(x) sum(x$mean_expr[1, ])) / n_genes
mean_expr2 <- sapply(results, function(x) sum(x$mean_expr[2, ])) / n_genes
# Note, mean_expr are divided by n_genes, the total number of possible genes
# in the pathway. This way, if there are any missing genes from the expression
# profile, they are counted as having 0 expression.
df <- tibble(pathway = pathway,
dc_score = d_pathway,
p_value = p_value,
n_genes = n_genes,
n_dc = n_genes_dc,
mean_expr1 = mean_expr1,
mean_expr2 = mean_expr2)
alpha_gene <- round(alpha_gene, 3)
colnames(df)[5] <- paste0("n_dc (", ifelse(alpha_gene == 0,
"<0.001", alpha_gene), ")")
return(df)
}
#' Summarize the differential connectivity of genes over all pathways.
#'
#' @param results A 'dnapath_list' object.
#' @param alpha_gene Threshold for p-values of gene DC scores. Used to determine
#' the number of pathways that each gene is differentially connected in. If NULL,
#' defaults to 0.05 or the minimum possible threshold (based on the
#' number of permutatiosn that were run).
#' @return A tibble summarizing the differential connectivity of genes across
#' all pathways.
#' @export
summarize_genes <- function(results, alpha_gene = NULL) {
if(class(results) == "dnapath") {
return(summarize_genes_for_pathway(results,))
} else if(class(results) != "dnapath_list"){
stop("'results' must be a 'dnapath_list' or 'dnapath' object.")
}
if(is.null(alpha_gene)) {
alpha_gene <- max(0.05, get_min_alpha(results))
}
if(alpha_gene < get_min_alpha(results)) {
warning("alpha_gene = ", alpha_gene, " cannot be met with the number of ",
"permutations. Setting to ", get_min_alpha(results))
alpha_gene <- get_min_alpha(results)
}
gene_list <- results[[1]]$genes
p <- length(gene_list)
n_pathways <- numeric(p)
n_dc <- numeric(p)
mean_expr1 <- numeric(p)
mean_expr2 <- numeric(p)
for(i in 1:length(results)) {
x <- results[[i]]
pathway_genes <- x$gene[x$pathway_genes]
index <- sapply(pathway_genes, function(gene) which(gene_list == gene)[1])
n_pathways[index] <- n_pathways[index] + 1
n_dc[index] <- n_dc[index] + (x$p_value_genes <= alpha_gene)
mean_expr1[index] <- x$mean_expr[1, ]
mean_expr2[index] <- x$mean_expr[2, ]
}
df <- tibble(gene = gene_list,
n_pathways = n_pathways,
n_dc = n_dc,
mean_expr1 = mean_expr1,
mean_expr2 = mean_expr2)
# Note, pathways containing a gene can be found by using:
# > subset(results, genes = gene_name)
alpha_gene <- round(alpha_gene, 3)
colnames(df)[3] <- paste0("n_dc (", ifelse(alpha_gene == 0,
"<0.001", alpha_gene), ")")
return(df)
}
#' Get the minimum alpha level for the permutation test
#'
#' @param results A 'dnapath_list' or 'dnapath' object.
#' @return The minimum alpha level that can be used based on the number
#' of permutations performed in the analysis.
#' @export
get_min_alpha <- function(results) {
if(class(results) == "dnapath") {
return(1 / results$n_perm)
} else if(class(results) == "dnapath_list"){
return(1 / results[[1]]$n_perm)
} else {
return(NA)
}
}
get_number_of_tests <- function(pathway_list) {
if(!is.list(pathway_list)) {
n_tests <- length(pathway_list) * (length(pathway_list) - 1) / 2
} else {
n_tests <- sum(sapply(pathway_list,
function(x) length(x) * (length(x) - 1) / 2))
}
return(n_tests)
}
get_alpha_level <- function(pathway_list, alpha_overall = 0.05) {
N <- get_number_of_tests(pathway_list)
# Significance level for each test (edge, gene, and pathway):
alpha <- ((1 - (1 - alpha_overall)^(1 / N)) / 2)^(1 / 3)
return(alpha)
}
get_perm_needed <- function(pathway_list, alpha_overall = 0.05) {
alpha <- get_alpha_level(pathway_list, alpha_overall)
n_perm <- ceiling(1 / alpha)
return(n_perm)
}
get_alpha_level2 <- function(pathway_list, alpha_overall = 0.05,
interval = c(0, 0.2)) {
if(!is.list(pathway_list)) pathway_list <- list(pathway = pathway_list)
genes <- unique(unlist(pathway_list))
edges <- matrix(0, nrow = length(genes), ncol = length(genes))
for(pathway in pathway_list) {
index <- which(genes %in% pathway)
edges[index, index] <- edges[index, index] + 1
}
edges <- edges[lower.tri(edges)]
edges <- edges[edges != 0]
alpha <- alpha_overall^3
f <- function(x) {
1 - exp(sum(log(1 - 2 * edges * x^3))) - alpha_overall
}
alpha <- uniroot(f, interval = interval)$root
return(alpha)
}
#' Summarize differential connectivity scores for a pathway
#'
#' @param results The results from performing [dnapath()] on a single pathway.
#' @param alpha The desired significance level. DC scores with p-values below
#' this threshold will be set to zero.
#' @param monotonized If true, monotonized (i.e. step-down) p-values from the
#' permutation test will be used.
#' @return The p by p differential network, with nonzero entries containing
#' the significant DC scores.
summarize_dnw_for_pathway <- function(results, alpha = 0.1, monotonized = FALSE) {
if(class(results) != "dnapath")
stop("'results' must be a 'dnapath' object.")
genes_in_dnw <- results$genes[results$pathway_genes]
p <- length(genes_in_dnw)
if(p == 0) {
warning("results pathway does not contain any genes.")
return(NULL)
}
dnw <- matrix(0, p, p)
colnames(dnw) <- genes_in_dnw
d_edges <- results$d_edges
# If p-value is above alpha, set to zero.
if(monotonized) {
d_edges[results$p_value_edges_mono > alpha] <- 0
} else {
d_edges[results$p_value_edges > alpha] <- 0
}
dnw[lower.tri(dnw)] <- d_edges
dnw <- dnw + t(dnw)
return(dnw)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.