#' Differential network analysis
#'
#' @param expr_pair Either (1) a list of two matrices, or objects to be coerced
#' to one, that contain the gene
#' expression profile from two populations (groups), or (2) a single matrix, or
#' object to be coerced to one, that contains the expression profiles for both
#' groups. In either case, the rows of each matrix should
#' correspond to samples and the columns should correspond to individual genes.
#' If a single matrix is provided, the `groups` argument
#' must also be set to specify which rows belong to which group.
#' @param pathway_list A single vector or list of vectors containing gene names
#' to indicate pathway membership. The vectors are used to subset the matrices
#' in 'expr_pair'.
#' @param groups (Optional) If `expr_pair` is a single matrix, `groups` must be
#' a vector of length equal to the number of rows in `expr_pair`, and it should
#' contain two unique elements (for example, the two group names). This argument
#' is used to identify which samples belong to which group.
#' @param network_inference A function used to infer the pathway network. It
#' should take in an n by p matrix and return a p by p matrix of association
#' scores. (Built-in options include: \code{\link{run_aracne}},
#' \code{\link{run_bc3net}}, \code{\link{runc3net}},
#' \code{\link{run_clr}}, \code{\link{run_corr}},
#' \code{\link{run_dwlasso}}, \code{\link{run_genie3}},
#' \code{\link{run_glasso}}, \code{\link{run_mrnet}},
#' \code{\link{run_pcor}}, and \code{\link{run_silence}}.)
#' Defaults to \code{\link{run_pcor}} for partial correlations.
#' @param n_perm The number of random permutations to perform during
#' permutation testing. If n_perm == 1, the permutation tests are not performed.
#' If n_perm is larger than the number of possible permutations, n_perm will
#' be set to this value with a warning message.
#' @param lp_set A vector of lp values used to compute differential connectivity
#' scores. If multiple values are given, then the results are returned as
#' a list having the same length. (Note: the option of providing a vector of lp
#' values is available so that network inference methods only need to be
#' run once for each pathway).
#' @param seed (Optional) Used to set.seed prior to permutation test for
#' each pathway. This allows results for individual pathways to be easily
#' reproduced.
#' @return A list containing results for each pathway in 'pathway_list'. These
#' results include the p-values for differential connectivity of each gene, the
#' overall differential connectivity of the pathway. Pathways without any significance
#' are excluded from the results.
#' @export
dnapath <- function(expr_pair, pathway_list, groups = NULL,
network_inference = run_pcor, n_perm = 100, lp_set = 2,
seed = NULL, verbose = TRUE) {
#################################################################
# If a list of pathways are given, perform dnapath over each pathway.
#################################################################
if(!is.null(pathway_list) && is.list(pathway_list)) {
# Subset the input expression data onto only those genes contained
# within all of the pathways.
genes <- unique(unlist(pathway_list))
# Make sure there are no duplicate genes in any pathway.
pathway_list <- lapply(pathway_list, unique)
if(inherits(expr_pair, "list")) {
# Expecting expr_pair to be a list of two matrices.
if(length(expr_pair) != 2)
stop("'expr_pair' list does not contain two matrices.")
expr_pair[[1]] <- expr_pair[[1]][, colnames(expr_pair[[1]]) %in% genes]
expr_pair[[2]] <- expr_pair[[2]][, colnames(expr_pair[[2]]) %in% genes]
} else {
expr_pair <- expr_pair[, colnames(expr_pair) %in% genes]
}
results_by_pathway <- lapply(pathway_list, function(pathway) {
dnapath(expr_pair, pathway, groups, network_inference, n_perm, lp_set, seed)
})
index_null <- which(sapply(results_by_pathway, is.null))
# If all pathways returned NULL results, stop here and return NULL.
if(length(index_null) == length(pathway_list)) {
warning("No pathways resulted in differential networks.\n")
return(NULL)
}
# If multiple lp values used, re-arrange results as a list of
# dnapath_list objects by lp value.
if(length(lp_set) > 1) {
results_by_lp <- vector("list", length(lp_set))
for(i in 1:length(lp_set)) {
results_by_lp[[i]] <- lapply(results_by_pathway,
function(results) results[[i]])
# Add pathway name to each result, if pathway_list is named.
if(!is.null(names(pathway_list))) {
for(j in 1:length(results_by_lp[[i]])) {
results_by_lp[[i]][[j]]$pathway <- names(pathway_list)[j]
}
}
class(results_by_lp[[i]]) <- "dnapath_list"
}
# Remove pathways that returned NULL results from both the results list.
# Remove pathways that returned NULL results from both the results list.
if(length(index_null) > 0) {
results_by_lp <- lapply(results_by_lp, function(results) results[-index_null])
}
names(results_by_lp) <- paste0("lp = ", lp_set)
} else {
results_by_lp <- results_by_pathway
# Add pathway name to each result, if pathway_list is named.
if(!is.null(names(pathway_list))) {
for(i in 1:length(results_by_lp)) {
results_by_lp[[i]]$pathway <- names(pathway_list)[i]
}
}
# Remove pathways that returned NULL results from both the results list.
if(length(index_null) > 0) {
results_by_lp <- results_by_lp[-index_null]
}
class(results_by_lp) <- "dnapath_list"
}
return(results_by_lp)
} else {
# pathway_list is a vector of genes (a single pathway). Proceed
# with the differential network analysis on this pathway.
pathway <- pathway_list
}
#################################################################
# Run differential network analysis on the pathway
#################################################################
# Set seed for reproducibility of permutation test, if one is provided.
if(!is.null(seed)) {
set.seed(seed)
}
# Process the expr_pair input.
# Handle the list and single matrix cases separately.
if(inherits(expr_pair, "list")) {
# Expecting expr_pair to be a list of two matrices.
if(length(expr_pair) != 2)
stop("'expr_pair' list does not contain two matrices.")
# Save the group labels, if there are any.
group_labels <- names(expr_pair)
if(is.null(group_labels))
group_labels <- c("1", "2")
# Store the gene names in the original gene expression datasets.
gene_names <- unique(union(colnames(expr_pair[[1]]),
colnames(expr_pair[[2]])))
# Store the number of samples in each dataset.
n <- c(nrow(expr_pair[[1]]), nrow(expr_pair[[2]]))
# If a pathway is provided, subset counts.
if(!is.null(pathway)) {
index <- which(colnames(expr_pair[[1]]) %in% pathway)
if(length(index) < 2) {
if(verbose)
cat("Fewer than two genes are expressed in one group. Returning NULL.\n")
return(NULL)
}
expr_pair[[1]] <- expr_pair[[1]][, index]
index <- which(colnames(expr_pair[[2]]) %in% pathway)
if(length(index) < 2) {
if(verbose)
cat("Fewer than two genes are expressed in one group. Returning NULL.\n")
return(NULL)
}
expr_pair[[2]] <- expr_pair[[2]][, index]
}
# Join the two datasets and coerce the output into a matrix.
expr_pair <- as.matrix(merge(expr_pair[[1]], expr_pair[[2]],
all = TRUE, sort = FALSE))
} else {
# expr_pair must be a matrix.
# Allow data.frame, tibble, and other objects that are coercible to matrix.
expr_pair <- tryCatch(as.matrix(expr_pair),
error = function(e)
stop("'expr_pair' is not a list and as.matrix() failed."))
if(is.null(groups))
stop("'groups' must be specified when 'expr_pair' is a single matrix.")
if(length(unique(groups)) != 2)
stop("length(unique(groups)) = ", length(unique(groups)), ", ",
"expected to equal 2. For example, if the first n rows ",
'are from group "a" and the last m rows are from group "b", then ',
"'expr_pair' should contain n + m rows and 'group' may be the ",
'vector c(rep("a", n), rep("b", m)).')
if(length(groups) != nrow(expr_pair))
stop("length(groups) = ", length(groups), ", but nrow(expr_pair) = ",
nrow(expr_pair), ". These must be equal.")
# Store the gene names in the original gene expression dataset.
gene_names <- colnames(expr_pair)
# If a pathway is provided, subset counts.
if(!is.null(pathway)) {
index <- which(colnames(expr_pair) %in% pathway)
if(length(index) < 2) {
if(verbose)
cat("Fewer than two genes are expressed in one group. Returning NULL.\n")
return(NULL)
}
expr_pair <- expr_pair[, index]
}
# Order rows to ensure that group 1 is first, followed by group 2.
group_labels <- unique(groups)
index_group_1 <- which(groups == group_labels[1])
index_group_2 <- which(groups == group_labels[2])
expr_pair <- expr_pair[c(index_group_1, index_group_2), ]
# Store the number of samples in each dataset.
n <- c(length(index_group_1), length(index_group_2))
}
pathway_genes <- sapply(colnames(expr_pair),
function(gene) which(gene_names == gene)[1])
# Fill in missing values with 0
index_na <- which(is.na(expr_pair))
if(length(index_na) > 0) {
# # Don't give a warning message.
# warning("Setting", length(index_na), "NA values to 0.")
expr_pair[which(is.na(expr_pair))] <- 0
}
N <- nrow(expr_pair) # Total number of observations across both samples.
p <- ncol(expr_pair) # Total number of genes in union.
n_lp <- length(lp_set)
# If total number of observations is small, use exact set of permutations.
# Order of samples doesn't matter, so keep only first half if sample sizes
# are the same. If sample sizes are unequal, all permutations are used.
if((n[1] == n[2]) && (choose(N, n[1]) / 2 <= n_perm)) {
permutations <- combn(1:N, n[1])
permutations <- permutations[, 1:(ncol(permutations) / 2)]
permutations <- permutations[, -1]
} else if((n[1] != n[2]) && (choose(N, n[1]) <= n_perm)) {
permutations <- combn(1:N, n[1])
permutations <- permutations[, -1]
} else {
permutations <- cbind(sapply(1:n_perm, function(x) sample(1:N, n[1])))
}
if(ncol(permutations) < n_perm)
warning("Only ", ncol(permutations), " possible permutations. Setting ",
"'n_perm' to this value.")
n_perm <- ncol(permutations)
# Initialize lists to store DC scores and p-values for each lp in lp_set.
# Scores are initialized to 0 and p-values to 1.
scores_1 <- network_inference(expr_pair[1:n[1], ])
scores_2 <- network_inference(expr_pair[-(1:n[1]), ])
d_path_list <- lapply(lp_set, function(lp) d_pathwayC(scores_1, scores_2, lp = lp))
d_gene_list <- lapply(lp_set, function(lp) d_genesC(scores_1, scores_2, lp = lp))
d_edge_list <- lapply(lp_set, function(lp) d_edgesC(scores_1, scores_2, lp = lp))
# Obtain ranks of original DC scores for monotonization.
d_path_ranks <- lapply(d_path_list, function(d_path) order(d_path, decreasing = TRUE))
d_gene_ranks <- lapply(d_gene_list, function(d_gene) order(d_gene, decreasing = TRUE))
d_edge_ranks <- lapply(d_edge_list, function(d_edge) order(d_edge, decreasing = TRUE))
pval_path_list <- lapply(lp_set, function(lp) 1)
pval_gene_list <- lapply(lp_set, function(lp) rep(1, p))
pval_edge_list <- lapply(lp_set, function(lp) rep(1, p * (p - 1) / 2))
pval_path_list_mono <- lapply(lp_set, function(lp) 1)
pval_gene_list_mono <- lapply(lp_set, function(lp) rep(1, p))
pval_edge_list_mono <- lapply(lp_set, function(lp) rep(1, p * (p - 1) / 2))
# Save inferred networks.
nw1 <- scores_1[lower.tri(scores_1)]
nw2 <- scores_2[lower.tri(scores_2)]
# If inferred networks are empty, then quit without running permutation test.
if(all(scores_1 == 0) && all(scores_2 == 0)) {
if(verbose)
cat("Inferred network is empty. Permutation test not performed.\n")
n_perm <- 1 # Only original data were considered.
} else {
for(i in 1:n_perm) {
network_1 <- permutations[, i]
scores_1 <- network_inference(expr_pair[network_1, ])
scores_1[which(is.na(scores_1))] <- 0
scores_2 <- network_inference(expr_pair[-network_1, ])
scores_2[which(is.na(scores_2))] <- 0
for(k in 1:n_lp) {
pval_path_list[[k]] <- pval_path_list[[k]] +
(d_pathwayC(scores_1, scores_2, lp = lp_set[[k]]) >= d_path_list[[k]])
pval_gene_list[[k]] <- pval_gene_list[[k]] +
(d_genesC(scores_1, scores_2, lp = lp_set[[k]]) >= d_gene_list[[k]])
pval_edge_list[[k]] <- pval_edge_list[[k]] +
(d_edgesC(scores_1, scores_2, lp = lp_set[[k]]) >= d_edge_list[[k]])
# Compute step-down p-values.
# Rank index for original scores:
index <- d_path_ranks[[k]]
n_index <- length(index)
# Calculate scores of permuted data.
d_path_perm <- d_pathwayC(scores_1, scores_2, lp = lp_set[[k]])
# Take the cumulative max, in rank order, starting from the min.
d_path_perm <- cummax(d_path_perm[rev(index)])[n_index:1][order(index)]
# Accumulate number of permuted scores that are larger than original.
pval_path_list_mono[[k]] <- pval_path_list_mono[[k]] +
(d_path_perm >= d_path_list[[k]])
# Repeat step-down procedure for genes and edges.
index <- d_gene_ranks[[k]]
n_index <- length(index)
d_gene_perm <- d_genesC(scores_1, scores_2, lp = lp_set[[k]])
d_gene_perm <- cummax(d_gene_perm[rev(index)])[n_index:1][order(index)]
pval_gene_list_mono[[k]] <- pval_gene_list_mono[[k]] +
(d_gene_perm >= d_gene_list[[k]])
index <- d_edge_ranks[[k]]
n_index <- length(index)
d_edge_perm <- d_edgesC(scores_1, scores_2, lp = lp_set[[k]])[index]
d_edge_perm <- cummax(d_edge_perm[rev(index)])[n_index:1][order(index)]
pval_edge_list_mono[[k]] <- pval_edge_list_mono[[k]] +
(d_edge_perm >= d_edge_list[[k]])
}
}
}
# If no permutation testing is done, set p-values to NA.
if(n_perm == 1) {
pval_path_list <- lapply(pval_path_list, function(pval) NA)
pval_gene_list <- lapply(pval_gene_list, function(pval) NA)
pval_edge_list <- lapply(pval_edge_list, function(pval) NA)
pval_path_list_mono <- lapply(pval_path_list_mono, function(pval) NA)
pval_gene_list_mono <- lapply(pval_gene_list_mono, function(pval) NA)
pval_edge_list_mono <- lapply(pval_edge_list_mono, function(pval) NA)
} else {
pval_path_list <- lapply(pval_path_list, function(pval) pval / (n_perm + 1))
pval_gene_list <- lapply(pval_gene_list, function(pval) pval / (n_perm + 1))
pval_edge_list <- lapply(pval_edge_list, function(pval) pval / (n_perm + 1))
for(k in 1:length(lp_set)) {
index <- d_path_ranks[[k]]
pval_path_list_mono[[k]] <-
cummax(pval_path_list_mono[[k]][index] / (n_perm + 1))[order(index)]
index <- d_gene_ranks[[k]]
pval_gene_list_mono[[k]] <-
cummax(pval_gene_list_mono[[k]][index] / (n_perm + 1))[order(index)]
index <- d_edge_ranks[[k]]
pval_edge_list_mono[[k]] <-
cummax(pval_edge_list_mono[[k]][index] / (n_perm + 1))[order(index)]
}
}
results <- vector("list", n_lp)
for(i in 1:n_lp) {
results[[i]] <- list(genes = gene_names,
pathway_genes = pathway_genes,
pathway = NULL,
p_value_path = pval_path_list[[i]],
p_value_genes = drop(pval_gene_list[[i]]),
p_value_edges = drop(pval_edge_list[[i]]),
p_value_path_mono = pval_path_list_mono[[i]],
p_value_genes_mono = pval_gene_list_mono[[i]],
p_value_edges_mono = pval_edge_list_mono[[i]],
d_pathway = d_path_list[[i]],
d_genes = drop(d_gene_list[[i]]),
d_edges = drop(d_edge_list[[i]]),
nw1 = nw1,
nw2 = nw2,
mean_expr = rbind(apply(expr_pair[1:n[1], ], 2, mean),
apply(expr_pair[-(1:n[1]), ], 2, mean)),
groups = group_labels,
n_perm = n_perm,
lp = lp_set[i],
seed = seed)
class(results[[i]]) <- "dnapath"
}
# If only one lp is considered, unlist over lp.
if(length(results) == 1) {
results <- results[[1]]
}
class(results) <- "dnapath"
return(results)
}
#' Print function for 'dnapath_list' object.
#'
#' @param x A 'dnapath_list' object.
#' @param ... Additional arguments are ignored.
#' @return Prints a summary of the module.
#' @export
print.dnapath_list <- function(x, ...) {
if(!is(x, "dnapath_list"))
stop(paste0("'", deparse(substitute(x)),
"' is not a 'dnapath_list' object."))
groups <- x[[1]]$groups
message <- paste0("Differential network analysis results between, ", groups[1],
" (group 1) and ", groups[2], " (group 2) over ", length(x),
" pathways.\n")
cat(message)
summary(x)
}
#' Print function for 'dnapath' object.
#'
#' @param x A 'dnapath' object.
#' @param ... Additional arguments are ignored.
#' @return Prints a summary of the module.
#' @export
print.dnapath <- function(x, ...) {
if(!is(x, "dnapath"))
stop(paste0("'", deparse(substitute(x)),
"' is not a 'dnapath' object."))
pathway <- drop(x$pathway)
p <- length(x$pathway_genes)
groups <- x$groups
message <- paste0("Differential network analysis between, ", groups[1],
" (group 1) and ", groups[2], " (group 2);")
if(is.null(pathway)) {
message <- paste0(message, " results for an unnamed ", "pathway containing ",
p, " genes.")
} else {
message <- paste0(message, ' results for the pathway "', pathway,
'" containing ', p, " genes.")
}
# Add pathway p-value to the message.
p_value <- round(x$p_value_path, 3)
if(p_value == 0) {
p_value <- "< 0.001"
} else {
p_value <- paste("=", p_value)
}
message <- paste0(message, ' Pathway p-value ', p_value, ".")
message <- paste0(message, " The mean expression of genes in the pathway is ",
round(mean(x$mean_expr[1, ]), 1), " in group 1 and ",
round(mean(x$mean_expr[2, ]), 1), " in group 2.\n")
cat(message)
summary(x)
}
#' Plot function for 'dnapath' object.
#'
#' @param x A 'dnapath' object.
#' @param alpha Threshold for p-values to infer differentially connected edges.
#' If NULL (the default) then no edges are removed from the plot.
#' @param monotonized If TRUE, monotonized (i.e. step-down) p-values from the
#' permutation test will be used.
#' @param ... Additional arguments are passed into the plotting function
#' \code{\link[SeqNet]{plot_network}}.
#' @return Plots the differential network and returns the graph object.
#' See \code{\link[SeqNet]{plot_network}} for details.
#' @export
plot.dnapath <- function(x, alpha = NULL, monotonized = FALSE, ...) {
if(!is(x, "dnapath"))
stop(paste0("'", deparse(substitute(x)),
"' is not a 'dnapath' object."))
# If there are any NA mean expression values, set them to 0.
if(any(is.na(x$mean_expr))) {
x$mean_expr[which(is.na(x$mean_expr))] <- 0
}
# If any mean expression values are negative, set all values equal
# to make fold-changes equal to 1.
if(any(x$mean_expr < 0)) {
x$mean_expr <- 10
}
genes_in_dnw <- x$genes[x$pathway_genes]
p <- length(genes_in_dnw)
dc_vals <- (x$nw2 - x$nw1)
if(monotonized) {
p_values <- x$p_value_edges_mono
} else {
p_values <- x$p_value_edges
}
if(!is.null(alpha)) {
# If p-value is above alpha, edges will be scaled to zero width.
p_values[p_values > alpha] <- 1
}
# Base edge weights are from -log(p-value), and are further scaled
# by degree of change in association strength.
edge_weights <- -log(p_values) *
abs(dc_vals) / max(c(abs(x$nw1), abs(x$nw2)))
if(any(is.nan(edge_weights))) {
# 0 / 0 association scores will result in NaN values. Set these to 0.
edge_weights[is.nan(edge_weights)] <- 0
}
fold_change <- log2(x$mean_expr[2, ] / x$mean_expr[1, ])
if(any(is.nan(fold_change))) {
# 0 / 0 expression will result in NaN values. Set these to 0.
fold_change[is.nan(fold_change)] <- 0
}
mean_expr <- apply(x$mean_expr, 2, max)
mean_expr <- mean_expr / max(mean_expr)
node_weights <- log2(1 + mean_expr) * 31 + 1
# Positive values indicate increase in association from group 1 to group 2.
# nw2 - nw1
dnw <- matrix(0, p, p)
dnw[lower.tri(dnw)] <- dc_vals
dnw <- dnw + t(dnw)
colnames(dnw) <- genes_in_dnw
# TODO: set color based on group with higher magnitude of association.
# what if going from -0.3 to 0.3? No change in magnitude, but largest difference.
# Maybe just set color = to sign in group 1.
g <- SeqNet::plot_network(dnw, display_plot = FALSE, ...)
g$vertex.size <- node_weights
g$edge.width <- edge_weights * 2.5
g$vertex.color <- ifelse(fold_change > 0,
rgb(1, 0.19, 0.19,
pmax(0, pmin(1, abs(fold_change) / 2))),
rgb(0.31, 0.58, 0.8,
pmax(0, pmin(1, abs(fold_change) / 2))))
g$vertex.color
g$vertex.frame.color <- rgb(0.3, 0.3, 0.3, 0.5)
# Rescale edge weights to be used for color transparency.
edge_weights <- edge_weights / max(edge_weights)
if(any(is.nan(edge_weights))) edge_weights <- 0 # No edges are significantly DC.
g$edge.color <- ifelse(abs(x$nw2) > abs(x$nw1),
rgb(1, 0.19, 0.19,
pmax(0.3, edge_weights)),
rgb(0.31, 0.58, 0.8,
pmax(0.3, edge_weights)))
g$vertex.label.cex <- 1
plot(g)
invisible(g)
}
#' Summary function for 'dnapath_list' object.
#'
#' @param x A 'dnapath_list' object.
#' @param by_gene If TRUE, summarizes the differential network analysis by genes
#' instead of by pathways.
#' @param alpha_pathway Threshold for p-values of pathway DC scores; used
#' to subset the results.
#' If NULL (or 1), results for all pathways are shown.
#' @param alpha_gene Threshold for p-values of gene DC scores. Used to determine
#' the number of genes that are differentially connected within each pathway. If NULL,
#' defaults to 0.05 or the minimum possible threshold (based on the
#' number of permutatiosn that were run).
#' @param ... Additional arguments are ignored.
#' @return Summarizes the differential network analysis results.
#' @export
summary.dnapath_list <- function(x, by_gene = FALSE,
alpha_pathway = NULL, alpha_gene = NULL, ...) {
if(is.null(alpha_gene)) {
alpha_gene <- max(0.05, get_min_alpha(x))
}
if(alpha_gene < get_min_alpha(x)) {
warning("alpha_gene = ", alpha_gene, " cannot be met with the number of ",
"permutations. Setting to ", get_min_alpha(x))
alpha_gene <- get_min_alpha(x)
}
if(is.null(alpha_pathway)) {
alpha_pathway <- 1
}
if(alpha_pathway < get_min_alpha(x)) {
warning("alpha_pathway = ", alpha_pathway, " cannot be met with the number of ",
"permutations. Setting to ", get_min_alpha(x))
alpha_pathway <- get_min_alpha(x)
}
if(by_gene) {
df <- summarize_genes(x, alpha_gene = alpha_gene)
# df <- dplyr::arrange(df, n_dc / n_pathways, desc(mean_expr1 + mean_expr2))
print(df)
} else {
df <- summarize_pathways(x, alpha_pathway = alpha_pathway,
alpha_gene = alpha_gene)
# df <- dplyr::arrange(df, p_value, desc(d_pathway))
print(df)
}
invisible(df)
}
#' Summary function for 'dnapath' object.
#'
#' @param x A 'dnapath' object.
#' @param by_gene If TRUE, summarizes the differential network analysis by genes;
#' otherwise, summarizes by gene-gene interactions.
#' @param alpha Threshold for p-values to determine significance; defaults
#' to 1 and returns all results. If 'by_gene'
#' is FALSE, then 'alpha' is used to filter edges. If 'by_gene' is TRUE,
#' then 'alpha' is used to filter genes.
#' @param ... Additional arguments are passed into \code{\link{summarize_pathways}}
#' (default) or \code{\link{summarize_genes_over_pathways}} (if by_gene is TRUE).
#' @return Summarizes the differential network analysis result.
#' @export
summary.dnapath <- function(x, by_gene = TRUE, alpha = NULL, ...) {
if(is.null(alpha)) {
alpha <- 1
}
if(alpha < get_min_alpha(x)) {
warning("alpha = ", alpha, " cannot be met with the number of ",
"permutations. Setting to ", get_min_alpha(x))
alpha <- get_min_alpha(x)
}
if(by_gene) {
df <- summarize_genes_for_pathway(x, alpha_gene = alpha)
df <- dplyr::arrange(df, p_value, desc(dc_score))
print(df[, -1])
} else {
df <- summarize_edges_for_pathway(x, alpha_edge = alpha)
df <- dplyr::arrange(df, p_value, desc(dc_score))
print(df[, -1])
}
invisible(df)
}
#' Subset function for 'dnapath_list' object.
#'
#' @param x A 'dnapath_list' object.
#' @param pathways A set of pathways to index on. This can be (1) a vector of
#' character strings, corresponding to
#' pathway names or regular expressions used to find pathways, (2) a vector of
#' indices to select pathways, (3) a vector of
#' negative indices indicating pathways to remove, or (4) a logical (boolean)
#' vector that is the same length of current number of pathways in `x`.
#' @param genes A set of gene names to index on; exact matching is used. Only
#' pathways containing these genes are retained.
#' @param ... Additional arguments are ignored.
#' @return A subset of the differential network analysis results.
#' @export
subset.dnapath_list <- function(x, pathways = NULL, genes = NULL, ...) {
# Pathways to keep or remove. These are used so that both pathways and genes
# can be provided, and the union of results is returned.
index_keep <- rep(FALSE, length(x))
index_remove <- rep(FALSE, length(x)) # Only used for -pathway input.
if(!is.null(pathways)) {
if(is.numeric(pathways)) {
if(all(pathways > 0)) {
index_keep[pathways] <- TRUE
} else if(all(pathways < 0)) {
index_remove[pathways] <- TRUE
} else {
stop("pathway indices must be all postive or all negative.")
}
} else if(is.logical(pathways)) {
if(length(pathways) == length(x)) {
index_keep[pathways] <- TRUE
} else {
stop("pathway is a logical vector but is not same length as the current",
"number of pathways in 'x'.")
}
} else if(is.character(pathways)) {
for(pathway in pathways) {
index <- grep(pathway, names(x), ignore.case = TRUE)
if(length(index) > 0)
index_keep[index] <- TRUE
}
}
}
if(!is.null(genes)) {
for(gene in genes) {
index <- which(sapply(x, function(path)
any(path$genes[path$pathway_genes] == gene)))
if(length(index) > 0)
index_keep[index] <- TRUE
}
}
# If pathways were specified for removal, update index_keep.
if(sum(index_remove) > 1) {
if(!is.null(genes)) {
# If pathways were subset on genes, then update the index_keep array.
index_keep[index_remove] <- FALSE
} else {
# Otherwise, keep all pathways that were not specified for removal.
index_keep[!index_remove] <- TRUE
}
}
x <- x[index_keep]
if(length(x) == 0) {
return(NULL)
} else if(length(x) == 1) {
return(x[[1]])
} else {
class(x) <- "dnapath_list"
return(x)
}
}
#' Sort function for 'dnapath_list' object.
#'
#' @param x A 'dnapath_list' object.
#' @param decreasing Logical. If TRUE (the default), results are sorted in
#' decreasing order.
#' @param by The variable to sort the results by. Must be one of: "mean_expr",
#' the mean expression of each pathway across both groups; "mean_expr1"
#' or "mean_expr2", the mean expression of each pathway in group
#' 1 or 2, respectively; "dc_score", the differential connectivity score
#' of the pathway; "p_value", the p-value of the dc score; "n_genes",
#' the number of genes in each pathway; "n_dc" the number of significantly
#' differentially conncted genes in each pathway.
#' @param ... Additional arguments are ignored.
#' @return The differential network analysis results ordered by DC pathway score.
#' @export
sort.dnapath_list <- function(x, decreasing = TRUE, by = "mean_expr", ...) {
if(class(x) != "dnapath_list")
stop("`x` must be a 'dnapath_list' object.")
if(by == "mean_expr") {
index <- order(sapply(x, function(res) mean(res$mean_expr)),
decreasing = decreasing)
} else if(by == "mean_expr1") {
index <- order(sapply(x, function(res) mean(res$mean_expr[1, ])),
decreasing = decreasing)
} else if(by == "mean_expr2") {
index <- order(sapply(x, function(res) mean(res$mean_expr[2, ])),
decreasing = decreasing)
} else if(by == "dc_score") {
index <- order(sapply(x, function(res) res$d_pathway),
decreasing = decreasing)
} else if(by == "p_value") {
index <- order(sapply(x, function(res) res$p_value_path),
decreasing = decreasing)
} else if(by == "n_genes") {
index <- order(sapply(x, function(res) length(res$pathway_genes)),
decreasing = decreasing)
} else if(by == "n_dc") {
alpha <- max(0.05, get_min_alpha(x))
index <- order(sapply(x, function(res) sum(res$p_value_genes <= alpha)),
decreasing = decreasing)
} else {
stop('`by` must be one of: "mean_expr", the mean expression of ',
'each pathway across both groups; "mean_expr1" or "mean_expr2", the ',
'mean expression of each pathway in group 1 or 2, respectively; ',
'"dc_score", the differential connectivity score of the pathway; ',
'"p_value", the p-value of the dc score; "n_genes", the number of ',
'genes in each pathway; "n_dc" the number of significantly ',
'differentially conncted genes in each pathway.')
}
x <- x[index]
return(x)
}
#' Order function for 'dnapath_list' object.
#'
#' @param x A 'dnapath_list' object.
#' @param ... Additional arguments are ignored.
#' @return The sorted indices of differential network analysis results,
#' ordered by DC pathway score.
#' @export
order.dnapath_list <- function(x, ...) {
if(class(x) == "dnapath_list") {
index <- order(sapply(x, function(res) res$d_pathway), decreasing = TRUE)
return(index)
} else {
return(NULL)
}
}
#' Extract parts of a 'dnapath_list' object.
#'
#' @param x A 'dnapath_list' object.
#' @param i The indices of pathways to extract.
#' @param ... Additional arguments are ignored.
#' @return A 'dnapath_list' object containing pathways indexed by 'i'.
#' @export
`[.dnapath_list` <- function(x, i, ...) {
x <- unclass(x)[i]
# If all elements are removed, return the empty list.
if(length(x) == 0) return(x)
p <- length(x[[1]]$genes)
remove_genes <- rep(TRUE, p)
# x[[j]]$pathway_genes contains indices for x[[j]]$genes that
# are correspond to the genes contained in the pathway.
# First, obtain all indices that are retained in the subset.
keep <- sort(unique(unlist(sapply(x, function(path) path$pathway_genes))))
# Check that the selected pathways are not empty.
if(length(keep) == 0) {
warning("Selected pathways contained no genes. Returning NULL.")
return(NULL)
}
# Do not remove the gene names that are indexed by the pathways.
remove_genes[keep] <- FALSE
# New indices are shifted by the number of genes that were removed
# before them. For ex. if the first first 3 genes are removed,
# index 4 will become 4 - 3. If gene 5 is also removed. Index 6
# would become 6 - (3 + 1) = 2.
# The cumsum counts the number of genes that were removed prior
# to each index.
index_shift <- cumsum(remove_genes)
newgenes <- x[[1]]$genes[!remove_genes]
# In the above example, newindex would be the vector
# (0, 0, 0, 1, 0, 2, ...).
# Indices 4 and 5 are retained and newindex[(4, 5)] provides the
# new, shifted indices.
newindex <- ((1:p) - index_shift)
# Loop through each pathway and store the new gene set and indices.
for(j in 1:length(x)) {
x[[j]]$genes <- newgenes
x[[j]]$pathway_genes <- newindex[x[[j]]$pathway_genes]
}
class(x) <- "dnapath_list"
return(x)
}
#' Replace parts of a 'dnapath_list' object.
#'
#' @param x A 'dnapath_list' object.
#' @param ... Additional arguments are ignored.
#' @return Replacement is not defined; an error is generated.
#' @export
`[<-.dnapath_list` <- function(x, ...) {
stop("Replacement undefined for 'dnapath_list' object.")
}
#' Combine two 'dnapath_list' objects.
#'
#' @param ... 'dnapath_list' objects to be concatenated.
#' @return Concatenation is not defined; an error is generated.
#' @export
`c.dnapath_list` <- function(...) {
stop("Concatenation is undefined for 'dnapath_list' objects.")
}
#' Return the first five pathways of a 'dnapath_list' object.
#'
#' @param x A 'dnapath_list' object.
#' @param ... Additional arguments are ignored.
#' @return A 'dnapath_list' object containing the first five pathways in 'x'.
#' @export
head.dnapath_list <- function(x, ...) {
return(x[1:min(5, length(x))])
}
#' Return the last five pathways of a 'dnapath_list' object.
#'
#' @param x A 'dnapath_list' object.
#' @param ... Additional arguments are ignored.
#' @return A 'dnapath_list' object containing the last five pathways in 'x'.
#' @export
tail.dnapath_list <- function(x, ...) {
return(x[max(1, length(x) - 4):length(x)])
}
#' Reverse the order of pathways in a 'dnapath_list' object.
#'
#' @param x A 'dnapathpath_list' object.
#' @param ... Additional arguments are ignored.
#' @return A 'dnapathpath_list' object containing the pathways in 'x' in reverse order.
#' @export
rev.dnapathpath_list <- function(x, ...) {
return(x[length(x):1])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.