#' @title Network Comparison Test
#'
#' @description A re-implementation and extension of the permutation based
#' network comparison test introduced in \insertCite{van2017comparing;textual}{GGMncv}.
#' Such extensions include scaling to networks with many nodes and the option to
#' use custom test-statistics.
#'
#' @param Y_g1 A matrix (or data.frame) of dimensions \emph{n} by \emph{p},
#' corresponding to the first dataset (\emph{p} must be the same
#' for \code{Y_g1} and \code{Y_g2}).
#'
#' @param Y_g2 A matrix of dimensions \emph{n} by \emph{p}, corresponding to the
#' second dataset (\emph{p} must be the same for \code{Y_g1} and \code{Y_g2}).
#'
#' @param iter Numeric. Number of (Monte Carlo) permutations (defaults to \code{1000}).
#'
#' @param desparsify Logical. Should the de-sparsified glasso estimator be
#' computed (defaults to \code{TRUE})? This is much faster,
#' as the tuning parameter is fixed to
#' \mjseqn{\lambda = \sqrt{log(p)/n}}.
#'
#' @param method character string. Which correlation coefficient (or covariance)
#' is to be computed. One of "pearson" (default), "kendall",
#' or "spearman".
#'
#' @param FUN A function or list of functions (defaults to \code{NULL}),
#' specifying custom test-statistics. See \strong{Examples}.
#'
#'
#' @param cores Numeric. Number of cores to use when executing the permutations in
#' parallel (defaults to \code{1}).
#'
#' @param progress Logical. Should a progress bar be included
#' (defaults to \code{TRUE})?
#'
#' @param update_progress How many times should the progress bar be updated
#' (defaults to \code{4})? Note that setting this to a
#' large value should result in the worse performance,
#' due to additional overhead communicating among the
#' parallel processes.
#'
#' @param ... Additional arguments passed to \code{\link{ggmncv}}.
#'
#' @details
#'
#' \strong{User-Defined Functions}
#'
#' These functions must have two arguments, corresponding
#' to the partial correlation network for each group. An
#' example is provided below.
#'
#'
#' For user-defined functions (\code{FUN}), absolute values are used
#' to compute the p-value, assuming more than one value is returned
#' (e.g., centrality). This is done to mimic the \code{R} package
#' \strong{NCT}.
#'
#' A fail-safe method to ensure the p-value is computed correctly is
#' to access the permutations and observed values from the \code{nct}
#' object.
#'
#' Finally, comparing edges is not implemented. The most straightforward
#' way to do this is with \code{\link{compare_edges}}, which
#' uses the de-sparsified estimator.
#'
#' @note
#'
#' In \insertCite{van2017comparing;textual}{GGMncv}, it was suggested that
#' these are tests of \emph{invariance}. To avoid confusion, that
#' terminology is not used in \strong{GGMncv}. This is because
#' these tests assume invariance or the null is \emph{true}, and thus
#' can only be used to detect differences. Hence, it would be incorrect
#' to suggest networks are the same, or evidence for invariance,
#' by merely failing to reject the null hypothesis
#' \insertCite{williams_null}{GGMncv}.
#'
#' For the defaults, Jensen-Shannon divergence is a symmetrized version
#' of Kullback-Leibler divergence (the average of both directions).
#'
#' \strong{Computational Speed}
#'
#' This implementation has two key features that should make it
#' scale to larger networks: (1) parallel computation and (2) the
#' \code{R} package \strong{glassoFast} is used under the hood
#' (as opposed to \strong{glasso}). CPU (time) comparisons are
#' provided in \insertCite{sustik2012glassofast;textual}{GGMncv}.
#'
#' \strong{Non-regularized}
#'
#' Non-regularized can be implemented by setting \code{lambda = 0}. Note
#' this is provided to \code{\link{ggmncv}} via \code{...}.
#'
#' @references
#' \insertAllCited{}
#'
#' @return A list of class \code{nct}, including the following
#'
#' \itemize{
#'
#' \item \code{glstr_pvalue}: Global strength p-value.
#'
#' \item \code{sse_pvalue}: Sum of square error p-value.
#'
#' \item \code{jsd_pvalue}: Jensen-Shannon divergence p-value.
#'
#' \item \code{max_pvalue}: Maximum difference p-value.
#'
#' \item \code{glstr_obs}: Global strength observed.
#'
#' \item \code{sse_obs}: Sum of square error observed.
#'
#' \item \code{jsd_obs}: Jensen-Shannon divergence observed.
#'
#' \item \code{max_obs}: Maximum difference observed.
#'
#' \item \code{glstr_perm}: Global strength permutations.
#'
#' \item \code{sse_perm}: Sum of square error permutations.
#'
#' \item \code{jsd_perm}: Jensen-Shannon divergence permutations.
#'
#' \item \code{max_perm}: Maximum difference permutations.
#'
#'
#' }
#'
#' For user-defined functions, i.e., those provided to \code{FUN},
#' the function name is pasted to \code{_pvalue}, \code{_obs}, and
#' \code{_perm}.
#'
#' @export
#'
#' @importFrom pbapply pboptions
#' @importFrom pbapply pblapply
#' @importFrom parallel makeCluster
#' @importFrom parallel stopCluster
#' @importFrom parallel parLapply
#'
#' @examples
#' \donttest{
#' # generate network
#' main <- gen_net(p = 10)
#'
#' # assume groups are equal
#' y1 <- MASS::mvrnorm(n = 500,
#' mu = rep(0, 10),
#' Sigma = main$cors)
#'
#' y2 <- MASS::mvrnorm(n = 500,
#' mu = rep(0, 10),
#' Sigma = main$cors)
#'
#' compare_ggms <- nct(y1, y2, iter = 500,
#' progress = FALSE)
#'
#' compare_ggms
#'
#' # custom function
#' # note: x & y are partial correlation networks
#'
#' # correlation
#' Correlation <- function(x, y){
#' cor(x[upper.tri(x)], y[upper.tri(y)])
#' }
#'
#' compare_ggms <- nct(y1, y2,iter = 100,
#' FUN = Correlation,
#' progress = FALSE)
#'
#' compare_ggms
#'
#' # correlation and strength
#'
#' Strength <- function(x, y){
#' NetworkToolbox::strength(x) - NetworkToolbox::strength(y)
#' }
#'
#' compare_ggms <- nct(y1, y2, iter = 100,
#' FUN = list(Correlation = Correlation,
#' Strength = Strength),
#' progress = FALSE)
#'
#' compare_ggms
#' }
nct <- function(Y_g1, Y_g2,
iter = 1000,
desparsify = TRUE,
method = "pearson",
FUN = NULL,
cores = 1,
progress = TRUE,
update_progress = 4,
...){
p_g1 <- ncol(Y_g1)
p_g2 <- ncol(Y_g2)
if(p_g1 != p_g2){
stop("Yg1 and Yg2 must have the same number of nodes (columns)")
}
I_p <- diag(p_g1)
n_g1 <- nrow(Y_g1)
n_g2 <- nrow(Y_g2)
n_total <- n_g1 + n_g2
if(desparsify){
fit_g1 <-
desparsify(
ggmncv(
R = cor(Y_g1, method = method),
n = n_g1, lambda = sqrt(log(p_g1)/n_g1),
progress = FALSE
)
)
fit_g2 <-
desparsify(
ggmncv(
R = cor(Y_g2, method = method),
n = n_g2, lambda = sqrt(log(p_g2)/n_g2),
progress = FALSE
)
)
} else {
fit_g1 <- ggmncv(R = cor(Y_g1, method = method),
n = n_g1, ...,
progress = FALSE)
fit_g2 <- ggmncv(R = cor(Y_g2, method = method),
n = n_g2, ...,
progress = FALSE)
}
pcor_g1 <- fit_g1$P
pcor_g2 <- fit_g2$P
# observed: global strength
glstr_obs_g1 <- sum(abs(pcor_g1[upper.tri(I_p)]))
glstr_obs_g2 <- sum(abs(pcor_g2[upper.tri(I_p)]))
glstr_obs <- abs(glstr_obs_g1 - glstr_obs_g2)
# observed: SSE
sse_obs <- sum((pcor_g1[upper.tri(I_p)] - pcor_g2[upper.tri(I_p)])^2)
# observed: jensen shannon distance
jsd_obs <- jsd(fit_g1$Theta, fit_g2$Theta)
# observed: max diff
max_obs <- max(abs(pcor_g1[upper.tri(I_p)] - pcor_g2[upper.tri(I_p)]))
obs <- list(glstr_obs = glstr_obs,
sse_obs = sse_obs,
jsd_obs = jsd_obs,
max_obs = max_obs)
n_seq <- seq_len(n_total)
stacked_data <- rbind(Y_g1, Y_g2)
if(!is.null(FUN)){
if(length(FUN) == 1 & is(FUN, "list")){
stop("one function in `FUN` cannot be a list.")
}
if(is(FUN, "function")){
obs$fun_obs <- list(FUN(pcor_g1, pcor_g2))
} else if (is(FUN, "list")){
obs$fun_obs <- lapply(FUN, function(f) f(pcor_g1, pcor_g2))
} else {
stop("custom not supported. must be a function or list of functions")
}
}
if(cores == 1){
if(progress){
pb <- utils::txtProgressBar(min = 0, max = iter, style = 3)
}
iter_results <- lapply(X = 1:iter, function(x){
perm_g1 <- sample(1:n_total, size = n_g1, replace = FALSE)
Y_g1_perm <- stacked_data[perm_g1,]
Y_g2_perm <- stacked_data[n_seq[-perm_g1],]
if(desparsify){
fit_g1 <-
desparsify(
ggmncv(
R = cor(Y_g1_perm, method = method),
n = n_g1, lambda = sqrt(log(p_g1)/n_g1),
progress = FALSE
)
)
fit_g2 <-
desparsify(
ggmncv(
R = cor(Y_g2_perm, method = method),
n = n_g2, lambda = sqrt(log(p_g2)/n_g2),
progress = FALSE
)
)
} else {
fit_g1 <- ggmncv(R = cor(Y_g1_perm, method = method),
n = n_g1, ...,
progress = FALSE)
fit_g2 <- ggmncv(R = cor(Y_g2_perm, method = method),
n = n_g2, ...,
progress = FALSE)
}
pcor_g1 <- fit_g1$P
pcor_g2 <- fit_g2$P
# observed: global strength
glstr_perm_g1 <- sum(abs(pcor_g1[upper.tri(I_p)]))
glstr_perm_g2 <- sum(abs(pcor_g2[upper.tri(I_p)]))
glstr_diff_perm <- abs(glstr_perm_g1 - glstr_perm_g2)
# observed: SSE
sse_perm <- sum((pcor_g1[upper.tri(I_p)] - pcor_g2[upper.tri(I_p)])^2)
# observed: jensen shannon distance
jsd_perm <- jsd(fit_g1$Theta, fit_g2$Theta)
# observed: max diff
max_diff_perm <- max(abs(pcor_g1[upper.tri(I_p)] - pcor_g2[upper.tri(I_p)]))
returned_obj <- list(glstr_perm = glstr_diff_perm,
sse_perm = sse_perm,
jsd_perm = jsd_perm,
max_perm = max_diff_perm)
if(!is.null(FUN)){
if(is(FUN, "function")){
returned_obj$fun_perm <- FUN(pcor_g1, pcor_g2)
} else if (is(FUN, "list")){
returned_obj$fun_perm <- lapply(FUN, function(f) f(pcor_g1, pcor_g2))
} else {
stop("custom not supported. must be a function or list of functions")
}
}
if(progress){
utils::setTxtProgressBar(pb, x)
}
return(returned_obj)
})
} else {
cl <- parallel::makeCluster(cores)
if(progress){
pbapply::pboptions(nout = update_progress)
iter_results <- pbapply::pblapply(X = 1:iter, function(x){
perm_g1 <- sample(1:n_total, size = n_g1, replace = FALSE)
Y_g1_perm <- stacked_data[perm_g1,]
Y_g2_perm <- stacked_data[n_seq[-perm_g1],]
if(desparsify){
fit_g1 <-
desparsify(
ggmncv(
R = cor(Y_g1_perm, method = method),
n = n_g1, lambda = sqrt(log(p_g1)/n_g1),
progress = FALSE
)
)
fit_g2 <-
desparsify(
ggmncv(
R = cor(Y_g2_perm, method = method),
n = n_g2, lambda = sqrt(log(p_g2)/n_g2),
progress = FALSE
)
)
} else {
fit_g1 <- ggmncv(R = cor(Y_g1_perm, method = method),
n = n_g1, ...,
progress = FALSE)
fit_g2 <- ggmncv(R = cor(Y_g2_perm, method = method),
n = n_g2, ...,
progress = FALSE)
}
pcor_g1 <- fit_g1$P
pcor_g2 <- fit_g2$P
# observed: global strength
glstr_perm_g1 <- sum(abs(pcor_g1[upper.tri(I_p)]))
glstr_perm_g2 <- sum(abs(pcor_g2[upper.tri(I_p)]))
glstr_diff_perm <- abs(glstr_perm_g1 - glstr_perm_g2)
# observed: SSE
sse_perm <- sum((pcor_g1[upper.tri(I_p)] - pcor_g2[upper.tri(I_p)])^2)
# observed: jensen shannon distance
jsd_perm <- jsd(fit_g1$Theta, fit_g2$Theta)
# observed: max diff
max_diff_perm <- max(abs(pcor_g1[upper.tri(I_p)] - pcor_g2[upper.tri(I_p)]))
returned_obj <- list(glstr_perm = glstr_diff_perm,
sse_perm = sse_perm,
jsd_perm = jsd_perm,
max_perm = max_diff_perm)
if(!is.null(FUN)){
if(is(FUN, "function")){
returned_obj$fun_perm <- FUN(pcor_g1, pcor_g2)
} else if (is(FUN, "list")){
returned_obj$fun_perm <- lapply(FUN, function(f) f(pcor_g1, pcor_g2))
} else {
stop("custom not supported. must be a function or list of functions")
}
}
return(returned_obj)
}, cl = cl)
} else {
iter_results <- parallel::parLapply(X = 1:iter, function(x){
perm_g1 <- sample(1:n_total, size = n_g1, replace = FALSE)
Y_g1_perm <- stacked_data[perm_g1,]
Y_g2_perm <- stacked_data[n_seq[-perm_g1],]
if(desparsify){
fit_g1 <-
desparsify(
ggmncv(
R = cor(Y_g1_perm, method = method),
n = n_g1, lambda = sqrt(log(p_g1)/n_g1),
progress = FALSE
)
)
fit_g2 <-
desparsify(
ggmncv(
R = cor(Y_g2_perm, method = method),
n = n_g2, lambda = sqrt(log(p_g2)/n_g2),
progress = FALSE
)
)
} else {
fit_g1 <- ggmncv(R = cor(Y_g1_perm, method = method),
n = n_g1, ...,
progress = FALSE)
fit_g2 <- ggmncv(R = cor(Y_g2_perm, method = method),
n = n_g2, ...,
progress = FALSE)
}
pcor_g1 <- fit_g1$P
pcor_g2 <- fit_g2$P
# observed: global strength
glstr_perm_g1 <- sum(abs(pcor_g1[upper.tri(I_p)]))
glstr_perm_g2 <- sum(abs(pcor_g2[upper.tri(I_p)]))
glstr_diff_perm <- abs(glstr_perm_g1 - glstr_perm_g2)
# observed: SSE
sse_perm <- sum((pcor_g1[upper.tri(I_p)] - pcor_g2[upper.tri(I_p)])^2)
# observed: jensen shannon distance
jsd_perm <- jsd(fit_g1$Theta, fit_g2$Theta)
# observed: max diff
max_diff_perm <- max(abs(pcor_g1[upper.tri(I_p)] - pcor_g2[upper.tri(I_p)]))
returned_obj <- list(glstr_perm = glstr_diff_perm,
sse_perm = sse_perm,
jsd_perm = jsd_perm,
max_perm = max_diff_perm)
if(!is.null(FUN)){
if(is(FUN, "function")){
returned_obj$fun_perm <- FUN(pcor_g1, pcor_g2)
} else if (is(FUN, "list")){
returned_obj$fun_perm <- lapply(FUN, function(f) f(pcor_g1, pcor_g2))
} else {
stop("custom not supported. must be a function or list of functions")
}
}
return(returned_obj)
}, cl = cl)
}
stopCluster(cl)
}
custom_results <- list()
perm_list <- list()
if(!is.null(FUN)){
if (length(FUN) == 1) {
if (length(obs$fun_obs[[1]]) == 1) {
perm_list[[1]] <-
sapply(iter_results, "[[", "fun_perm", simplify = TRUE)
custom_results[[1]] <-
mean(as.numeric(perm_list[[1]]) >= obs$fun_obs[[1]])
names(perm_list) <-
paste0(as.character(substitute(FUN)), "_perm")
names(custom_results) <-
paste0(as.character(substitute(FUN)), "_pvalue")
names(obs$fun_obs) <-
paste0(as.character(substitute(FUN)), "_obs")
} else {
perm_list[[1]] <- t(sapply(iter_results, "[[", "fun_perm",
simplify = TRUE))
custom_results[[1]] <- colMeans(apply(perm_list[[1]],
MARGIN = 2, function(x) {
abs(x) >= abs(obs$fun_obs[[1]])
}))
names(custom_results) <-
paste0(as.character(substitute(FUN)), "_pvalue")
names(perm_list) <-
paste0(as.character(substitute(FUN)), "_perm")
names(obs$fun_obs) <-
paste0(as.character(substitute(FUN)), "_obs")
} # end more than 1 test stat
} else {
for(i in seq_along(FUN)){
res_i <-
do.call(rbind,
t(sapply(iter_results, "[[", "fun_perm",
simplify = TRUE))[,i])
perm_list[[i]] <- res_i
if (length(obs$fun_obs[[i]]) > 1) {
custom_results[[i]] <- colMeans(apply(res_i,
MARGIN = 2, function(x) {
abs(x) >= abs(obs$fun_obs[[i]])
}))
} else {
custom_results[[i]] <- mean(res_i >= obs$fun_obs[[i]])
}
}
names(custom_results) <- paste0(names(FUN), "_pvalue")
names(perm_list) <- paste(names(FUN), "_perm")
names(obs$fun_obs) <- paste0(names(obs$fun_obs), "_obs")
}
}
default_names <- names(iter_results[[1]])[1:4]
perm_list_default <- lapply(default_names, function(x) {
results_x <- sapply(iter_results, "[[", x)
return(results_x)
})
default_results <- lapply(1:4, function(x) {
results_x <- mean(perm_list_default[[x]] >= obs[[x]])
return(results_x)
})
names(default_results) <- paste0(sub(pattern = "\\_.*",
replacement = "",
x = default_names), "_pvalue")
names(perm_list_default) <- default_names
returned_object <- c(default_results,
custom_results,
obs[1:4],
obs$fun_obs,
perm_list_default,
perm_list)
class(returned_object) <- "nct"
return(returned_object)
}
#' Print \code{nct} Objects
#'
#' @param x An object of class \code{nct}
#' @param ... Currently ignored.
#' @export
print.nct <- function(x, ...){
check_defaults <- length(grep("_pvalue", names(x)))
cat("Network Comparsion Test\n")
cat("(GGMncv Edition)\n")
cat("----\n")
cat("Maximum Difference\n")
cat("p-value:", x$max_pvalue, "\n")
cat("----\n")
cat("Global Strength\n")
cat("p-value:", x$glstr_pvalue, "\n")
cat("----\n")
cat("Sum of Squared Error\n")
cat("p-value:", x$sse_pvalue, "\n")
cat("----\n")
cat("Jensen-Shannon divergence\n")
cat("p-value:", x$jsd_pvalue, "\n")
cat("----\n")
if (check_defaults > 4) {
cat("note: compute p-values manually for custom tests. see vignettes.")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.