R/enr.R
In GGMnonreg: Non-Regularized Gaussian Graphical Models

Documented in enr plot.enr

#' Expected Network Replicability
#'
#' @description Compute expected network replicability in any number of
#'              replication attempts. This works for any kind of
#'              correlation, assuming it is possible to obtain the
#'              standard error (analytically or with a bootstrap).
#'
#' @param net True network of dimensions \emph{p} by \emph{p}.
#'
#' @param n Integer. The samples size, assumed equal in the replication
#'          attempts.
#'
#' @param alpha The desired significance level (defaults to \code{0.05}). Note that
#'              1 - alpha corresponds to specificity.
#'
#' @param replications Integer. The desired number of replications.
#'
#' @param type Character string. Which type of correlation coefficients
#'               to be computed. Options include \code{"pearson"} (default)
#'               and \code{"spearman"}.
#'
#' @references
#' \insertAllCited{}
#'
#' @return An list of class \code{enr}, including a variety of information used
#'         by other functions (e.g., to plot the results).
#'
#' @note This method was introduced in \insertCite{williams2020learning;textual}{GGMnonreg}.
#'
#' @export
#'
#' @examples
#' \donttest{
#' # correlations
#' cors <- cor(GGMnonreg::ptsd)
#'
#' # inverse
#' inv <- solve(cors)
#'
#' # partials
#' pcors <-  -cov2cor(inv)
#'
#' # set values to zero
#' pcors <- ifelse(abs(pcors) < 0.05, 0, pcors)
#'
#' fit_enr <- enr(net = pcors, n = 500, replications = 2)
#'
#'
#'# intuition for the method
#'
#' # location of edges
#' index <- which(pcors[upper.tri(diag(20))] != 0)
#'
#' # convert network into correlation matrix
#' diag(pcors) <- 1
#' cors_new <- corpcor::pcor2cor(pcors)
#'
#' # replicated edges
#' R <- NA
#'
#' # increase 100 to, say, 5,000
#' for(i in 1:100){
#'
#'   # two replications
#'   Y1 <- MASS::mvrnorm(500, rep(0, 20), cors_new)
#'   Y2 <- MASS::mvrnorm(500, rep(0, 20), cors_new)
#'
#'   # estimate network 1
#'   fit1 <- ggm_inference(Y1, boot = FALSE)
#'
#'   # estimate network 2
#'   fit2 <- ggm_inference(Y2, boot = FALSE)
#'
#'   # number of replicated edges (detected in both networks)
#'   R[i] <- sum(
#'     rowSums(
#'       cbind(fit1$adj[upper.tri(diag(20))][index],
#'             fit2$adj[upper.tri(diag(20))][index])
#'     ) == 2)
#' }
#'
#'
#' # combine simulation and analytic
#' cbind.data.frame(
#'   data.frame(simulation = sapply(seq(0, 0.9, 0.1), function(x) {
#'     mean(R > round(length(index) * x) )
#'   })),
#'   data.frame(analytic = round(fit_enr$cdf, 3))
#' )
#'
#' # average replicability (simulation)
#' mean(R / length(index))
#'
#' # average replicability (analytic)
#' fit_enr$ave_pwr
#'
#' }
#'
#' @importFrom poibin ppoibin
enr <- function(net, n, alpha = 0.05, replications = 2, type = "pearson"){

  # variables
  p <- ncol(net)

  # control variables
  c <- p - 2

  # off diagonals
  pcs <- net[upper.tri(net)]

  # which are edges
  which_nonzero <- which(pcs != 0)

  # how many non zeroes
  n_nonzero <- length(which_nonzero)

  qs <- round(seq(0, 0.9, 0.1) * length(which_nonzero))
    # quantile(1:length(which_nonzero),
                        # probs = seq(0, 0.9, 0.1)))

  # probability of success
  p_s <- power_z(pcs[which_nonzero],
                 n = n,
                 c = c,
                 type = type,
                 alpha = alpha)^replications

  # average power
  ave_pwr <- mean(p_s)

  #var
  var_pwr <- sum((1 - p_s) * p_s)

  # CDF greater
  cdf <- 1 - poibin::ppoibin(qs, p_s)

  returned_object <- list(ave_pwr = ave_pwr,
                          cdf = cdf,
                          p_s = p_s,
                          p = p,
                          n_nonzero = n_nonzero,
                          n = n,
                          replications = replications,
                          var_pwr = var_pwr,
                          type = type)

  class(returned_object) <- c("ggmnonreg", "enr")
  returned_object
}

print_enr <- function(x,...){
  cat("Average Replicability:", round(x$ave_pwr, 2), "\n")
  cat("Average Number of Edges:",
      round(round(x$ave_pwr, 2) * x$n_nonzero),
      paste0( "(SD = ", round(sqrt(x$var_pwr), 2), ")"), "\n\n")
  cat("----\n\n")
  cat("Cumulative Probability:" , "\n\n")

  dat <- data.frame(prop = seq(0, .90, .10),
                    round(x$n_nonzero *  seq(0, 0.9, 0.1)),
                    # round(quantile(1:x$n_nonzero,
                                   # probs = seq(0, 0.9, 0.1))),
                    prob = round(x$cdf, 2))

  colnames(dat) <- c("prop.edges", "edges", "Pr(R > prop.edges)")
  print(dat, row.names = F, right = T)

  cat("----\n")
  cat(paste0("Pr(R > prop.edges):\n", "probability of replicating more than the\n",
  "correpsonding proportion (and number) of edges"))
  }


#' @title Plot \code{enr} Objects
#'
#' @description Plot the probability mass function for ENR.
#'
#' @param x An object of class \code{enr}.
#'
#' @param iter Integer. How many draws from the
#'             Poisson-binomial distribution (defaults to 1,000)?
#'
#' @param fill Which color to fill the density?
#'
#' @param alpha Numeric (between 0 and 1). The transparency
#'              for the density.
#'
#' @param ... Currently ignored
#'
#' @return An object of class \code{ggplot}
#'
#' @export
#'
#' @examples
#' \donttest{
#' # correlations
#' cors <- cor(GGMnonreg::ptsd)
#'
#' # inverse
#' inv <- solve(cors)
#'
#' # partials
#' pcors <-  -cov2cor(inv)
#'
#' # set values to zero
#' pcors <- ifelse(abs(pcors) < 0.05, 0, pcors )
#'
#' est <- enr(net = pcors, n = 500, replications = 2)
#'
#' # plot
#' plot(est)
#'}
#'
#' @importFrom ggplot2 ggplot aes geom_vline geom_density scale_x_continuous xlab scale_y_continuous
#'
plot.enr <- function(x, iter = 100000,
                     fill = "#009E73",
                     alpha = 0.5, ...){

  # random variable
  y <- poibin::rpoibin(iter, pp = x$p_s)

  # data frame
  dat <- data.frame(y = y)

  # plot
  ggplot(dat, aes(x = y )) +
    geom_vline(xintercept = mean(y),
               linetype = "dotted") +
    geom_density(adjust = 2,
                 fill =  fill, alpha = alpha) +
    scale_x_continuous(limits = c(min(y), max(y) ),
                       breaks = seq(min(y), max(y), length.out = 5),
                       labels = paste0(round(seq(min(y), max(y),
                                                  length.out = 5) / x$n_nonzero, 2)*100, "%")) +
  xlab("Replicated Edges") +
  scale_y_continuous(expand = c(0,0))

}