# R/graph.takahashi.test.R
# In statGraph: Statistical Methods for Graphs

# Documented in graph.takahashi.test

#' Test for the Jensen-Shannon Divergence Between Graphs
#'
#' \code{graph.takahashi.test} tests whether two sets of graphs were generated by the same
#' random graph model.
#' This bootstrap test is based on the Jensen-Shannon (JS) divergence between
#' graphs.
#'
#' Given two lists of graphs, \code{Graphs1} and \code{Graphs2}, \code{graph.takahashi.test} tests H0: 'JS
#' divergence between \code{Graphs1} and \code{Graphs2} is \code{0}' against H1: 'JS divergence between
#' \code{Graphs1} and \code{Graphs2} is larger than \code{0}'.
#'
#' The observed statistic is the distance between the mean spectral densities
#' of the two lists. Each bootstrap replicate draws, with replacement from the
#' pooled list of graphs, one sample of size \code{length(Graphs1)} and one of
#' size \code{length(Graphs2)}, and computes the distance between their mean
#' spectral densities. The p-value is \code{(1 + b) / (maxBoot + 1)}, where
#' \code{b} is the number of replicates whose distance is at least as large as
#' the observed one, so the reported p-value is never exactly \code{0}.
#'
#' @param Graphs1 a list of undirected graphs.
#' If each graph has the attribute \code{eigenvalues} containing its
#' eigenvalues, such values will be used to
#' compute their spectral density.
#'
#' @param Graphs2 a list of undirected graphs.
#' If each graph has the attribute \code{eigenvalues} containing its
#' eigenvalues, such values will be used to
#' compute their spectral density.
#'
#' @param maxBoot integer indicating the number of bootstrap resamplings (default \code{1000}).
#'
#' @param dist string indicating if you want to use the 'JS' (default), 'L1' or 'L2'
#' distances. 'JS' means Jensen-Shannon divergence.
#'
#' @param ... Other relevant parameters for \code{\link{graph.spectral.density}}.
#'
#' @return A list with class 'htest' containing the following components:
#' \item{\code{statistic:}}{ the value of the Jensen-Shannon divergence (default), L1 or L2 between 'Graphs1' and 'Graphs2'.}
#' \item{\code{p.value:}}{ the bootstrap p-value of the test.}
#' \item{\code{method:}}{ a string indicating the used method.}
#' \item{\code{data.name:}}{ a string with the data's name(s).}
#'
#' @keywords graph_comparison
#'
#' @references
#' Takahashi, D. Y., Sato, J. R., Ferreira, C. E. and Fujita A. (2012)
#' Discriminating Different Classes of Biological Networks by Analyzing the
#' Graph Spectra Distribution. _PLoS ONE_, *7*, e49949.
#' doi:10.1371/journal.pone.0049949.
#'
#' Silverman, B. W. (1986) _Density Estimation_. London: Chapman and Hall.
#'
#' Sturges, H. A. (1926) The Choice of a Class Interval. _J. Am. Statist. Assoc._,
#' *21*, 65-66.
#'
#' Sheather, S. J. and Jones, M. C. (1991). A reliable data-based bandwidth
#' selection method for kernel density estimation.
#' _Journal of the Royal Statistical Society series B_, 53, 683-690.
#' http://www.jstor.org/stable/2345597.
#'
#' @examples
#' set.seed(1)
#' G1 <- G2 <- list()
#' for (i in 1:20) {
#'   G1[[i]] <- igraph::sample_gnp(n=50, p=0.500)
#' }
#' for (i in 1:20) {
#'   G2[[i]] <- igraph::sample_gnp(n=50, p=0.512)
#' }
#' result <- graph.takahashi.test(G1, G2, maxBoot=500)
#' result
#'
#' @import methods
#' @export
graph.takahashi.test <- function(Graphs1, Graphs2, maxBoot = 1000, dist = "JS", ...) {
    if (!valid.input(Graphs1, level = 1) || !valid.input(Graphs2, level = 1)) {
        stop("The inputs should be a list of graphs")
    }

    # Capture the caller's argument expressions before Graphs1/Graphs2 are
    # reassigned below. deparse() may split a long expression over several
    # strings, so collapse each one to keep data.name a single string.
    data.name <- paste(paste(deparse(substitute(Graphs1)), collapse = " "),
                       "and",
                       paste(deparse(substitute(Graphs2)), collapse = " "))

    # common support for the spectral densities of both groups
    from <- min(get.smallest.eigenvalue(Graphs1), get.smallest.eigenvalue(Graphs2))
    to <- max(get.largest.eigenvalue(Graphs1), get.largest.eigenvalue(Graphs2))

    # compute spectral densities for each group of graphs on that support
    Graphs1 <- set.list.spectral.density(Graphs1, from = from, to = to, ...)
    Graphs2 <- set.list.spectral.density(Graphs2, from = from, to = to, ...)

    # group sizes and the pooled list the bootstrap resamples from
    n1 <- length(Graphs1)
    n2 <- length(Graphs2)
    all_Graphs <- c(Graphs1, Graphs2)

    # observed statistic: distance between the two mean spectral densities
    mean_Graphs1 <- get.mean.spectral.density(Graphs1)
    mean_Graphs2 <- get.mean.spectral.density(Graphs2)
    result <- distance(mean_Graphs1, mean_Graphs2, dist = dist)

    # bootstrap distribution of the statistic: both groups are drawn with
    # replacement from the pooled list
    results <- numeric(maxBoot)
    # iterate over the preallocated slots so that maxBoot = 0 runs no
    # replicate (1:0 would iterate over c(1, 0))
    for (i in seq_along(results)) {
        Graphs_sample_1 <- sample(all_Graphs, n1, replace = TRUE)
        Graphs_sample_2 <- sample(all_Graphs, n2, replace = TRUE)
        den_1 <- get.mean.spectral.density(Graphs_sample_1)
        den_2 <- get.mean.spectral.density(Graphs_sample_2)
        results[i] <- distance(den_1, den_2, dist = dist)
    }

    # Monte Carlo p-value: the observed statistic counts as one of the
    # maxBoot + 1 values, so it is added to the numerator as well as to the
    # denominator; this keeps the p-value strictly positive.
    pvalue <- (1 + sum(results >= result)) / (maxBoot + 1)

    method_info <- paste0("Jensen-Shannon Divergence Between Graphs with\n       simulated p-value (based on ", maxBoot, " replicates)")
    statistic <- result
    names(statistic) <- dist
    rval <- list(statistic = statistic, p.value = pvalue, method = method_info, data.name = data.name)
    class(rval) <- "htest"
    return(rval)
}