## -----------------------------------------------------------------------
##
## IGraph R package
## Copyright (C) 2014 Gabor Csardi <csardi.gabor@gmail.com>
## 334 Harvard street, Cambridge, MA 02139 USA
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
## 02110-1301 USA
##
## -----------------------------------------------------------------------
#' Compute local scan statistics on graphs
#'
#' The scan statistic is a summary of the locality statistics that are
#' computed from the local neighborhood of each vertex. The
#' \code{local_scan} function computes the local statistic of each vertex
#' for a given neighborhood size and statistic function.
#'
#' See the reference below for details on local scan statistics.
#'
#' \code{local_scan} calculates exact local scan statistics.
#'
#' If \code{graph.them} is \code{NULL}, then \code{local_scan} computes the
#' \sQuote{us} variant of the scan statistics. Otherwise,
#' \code{graph.them} must be an igraph object, and the \sQuote{them}
#' variant is computed: the neighborhoods are extracted from
#' \code{graph.us} and \code{FUN} is applied to these neighborhoods in
#' \code{graph.them}.
#'
#' @param graph.us,graph An igraph object, the graph for which the scan
#' statistics will be computed
#' @param graph.them An igraph object or \code{NULL}. If not \code{NULL},
#' the \sQuote{them} statistics are computed, i.e. the neighborhoods
#' calculated from \code{graph.us} are evaluated on \code{graph.them}.
#' @param k An integer scalar, the size of the local neighborhood for each
#' vertex. Should be non-negative.
#' @param FUN Character, a function name, or a function object itself, for
#' computing the local statistic in each neighborhood. If \code{NULL} (the
#' default value), \code{ecount} is used for unweighted graphs (if
#' \code{weighted=FALSE}) and a function that computes the sum of edge
#' weights is used for weighted graphs (if \code{weighted=TRUE}). This
#' argument is ignored if \code{k} is zero. See the last example below for
#' supplying a custom \code{FUN}.
#' @param weighted Logical scalar, \code{TRUE} if the edge weights should be
#' used for the computation of the scan statistic. If \code{TRUE}, the graph
#' must be weighted. Note that this argument is ignored if \code{FUN} is not
#' \code{NULL}, \code{"ecount"} or \code{"sumweights"}.
#' @param mode Character scalar, the kind of neighborhoods to use for the
#' calculation. One of \sQuote{\code{out}}, \sQuote{\code{in}},
#' \sQuote{\code{all}} or \sQuote{\code{total}}. This argument is ignored
#' for undirected graphs.
#' @param neighborhoods A list of neighborhoods, one for each vertex, or
#' \code{NULL}. If it is not \code{NULL}, then the function is evaluated on
#' the induced subgraphs specified by these neighborhoods.
#'
#' In theory this could be useful if the same \code{graph.us} graph is used
#' for multiple \code{graph.them} arguments. Then the neighborhoods can be
#' calculated on \code{graph.us} and used with multiple graphs. In
#' practice, this is currently slower than simply using \code{graph.them}
#' multiple times.
#' @param \dots Arguments passed to \code{FUN}, the function that computes
#' the local statistics.
#' @return For \code{local_scan}, typically a numeric vector containing the
#' computed local statistics for each vertex; in general, a list or vector
#' of objects, as returned by \code{FUN}.
#'
#' @references Priebe, C. E., Conroy, J. M., Marchette, D. J., Park,
#' Y. (2005). Scan Statistics on Enron Graphs. \emph{Computational and
#' Mathematical Organization Theory}.
#'
#' @family scan statistics
#' @export
#' @examples
#' pair <- sample_correlated_gnp_pair(n = 10^3, corr = 0.8, p = 0.1)
#' local_0_us <- local_scan(graph.us = pair$graph1, k = 0)
#' local_1_us <- local_scan(graph.us = pair$graph1, k = 1)
#'
#' local_0_them <- local_scan(graph.us = pair$graph1,
#' graph.them = pair$graph2, k = 0)
#' local_1_them <- local_scan(graph.us = pair$graph1,
#' graph.them = pair$graph2, k = 1)
#'
#' Neigh_1 <- neighborhood(pair$graph1, order = 1)
#' local_1_them_nhood <- local_scan(graph.us = pair$graph1,
#' graph.them = pair$graph2,
#' neighborhoods = Neigh_1)
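#'
#' ## A minimal sketch (not part of the original examples) of a weighted
#' ## graph and a user-supplied statistic; `sample_gnp()`, `E()` and
#' ## `vcount()` are standard igraph functions, `wg` is a throwaway name.
#' wg <- sample_gnp(50, 0.1)
#' E(wg)$weight <- runif(ecount(wg))
#' local_scan(wg, k = 1, weighted = TRUE)              # sum of edge weights
#' local_scan(wg, k = 1, FUN = function(g) vcount(g))  # custom statistic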
local_scan <- function(graph.us, graph.them=NULL, k=1, FUN=NULL,
weighted=FALSE, mode=c("out", "in", "all"),
neighborhoods=NULL, ...) {
## Must be igraph object
stopifnot(is.igraph(graph.us))
## Must be NULL or igraph object
stopifnot(is.null(graph.them) || is.igraph(graph.them))
## If given, number of vertices must match
stopifnot(is.null(graph.them) || vcount(graph.them) == vcount(graph.us))
## k must be non-negative integer
stopifnot(length(k)==1, k >= 0, as.integer(k) == k)
## Must be NULL or a function
stopifnot(is.null(FUN) || is.function(FUN) ||
(is.character(FUN) && length(FUN) == 1))
## Logical scalar
stopifnot(is.logical(weighted), length(weighted) == 1)
## If weighted, then the graph(s) must be weighted
stopifnot(!weighted || (is.weighted(graph.us) && (is.null(graph.them) ||
is.weighted(graph.them))))
## Check if 'neighborhoods' makes sense
if (!is.null(neighborhoods)) {
stopifnot(is.list(neighborhoods))
stopifnot(length(neighborhoods) == vcount(graph.us))
}
if (!is.null(neighborhoods) && k==0) {
warning("`neighborhoods' ignored for k=0")
neighborhoods <- NULL
}
## Check mode argument
mode <- igraph.match.arg(mode)
cmode <- switch(mode, out = 1, `in` = 2, all = 3, total = 3)
sumweights <- function(g) sum(E(g)$weight)
if (is.null(FUN)) { FUN <- if (weighted) "sumweights" else "ecount" }
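## Dispatch: the 'us' variant (graph.them is NULL) vs. the 'them' variant;
## within each, the "ecount"/"sumweights" statistics for scan-0, scan-1 and
## scan-k are handled by dedicated C routines, while an arbitrary FUN is
## evaluated in R on the induced neighborhood subgraphs.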
res <- if (is.null(graph.them)) {
if (!is.null(neighborhoods)) {
if (is.character(FUN) && FUN %in% c("ecount", "sumweights")) {
neighborhoods <- lapply(neighborhoods, function(x) {
as.integer(x)-1L
})
on.exit(.Call(C_R_igraph_finalizer))
.Call(C_R_igraph_local_scan_neighborhood_ecount, graph.us,
if (weighted) as.numeric(E(graph.us)$weight) else NULL,
neighborhoods)
} else {
sapply(lapply(neighborhoods, induced.subgraph, graph=graph.us),
FUN, ...)
}
} else {
## scan-0
if (k == 0) {
on.exit(.Call(C_R_igraph_finalizer))
.Call(C_R_igraph_local_scan_0, graph.us,
if (weighted) as.numeric(E(graph.us)$weight) else NULL, cmode)
## scan-1, ecount
} else if (k==1 && is.character(FUN) &&
FUN %in% c("ecount", "sumweights")) {
on.exit(.Call(C_R_igraph_finalizer))
.Call(C_R_igraph_local_scan_1_ecount, graph.us,
if (weighted) as.numeric(E(graph.us)$weight) else NULL, cmode)
## scan-k, ecount
} else if (is.character(FUN) && FUN %in% c("ecount", "sumweights")) {
on.exit(.Call(C_R_igraph_finalizer))
.Call(C_R_igraph_local_scan_k_ecount, graph.us, as.integer(k),
if (weighted) as.numeric(E(graph.us)$weight) else NULL, cmode)
## General
} else {
sapply(graph.neighborhood(graph.us, order=k, V(graph.us), mode=mode),
FUN, ...)
}
}
} else {
if (!is.null(neighborhoods)) {
neighborhoods <- lapply(neighborhoods, as.vector)
if (is.character(FUN) && FUN %in% c("ecount", "wumweights")) {
neighborhoods <- lapply(neighborhoods, function(x) {
as.integer(x)-1L
})
on.exit(.Call(C_R_igraph_finalizer))
.Call(C_R_igraph_local_scan_neighborhood_ecount, graph.them,
if (weighted) as.numeric(E(graph.them)$weight) else NULL,
neighborhoods)
} else {
sapply(lapply(neighborhoods, induced.subgraph, graph=graph.them),
FUN, ...)
}
} else {
## scan-0
if (k == 0) {
on.exit(.Call(C_R_igraph_finalizer))
.Call(C_R_igraph_local_scan_0_them, graph.us, graph.them,
if (weighted) as.numeric(E(graph.them)$weight) else NULL,
cmode)
## scan-1, ecount
} else if (k==1 && is.character(FUN) &&
FUN %in% c("ecount", "sumweights")) {
on.exit(.Call(C_R_igraph_finalizer))
.Call(C_R_igraph_local_scan_1_ecount_them, graph.us, graph.them,
if (weighted) as.numeric(E(graph.them)$weight) else NULL,
cmode)
## scan-k, ecount
} else if (is.character(FUN) && FUN %in% c("ecount", "sumweights")) {
on.exit(.Call(C_R_igraph_finalizer))
.Call(C_R_igraph_local_scan_k_ecount_them, graph.us, graph.them,
as.integer(k),
if (weighted) as.numeric(E(graph.them)$weight) else NULL,
cmode)
## general case
} else {
sapply(V(graph.us), function(x) {
vei <- neighborhood(graph.us, order=k, nodes=x, mode=mode)[[1]]
if (!is.function(FUN)) { FUN <- getFunction(FUN, where=environment()) }
FUN(induced.subgraph(graph.them, vei), ...)
})
}
}
}
res <- as.numeric(res)
if (igraph_opt("add.vertex.names") && is_named(graph.us)) {
names(res) <- V(graph.us)$name
}
res
}
#' Scan statistics on a time series of graphs
#'
#' Calculate scan statistics on a time series of graphs.
#' This is done by calculating the local scan statistics for
#' each graph and each vertex, and then normalizing across the
#' vertices and across the time steps.
#'
#' @param graphs A list of igraph graph objects. They must be all directed
#' or all undirected and they must have the same number of vertices.
#' @param tau The number of previous time steps to consider for the
#' time-dependent normalization for individual vertices. In other words,
#' the current locality statistics of each vertex will be compared to this
#' many previous time steps of the same vertex to decide whether it is
#' significantly larger.
#' @param ell The number of previous time steps to consider
#' for the aggregated scan statistics. This is essentially a smoothing
#' parameter.
#' @param locality Whether to calculate the \sQuote{us} or \sQuote{them}
#' statistics.
#' @param ... Extra arguments are passed to \code{\link{local_scan}}.
#' @return A list with entries:
#' \item{stat}{The scan statistic at each time step. It is \code{NA}
#' for the initial \code{tau + ell} time steps.}
#' \item{arg_max_v}{The (numeric) id of the vertex with the largest
#' locality statistic at each time step. It is \code{NA} for the initial
#' \code{tau + ell} time steps. See the last example below.}
#'
#' @family scan statistics
#' @export
#' @examples
#' ## Generate a bunch of SBMs, with the last one being different
#' num_t <- 20
#' block_sizes <- c(10, 5, 5)
#' p_ij <- list(p = 0.1, h = 0.9, q = 0.9)
#'
#' P0 <- matrix(p_ij$p, 3, 3)
#' P0[2, 2] <- p_ij$h
#' PA <- P0
#' PA[3, 3] <- p_ij$q
#' num_v <- sum(block_sizes)
#'
#' tsg <- replicate(num_t - 1, P0, simplify = FALSE) %>%
#' append(list(PA)) %>%
#' lapply(sample_sbm, n = num_v, block.sizes = block_sizes, directed = TRUE)
#'
#' scan_stat(graphs = tsg, k = 1, tau = 4, ell = 2)
#' scan_stat(graphs = tsg, locality = "them", k = 1, tau = 4, ell = 2)
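#'
#' ## Illustrative follow-up (a sketch, not part of the original example):
#' ## the perturbed SBM is the last graph in `tsg`, so once the initial
#' ## NA burn-in has passed its scan statistic should stand out.
#' ss <- scan_stat(graphs = tsg, k = 1, tau = 4, ell = 2)
#' which.max(ss$stat)                  # expected to be num_t
#' ss$arg_max_v[which.max(ss$stat)]    # vertex driving the detection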
scan_stat <- function(graphs, tau = 1, ell = 0,
locality = c("us", "them"), ...) {
## List of igraph graphs; all must have the same directedness
## and the same number of vertices
stopifnot(is.list(graphs),
length(graphs) > 0,
all(sapply(graphs, is_igraph)),
length(unique(sapply(graphs, is_directed))) == 1,
length(unique(sapply(graphs, gorder))) == 1)
## tau must be a non-negative integer
stopifnot(length(tau) == 1, tau >= 0, as.integer(tau) == tau)
## ell must be a non-negative integer
stopifnot(length(ell) == 1, ell >= 0, as.integer(ell) == ell)
locality <- igraph.match.arg(locality)
## number of time steps and number of vertices
maxTime <- length(graphs)
nVertex <- vcount(graphs[[1]])
if (locality == 'us') {
## Underlying locality stat is us
lstatPsi <- matrix(0, nrow = nVertex, ncol = maxTime)
for (i in 1:maxTime) {
## locality statistics \Psi over all vertices at t=i
lstatPsi[,i] <- local_scan(graphs[[i]], ...)
}
lstat <- lstatPsi
} else if (locality == 'them') {
## Underlying locality stat is \Phi
lstatPhi <- array(0, dim = c(nVertex, (tau + 1), maxTime))
for (i in 1:maxTime) {
if (i > tau) {
## graph to trace k-th order neighborhood
g <- graphs[[i]]
for (j in 0:tau) {
## locality statistics \Phi over all vertices with t=i and t'=i-tau+j
lstatPhi[, (j + 1), i] <- local_scan(
graph.us = graphs[[i]],
graph.them= graphs[[i - tau + j]],
...
)
}
}
}
lstat <- lstatPhi
}
## vertex-dependent and temporal normalization
scan_temp_norm(
scan_vertex_norm(lstat, tau),
tau,
ell
)
}
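## scan_vertex_norm(): vertex-wise normalization of the raw locality
## statistics. For each vertex and each time step i > tau, the current value
## is centered by the mean of that vertex's previous `tau` values and scaled
## by pmax(sd, 1) over the same window; for the 'them' locality the input is
## a three-dimensional array and the window is stored alongside each time step.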
#' @importFrom stats sd
scan_vertex_norm <- function(input_stat, tau) {
if (is.matrix(input_stat)) {
n <- nrow(input_stat)
nbins <- ncol(input_stat)
nstat <- matrix(0, n, nbins)
for (i in 1:nbins) {
if (i > tau) {
if (tau == 0) {
nstat[,i] <- input_stat[, i]
} else {
muv <- apply(as.matrix(input_stat[, (i - tau):(i-1)]), 1, mean)
sdv <- apply(as.matrix(input_stat[, (i - tau):(i-1)]), 1, sd)
sdv[is.na(sdv)] <- 1
nstat[, i] <- (input_stat[, i] - muv) / pmax(sdv, 1)
}
}
}
} else {
dd <- dim(input_stat)
n <- dd[1]
nbins <- dd[3]
nstat <- matrix(0, n, nbins)
for (i in 1:nbins) {
if (i > tau) {
if (tau == 0) {
nstat[, i] <- input_stat[, (tau + 1), i]
} else {
muv <- apply(as.matrix(input_stat[, (1 : tau), i]), 1, mean)
sdv <- apply(as.matrix(input_stat[, (1 : tau), i]), 1, sd)
sdv[is.na(sdv)] <- 1
nstat[, i] <- (input_stat[, (tau + 1),i] - muv) / pmax(sdv, 1)
}
}
}
}
return(nstat)
}
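## scan_temp_norm(): temporal normalization of the scan statistic. Mtilde is
## the maximum (vertex-normalized) locality statistic at each time step; for
## ell >= 2 it is standardized against the previous `ell` values (scaling by
## pmax(sd, 1)), for ell == 1 a first difference is used, and for ell == 0 it
## is returned as is. The first tau + ell entries are set to NA.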
#' @importFrom stats sd
scan_temp_norm <- function (stat, tau, ell) {
maxTime <- ncol(stat)
Mtilde <- apply(stat, 2, max)
argmaxV <- apply(stat, 2, which.max)
if (ell == 0) {
res <- list(stat = Mtilde, arg_max_v = argmaxV)
} else if (ell == 1) {
res <- list(stat = Mtilde - c(NA, Mtilde[-maxTime]), arg_max_v = argmaxV)
} else {
muMtilde <- rep(0, maxTime)
sdMtilde <- rep(1, maxTime)
for (i in (ell + 1):maxTime) {
muMtilde[i] <- mean(Mtilde[(i - ell):(i - 1)])
sdMtilde[i] <- sd(Mtilde[(i - ell):(i - 1)])
}
sstat <- (Mtilde - muMtilde) / pmax(sdMtilde, 1)
res <- list(stat = sstat, arg_max_v = argmaxV)
}
res$stat[seq_len(tau + ell)] <- NA
res$arg_max_v[seq_len(tau + ell)] <- NA
res
}