DF21: SNA functions and dataset for Data Forensics 2021-2022

Documented in vuln_efficiency

#' Vulnerability: efficiency
#' 
#' The vulnerability of a network, in terms of the change in 
#' necessary path lengths.
#' 
#' When a vertex is removed from the network, some other vertices 
#' might not be able to reach each other as efficiently 
#' as before anymore. For example, when A-B-C, the removal of 
#' vertex B requires A and C to connect through other vertices 
#' than through B, which might require more steps.
#' 
#' The "efficiency" of a graph can be defined in many ways, 
#' here we adopt two of the common definitions: the sum 
#' of the geodesic lengths between each pair of vertices or 
#' the harmonic mean of these lengths. 
#' The latter is the default in this function.
#' 
#' Actually, the measure captures the **lack** of efficiency: 
#' the more efficient the network, the lower should be the sum or the 
#' harmonic mean of the geodesic lengths. 
#' As a result, the efficiency score might more appropriately be 
#' called "inefficiency," but we use the term "efficiency" here for 
#' compatibility with the literature.
#' 
#' This algorithm calculates the change in efficiency due 
#' to vertex \code{i} as the increase in summed geodesic 
#' lengths (across the whole network)--or the harmonic mean 
#' of these lengths--after vertex \code{i} 
#' has been removed.
#' Note that this measure is not comparable between graphs of 
#' different sizes; it is therefore mainly useful to find 
#' those vertices that have the most (or, perhaps, least)
#' critical influence on the graph.
#' 
#' The output includes a column \code{vuln_prop} that 
#' is the ratio of the harmonic mean of (or: total) geodesic lengths after 
#' removing the vertex and that before removing it (
#' correcting for the paths to and from \code{i}). 
#' For example, if this ratio is 1.5, then the efficiency 
#' (without the paths to and from \code{i}) 
#' decreases by 50% after removing vertex \code{i}.
#' A score of 1 means that there is no effect on the 
#' efficiency of the graph when the vertex is removed
#' from the graph.
#' 
#' When a graph is fully connected, the geodesic between 
#' disconnected vertices is not defined and is commonly 
#' reported as infinite. 
#' Using infinite distances renders efficiency calculations 
#' largely useless, since it makes the summed geodesic lengths 
#' infinite as well and can even lead to an infinite reduction 
#' in summed geodesic lengths when a disconnected vertex is 
#' removed from the graph.
#' 
#' Therefore, the default in this function is to set the distance 
#' to a disconnected vertex to \code{n + 1}, with \code{n} equal 
#' to the number of vertices in the original graph (ie. the "size" of
#' the graph before removing any vertices). 
#' This is \code{disconnected == "size"}. 
#' If a weighted graph is used (and the \code{weight} argument has been 
#' specified appropriately), the distance 
#' to a disconnected vertex is set to to \code{n*maximum_weight + 1}. 
#' Of course, this is a somewhat arbitrary choice.
#' 
#' Alternatively, \code{"max"} sets it to 
#' "maximum distance + 1" (of the graph before 
#' removing any vertices). 
#' If an edge weight has been specified, it will be used here as well.
#' 
#' The argument \code{disconnected == "infinite"} 
#' maintains the \code{Inf} value for the geodesic lengths of paths 
#' between disconnected vertices.
#' 
#' Alternatively, one can also provide a numeric value 
#' for \code{disconnected}. In that case, the \code{Inf} values 
#' will replaced by this number.
#' 
#' When a vector of length larger than 1 is specified for 
#' \code{disconnected}, the first value will be used only.
#' 
#' @note This function is inspired by the
#'  \code{\link[NetSwan]{swan_efficiency}} function.
#' Our function is more robust and a lot more useful (e.g., the 
#' original function was useless for not-fully connected graphs.)
#' Our function yields the same SCORE as \code{swan_efficiency} 
#' through 
#' \code{vuln_efficiency(g, method = "sum", weight = NULL, disconnected = "infinite")}.
#' 
#' @param g object if class \code{igraph}
#' @param method Either "harmonic" (the default) or "sum". 
#' Denotes which method to use for the measure of efficiency.
#' @param mode Character constant, gives whether the shortest paths to or from 
#' the vertices should be calculated for directed graphs. If \code{out} then the 
#' shortest paths in the direction of the edges is taken, if \code{in} then 
#' paths will go against the direction of the edge. 
#' If \code{all}, the default, then the corresponding undirected graph will be 
#' used, ie. edge direction is not taken into account. 
#' This argument is ignored for undirected graphs.
#' @param weight Possibly a numeric vector giving edge weights. 
#' If this is \code{NA}--which is the default--
#' then no weights are used 
#' (even if the graph has a \code{weight} attribute).
#' If set to \code{NULL} and the graph 
#' has a \code{weight} edge attribute, then this edge attribute is used 
#' in the calculation of the distances. 
#' @param disconnected how to deal with disconnected vertices. 
#' See Details. This is irrelevant for fully connected graphs 
#' and then does not need to be set. 
#' The default is \code{disconnected == "size"}.
#' @param digits number of decimals for \code{vuln_prop}.
#'
#' @return
#' \code{data.frame}
#' 
#' @export
vuln_efficiency <- function(g, 
                            method = c("harmonic", "sum"),
                            mode = c("all", "out", "in"),
                            weight = NA,
                            disconnected = c("size", "max", "infinite"),
                            digits = 3) {
  
  if (!inherits(g, "igraph")) {
    stop("'g' should be an igraph object")
  }
  
  # let hier op het gebruik van &&, 
  # dat is nodig omdat is.na(NULL) gelijk is aan logical(0) ipv FALSE
  if (!is.null(weight) && !is.matrix(weight) && !is.na(weight)) {
    stop("'weight' can only be NA, NULL, or a numeric matrix.")
  }
  
  if (is.matrix(weight) & !is.numeric(weight)) {
    stop("If you specify a matrix for 'weight', it needs to be numeric.")
  }
  
  if (is.null(weight)) {
    attrs <- igraph::get.edge.attribute(g)
    if (!"weight" %in% names(attrs)) {
      stop("You specified NULL for 'weight', but there is no 'weight' edge attribute.")
    }
  }
  
  disconnected <- disconnected[1]
  method <- method[1]
  mode <- mode[1]
  n <- length(igraph::V(g))
  fin <- prop <- matrix(ncol = 1, nrow = n, 0)
  dt <- igraph::distances(g, mode = mode, weight = weight)
  
  if (disconnected == "size") {
    if (is.na(weight)) {   # no weight is used
      dt[dt == Inf] <- n + 1
    } else if (is.null(weight)) {  # weight edge attr is used
        max_weight <- max(attrs$weight, na.rm = TRUE)
        dt[dt == Inf] <- n*max_weight + 1
    } else if (is.matrix(weight)) {  # weight als matrix gespecificeerd
        max_weight <- max(weight, na.rm = TRUE)
        dt[dt == Inf] <- n*max_weight + 1
    }
  } else if (disconnected == "max") {
    maks <- max(dt[dt != Inf])
    dt[dt == Inf] <- maks
  } else if (is.numeric(disconnected)) {
    dt[dt == Inf] <- disconnected
  }
  
  if (method == "harmonic") {
    tot <- harmonic_matrix(dt)
  } else if (method == "sum") {
    tot <- sum(dt)
  } else {
    stop("Error in the argument for 'method'")
  }
  
  for (i in 1:n) {
    g2 <- g
    g2 <- igraph::delete_vertices(g2, i)
    dt2 <- igraph::distances(g2, mode = mode, weight = weight)
    
    if (disconnected == "size") {
      if (is.na(weight)) {   # no weight is used
        dt2[dt2 == Inf] <- n + 1
      } else if (is.null(weight) | is.matrix(weight)) { 
        # de toepasselijke max_weight is hierboven al berekend
        dt2[dt2 == Inf] <- n*max_weight + 1
      }
    } else if (disconnected == "max") {
      dt2[dt2 == Inf] <- maks
    } else if (is.numeric(disconnected)) {
      dt2[dt2 == Inf] <- disconnected
    }   
    

    if (method == "harmonic") {
      tot2 <- harmonic_matrix(dt2)
      # correct for the distances of the removed actor in the original graph
      tot_before <- harmonic_matrix(dt[-i, -i])
    } else if (method == "sum") {
      tot2 = sum(dt2)
      # correct for the distances of the removed actor in the original graph
      tot_before <- (tot - sum(dt[i, ]) - sum(dt[, i]))
    }
    
    # tot_before <- (tot - sum(dt[i, ]) - sum(dt[, i]))
    fin[i] <- tot2 - tot_before
    prop[i] <- tot2/tot_before
  }  # end for loop
  
  if (igraph::is.named(g)) {
    fin <- data.frame(name = igraph::V(g)$name,
                      vulnerability = fin,
                      vuln_prop = prop)
  } else {
    fin <- data.frame(vulnerability = fin,
                      vuln_prop = prop)
  }
  fin$vuln_prop <- round(fin$vuln_prop, digits = digits)
  return(fin)
}


harmonic_matrix <- function(x) {
  diag(x) <- NA
  1/mean(1/x, na.rm = TRUE)
}