#' Extract backbone using the Ordinal Stochastic Degree Sequence Model
#'
#' `osdsm` extracts the backbone of a bipartite projection using the Ordinal Stochastic Degree Sequence Model.
#'
#' @param B An ordinally weighted bipartite graph, as: (1) an incidence matrix in the form of a matrix or sparse \code{\link{Matrix}}; (2) an edgelist in the form of a three-column dataframe; (3) an \code{\link{igraph}} object.
#' @param trials integer: the number of bipartite graphs generated to approximate the edge weight distribution. If NULL, the number of trials is selected based on `alpha` (see details)
#' @param alpha real: significance level of hypothesis test(s)
#' @param missing.as.zero boolean: should missing edges be treated as edges with zero weight and tested for significance
#' @param signed boolean: TRUE for a signed backbone, FALSE for a binary backbone (see details)
#' @param mtc string: type of Multiple Test Correction to be applied; can be any method allowed by \code{\link{p.adjust}}.
#' @param class string: the class of the returned backbone graph, one of c("original", "matrix", "Matrix", "igraph", "edgelist").
#' If "original", the backbone graph returned is of the same class as `B`.
#' @param narrative boolean: TRUE if suggested text & citations should be displayed.
#' @param progress boolean: TRUE if the progress of Monte Carlo trials should be displayed.
#' @param ... optional arguments
#'
#' @details
#' The `osdsm` function compares an edge's observed weight in the projection `B*t(B)` to the distribution of weights
#' expected in a projection obtained from a random bipartite network where both the rows and the columns contain
#' approximately the same number of each value. The edges in `B` must be integers, and are assumed to represent an
#' ordinal-level measure such as a Likert scale that starts at 0.
#'
#' When `signed = FALSE`, a one-tailed test (is the weight stronger?) is performed for each edge. The resulting backbone
#' contains edges whose weights are significantly *stronger* than expected in the null model. When `signed = TRUE`, a
#' two-tailed test (is the weight stronger or weaker?) is performed for each edge. The resulting backbone contains
#' positive edges for those whose weights are significantly *stronger*, and negative edges for those whose weights are
#' significantly *weaker*, than expected in the null model.
#'
#' The p-values used to evaluate the statistical significance of each edge are computed using Monte Carlo methods. The number of
#' `trials` performed affects the precision of these p-values. This precision impacts the confidence that a given edge's p-value
#' is less than the desired `alpha` level, and therefore represents a statistically significant edge that should be retained in
#' the backbone. When `trials = NULL`, [trials.needed()] is used to estimate the required number of trials to evaluate the
#' statistical significance of an edges' p-values.
#'
#' @return
#' If `alpha` != NULL: Binary or signed backbone graph of class `class`.
#'
#' If `alpha` == NULL: An S3 backbone object containing (1) the weighted graph as a matrix, (2) upper-tail p-values as a
#' matrix, (3, if `signed = TRUE`) lower-tail p-values as a matrix, (4, if present) node attributes as a dataframe, and
#' (5) several properties of the original graph and backbone model, from which a backbone can subsequently be extracted
#' using [backbone.extract()].
#'
#' @references package: {Neal, Z. P. (2022). backbone: An R Package to Extract Network Backbones. *PLOS ONE, 17*, e0269137. \doi{10.1371/journal.pone.0269137}}
#' @references osdsm: {Neal, Z. P. (2017). Well connected compared to what? Rethinking frames of reference in world city network research. *Environment and Planning A, 49*, 2859-2877. \doi{10.1177/0308518X16631339}}
#'
#' @export
#'
#' @examples
#' #A weighted binary bipartite network of 20 agents & 50 artifacts; agents form two communities
#' B <- rbind(cbind(matrix(sample(0:3, 250, replace = TRUE, prob = ((1:4)^2)),10),
#' matrix(sample(0:3, 250, replace = TRUE, prob = ((4:1)^2)),10)),
#' cbind(matrix(sample(0:3, 250, replace = TRUE, prob = ((4:1)^2)),10),
#' matrix(sample(0:3, 250, replace = TRUE, prob = ((1:4)^2)),10)))
#'
#' P <- B%*%t(B) #An ordinary weighted projection...
#' plot(igraph::graph_from_adjacency_matrix(P, mode = "undirected",
#' weighted = TRUE, diag = FALSE)) #...is a dense hairball
#'
#' bb <- osdsm(B, alpha = 0.05, narrative = TRUE, #An oSDSM backbone...
#' class = "igraph", trials = 100)
#' plot(bb) #...is sparse with clear communities
osdsm <- function(B, alpha = 0.05, trials = NULL, missing.as.zero = FALSE, signed = FALSE, mtc = "none", class = "original", narrative = FALSE, progress = TRUE, ...){
#### Class Conversion and Argument Checks ####
convert <- tomatrix(B)
if (class == "original") {class <- convert$summary$class}
attribs <- convert$attribs
B <- convert$G
if (convert$summary$weighted==FALSE){stop("Graph must be weighted.")}
if (convert$summary$bipartite==FALSE){
warning("This object is being treated as a bipartite network.")
convert$summary$bipartite <- TRUE
}
if (!is.null(trials)) {if (trials < 1 | trials%%1!=0) {stop("trials must be a positive integer")}}
if (is.null(trials) & is.null(alpha)) {stop("If trials = NULL, then alpha must be specified")}
if (!is.null(alpha)) {if (alpha < 0 | alpha > .5) {stop("alpha must be between 0 and 0.5")}}
weights <- sort(unique(as.vector(B))) #Unique edge weights
if (sum(weights%%1)!=0) {stop("Edge weights must be integers that reflect values on an ordinal scale.")}
if (any(weights < 0)) {stop("Edge weights must be positive integers that reflect values on an ordinal scale.")}
if (!(0 %in% weights)) {warning("Although no edges have weight = 0, the weight scale is assumed to start at 0.")}
weights <- c(0:max(B)) #If weights are valid, this is the full scale
#### Bipartite Projection ####
P <- tcrossprod(B)
### Create Positive and Negative Matrices to hold backbone ###
Pupper <- matrix(0, nrow(P), ncol(P))
if (signed) {Plower <- matrix(0, nrow(P), ncol(P))}
#### Compute probabilities for SDSM ####
#Vectorize the bipartite data
A <- data.frame(value = as.vector(B)) #Edge weight
A$rowid <- rep(1:nrow(B), times=ncol(B)) #Row index
A$colid <- rep(1:ncol(B), each=nrow(B)) #Column index
#Compute conditional probabilities using logistic regression (see Neal, 2017)
for (value in 1:max(weights)) { #For each edge weight > 0
dat <- data.frame(y = (A$value>=value)*1, x1 = stats::ave(A$value>=value,A$rowid,FUN=sum), x2 = stats::ave(A$value>=value,A$colid, FUN=sum))
fitted <- suppressWarnings(stats::glm(y ~ x1 + x2, data = dat[which(A$value>=(value-1)),], family = "binomial"))
A <- cbind(A, stats::predict(fitted, newdata = dat, type = "response"))
}
#Transform into unconditional probabilities (see Neal, 2017)
for (value in c(1:max(weights),0)) { #For each edge weight value, doing weight = 0 last
if (value == 1) {ucondp <- A[,4]}
if (value > 1) {ucondp <- apply(A[,4:(value+3)], 1, prod)}
if (value != 0 & value != max(weights)) {ucondp <- ucondp * (1 - A[,(value+4)])}
if (value == 0) {ucondp <- 1 - rowSums(A[,(4+max(weights)):ncol(A)])}
A <- cbind(A, ucondp)
}
A <- A[,c(1:3,(4+max(weights)):ncol(A))]
colnames(A) <- c("value", "rowid", "colid", paste0("p", c(1:max(weights),0)))
A$rand <- NA
#### Build null models ####
if (is.null(trials)) {trials <- trials.needed(M = P, alpha = alpha, signed = signed, missing.as.zero = missing.as.zero, mtc = mtc, ...)}
if (progress) {message(paste0("Constructing empirical edgewise p-values using ", trials, " trials -" ))}
if (progress) {pb <- utils::txtProgressBar(min = 0, max = trials, style = 3)} #Start progress bar
for (i in 1:trials){
#Use probabilities to create an SDSM Bstar
A$rand <- apply(X = A[,4:(max(weights)+4)], MARGIN = 1, FUN = function(x) sample(c(1:max(weights),0), size = 1, replace= TRUE, prob = x))
Bstar <- matrix(A$rand, nrow=nrow(B), ncol=ncol(B)) #Convert to matrix
#Construct Pstar from Bstar, check whether Pstar edge is larger/smaller than P edge
Pstar <- tcrossprod(Bstar)
Pupper <- Pupper + (Pstar > P)+0
if (signed) {Plower <- Plower + (Pstar < P)+0}
#Increment progress bar
if (progress) {utils::setTxtProgressBar(pb, i)}
} #end for loop
if (progress) {close(pb)} #End progress bar
#### Compute p-values ####
Pupper <- (Pupper/trials)
if (signed) {Plower <- (Plower/trials)}
#### If missing edges should *not* be treated as having zero weight, remove p-value and do not consider for backbone ####
if (!missing.as.zero) {
Pupper[P == 0] <- NA
if (signed) {Plower[P == 0] <- NA}
}
#### Create Backbone Object ####
bb <- list(G = P, #Preliminary backbone object
Pupper = Pupper,
model = "osdsm",
agents = nrow(B),
artifacts = ncol(B),
weighted = FALSE,
bipartite = TRUE,
symmetric = TRUE,
class = class,
trials = NULL)
if (signed) {bb <- append(bb, list(Plower = Plower))} #Add lower-tail values, if requested
if (!is.null(attribs)) {bb <- append(bb, list(attribs = attribs))} #Add node attributes, if present
class(bb) <- "backbone"
#### Return result ####
if (is.null(alpha)) {return(bb)} #Return backbone object if `alpha` is not specified
if (!is.null(alpha)) { #Otherwise, return extracted backbone (and show narrative text if requested)
backbone <- backbone.extract(bb, alpha = alpha, signed = signed, mtc = mtc, class = class, narrative = narrative)
return(backbone)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.