Nothing
.onAttach <- function(lib,pkg) {
local_version <- utils::packageVersion("backbone")
packageStartupMessage(" ____ backbone v",local_version)
packageStartupMessage("| _ \\ Cite: Neal, Z. P. (2026). backbone: An R package to extract network backbones.")
packageStartupMessage("|#|_) | PLOS One, 21, e0349258. https://doi.org/10.1371/journal.pone.0349258")
packageStartupMessage("|# _ < ")
packageStartupMessage("|#|_) | Help: type vignette(\"backbone\"); email zpneal@msu.edu; github zpneal/backbone")
packageStartupMessage("|____/ Beta: type devtools::install_github(\"zpneal/backbone\", ref = \"devel\")")
}
#' Choose edges to retain in a backbone network
#'
#' @param p edgewise p-values generated by a backbone model
#' @param alpha real: significance level of hypothesis test(s)
#' @param mtc string: type of Multiple Test Correction to be applied; can be any method allowed by \code{\link{p.adjust}}.
#'
#' @return unweighted (possibly signed) adjacency matrix
#'
#' @noRd
.retain <- function(p, alpha, mtc){
#### Extract binary backbone (one-tailed test; only positively-weighed edges considered) ####
if (length(p)==1) {
diag(p$upper) <- NA #Eliminate p-values for loops; not relevant
if (mtc != "none") { #Adjust p-values for familywise error, if requested
if (isSymmetric(p$upper)) {p$upper[upper.tri(p$upper)] <- NA} #If undirected, ignore upper triangle
pvalues <- as.vector(p$upper) #Vector of p-values
pvalues <- stats::p.adjust(pvalues, method = mtc, n = sum(!is.na(pvalues))) #Adjust p-values, where number of comparisons is number of non-missing p-values
p$upper <- matrix(pvalues, nrow = nrow(p$upper), ncol = ncol(p$upper)) #Put adjusted p-values in original p-value matrix
if (all(is.na(p$upper[upper.tri(p$upper)]))) {p$upper[upper.tri(p$upper)] <- t(p$upper)[upper.tri(p$upper)]} #If upper triangle is missing, put it back
}
backbone <- (p$upper < alpha)*1 #Identify all significant edges, code as 1
backbone[which(is.na(backbone))] <- 0 #fill NAs (missing and non-significant edges) with 0s
}
#### Extract signed backbone (two-tailed test; all dyads considered) ####
if (length(p)==2) {
alpha <- alpha / 2 #Use two-tailed test
smaller_p <- pmin(p$upper,p$lower) #Find smaller p-value (upper-tail or lower-tail) for each edge
diag(smaller_p) <- NA
upper_tail <- (p$upper < p$lower) #Find tail of smaller p-value (TRUE if smaller p-value is in upper tail)
diag(upper_tail) <- NA
if (mtc != "none") { #Adjust p-values for familywise error, if requested
if (isSymmetric(smaller_p)) {smaller_p[upper.tri(smaller_p)] <- NA} #If undirected, ignore upper triangle
pvalues <- as.vector(smaller_p) #Vector of p-values
pvalues <- stats::p.adjust(pvalues, method = mtc, n = sum(!is.na(pvalues))) #Adjust p-values, where number of comparisons is number of non-missing p-values
smaller_p <- matrix(pvalues, nrow = nrow(smaller_p), ncol = ncol(smaller_p)) #Put adjusted p-values in original p-value matrix
if (all(is.na(smaller_p[upper.tri(smaller_p)]))) {smaller_p[upper.tri(smaller_p)] <- t(smaller_p)[upper.tri(smaller_p)]} #If upper triangle is missing, put it back
}
backbone <- (smaller_p < alpha)*1 #Identify all significant edges, code as 1
backbone[which(upper_tail==FALSE)] <- backbone[which(upper_tail==FALSE)] * -1 #Re-code edges significant in lower-tail as -1
backbone[which(is.na(backbone))] <- 0 #fill NAs (missing and non-significant edges) with 0s
}
return(backbone)
}
#' Computes the loglikelihood gradient for the \link{bicm} function
#'
#' @param x0 vector, probabilities given by current step in bicm function
#' @param args list, c(degree sequence of rows, degree sequence of cols, multiplicity of rows, multiplicity of columns)
#'
#' @return loglikelihood
#' @noRd
.loglikelihood_prime_bicm <- function(x0, args){
r_dseq_rows <- args[[1]]
r_dseq_cols <- args[[2]]
rows_multiplicity = args[[3]]
cols_multiplicity = args[[4]]
num_rows = length(r_dseq_rows)
num_cols = length(r_dseq_cols)
x = x0[1:num_rows]
y = x0[(num_rows+1):length(x0)]
rm <- rows_multiplicity*x
cm <- cols_multiplicity*y
denom <- outer(x,y)+1
a <- -rowSums(1/sweep(denom, MARGIN = 2, FUN = "/", STATS = cm))
b <- -colSums(rm/denom)
a <- a + (r_dseq_rows/x)
b <- b + (r_dseq_cols/y)
c <- c(a,b)
return(c)
}
#' Computes the loglikelihood hessian for the \link{bicm} function
#'
#' @param x0 vector, probabilities given by current step in bicm function
#' @param args list, c(degree sequence of rows, degree sequence of cols, multiplicity of rows, multiplicity of columns)
#'
#' @return hessian matrix
#' @noRd
.loglikelihood_hessian_diag_bicm <- function(x0, args){
r_dseq_rows <- args[[1]]
r_dseq_cols <- args[[2]]
rows_multiplicity = args[[3]]
cols_multiplicity = args[[4]]
num_rows = length(r_dseq_rows)
num_cols = length(r_dseq_cols)
x = x0[1:num_rows]
y = x0[(num_rows+1):length(x0)]
x2 = x**2
y2 = y**2
rm <- rows_multiplicity*x2
cm <- cols_multiplicity*y2
denom <- (outer(x,y)+1)**2
a <- rowSums(1/sweep(denom, MARGIN = 2, FUN = "/", STATS = cm))
b <- colSums(rm/denom)
a <- a - (r_dseq_rows/x2)
b <- b - (r_dseq_cols/y2)
c <- matrix(c(a,b), 1, (num_rows+num_cols))
return(c)
}
#' Computes the loglikelihood for the \link{bicm} function
#'
#' @param x0 vector, probabilities given by current step in bicm function
#' @param args list, c(degree sequence of rows, degree sequence of cols, multiplicity of rows, multiplicity of columns)
#'
#' @return loglikelihood, numeric
#' @noRd
.loglikelihood_bicm <- function(x0, args){
r_dseq_rows <- args[[1]] #originally 0, have to shift everything by 1
r_dseq_cols <- args[[2]]
rows_multiplicity = args[[3]]
cols_multiplicity = args[[4]]
num_rows = length(r_dseq_rows)
num_cols = length(r_dseq_cols)
x = x0[1:num_rows]
y = x0[(num_rows+1):length(x0)]
f = sum(rows_multiplicity*r_dseq_rows*log(x))+sum(cols_multiplicity*r_dseq_cols*log(y))
f = f-sum((rows_multiplicity%o%cols_multiplicity)*log(x%o%y+1))
return(f)
}
#' Bipartite Configuration Model
#'
#' `bicm` estimates cell probabilities under the bipartite configuration model
#'
#' @param M matrix: a binary matrix
#' @param fitness logical: FALSE returns a matrix of probabilities, TRUE returns a list of row and column fitnesses only
#' @param tol numeric, tolerance of algorithm
#' @param max_steps numeric, number of times to run .loglikelihood_prime_bicm algorithm
#' @param ... optional arguments
#'
#' @details
#' Given a binary matrix **M**, the Bipartite Configuration Model (BiCM; Saracco et. al. 2015) returns a valued matrix
#' **B** in which Bij is the *approximate* probability that Mij = 1 in the space of all binary matrices with
#' the same row and column marginals as **M**. The BiCM yields the closest approximations of the true probabilities
#' compared to other estimation methods (Neal et al., 2021), and is used to extract the backbone of a bipartite
#' projection with the stochastic degree sequence model.
#'
#' Matrix **M** is "conforming" if no rows and no columns contain only zeros or only ones. If **M** is conforming, then
#' `bicm()` is faster. Additionally, if `fitness = TRUE`, then `bicm()` returns a list of row and column fitnesses,
#' which requires less memory. Given the *i*th row's fitness Ri and the *j*th column's fitness Rj, the entry Bij in
#' the probability matrix can be computed as Ri x Rj/(1+(Ri x Rj)).
#'
#' Matrix **M** is "non-conforming" if any rows or any columns contain only zeros or only ones. If **M** is non-conforming,
#' then `bicm()` is slower and will only return a probability matrix.
#'
#' @references package: {Neal, Z. P. (2025). backbone: An R Package to Extract Network Backbones. CRAN. \doi{10.32614/CRAN.package.backbone}}
#' @references bicm: {Saracco, F., Di Clemente, R., Gabrielli, A., & Squartini, T. (2015). Randomizing bipartite networks: The case of the World Trade Web. *Scientific Reports, 5*, 10595. \doi{10.1038/srep10595}}
#'
#' @return a matrix of probabilities or a list of fitnesses
#' @export
#'
#' @examples
#' M <- matrix(c(0,0,1,0,1,0,1,0,1),3,3) #A binary matrix
#' bicm(M)
bicm <- function(M, fitness = FALSE, tol = 1e-8, max_steps = 200, ...){
#### Compute parameters ####
n_rows <- dim(M)[1]
n_cols <- dim(M)[2]
rows_deg <- rowSums(M)
cols_deg <- colSums(M)
nonconforming <- any(rows_deg==0) | any(rows_deg==n_cols) | any(cols_deg==0) | any(cols_deg==n_rows)
#### BiCM for conforming matrices ####
if (!nonconforming) {
## find the unique row and column degrees, and their multiplicity
row_t <- as.data.frame(table(rows_deg))
r_rows_deg <- as.numeric(as.vector(row_t$rows_deg)) #Unique row degree values
r_invert_rows_deg <- match((rows_deg), r_rows_deg) #Rows having each unique degree value
rows_multiplicity <- as.numeric(as.vector(row_t$Freq)) #Number of rows having each unique degree value
r_n_rows <- length(r_rows_deg) #Number of unique degree values
col_t <- as.data.frame(table(cols_deg))
r_cols_deg <- as.numeric(as.vector(col_t$cols_deg))
r_invert_cols_deg <- match((cols_deg), r_cols_deg)
cols_multiplicity <- as.numeric(as.vector(col_t$Freq))
r_n_edges <- sum(rows_deg)
## apply initial guess function
r_x = r_rows_deg/(sqrt(r_n_edges)+1)
r_y = r_cols_deg/(sqrt(r_n_edges)+1)
x = c(r_x,r_y)
## initialize problem
args = list(r_rows_deg, r_cols_deg, rows_multiplicity, cols_multiplicity)
n_steps <- 0
f_x <- .loglikelihood_prime_bicm(x,args)
norm <- norm(as.matrix(f_x), type = "F")
diff <- 1
## solve problem
while ((norm > tol) & (diff > tol) & (n_steps < max_steps)){
x_old <- x
B <- as.array(.loglikelihood_hessian_diag_bicm(x,args))
dx <- -f_x/B
alpha <- 1
i <- 0
while ((!(-.loglikelihood_bicm(x,args)>-.loglikelihood_bicm(x + alpha * dx,args)))&(i<50)){
alpha <- alpha*.5
i <- i+1
}#end while sdc
x <- x+alpha*dx
f_x <- .loglikelihood_prime_bicm(x,args)
norm <- norm(as.matrix(f_x), type = "F")
diff <- norm(as.matrix(x-x_old), type = "F")
n_steps <- n_steps + 1
}#end while
## format solution
sx <- as.vector(rep(0,n_rows))
sy <- as.vector(rep(0,n_cols))
row_fitness <- x[1,1:r_n_rows][r_invert_rows_deg]
col_fitness <- x[1,-(1:r_n_rows)][r_invert_cols_deg]
## return probability matrix or fitnesses
if (fitness) {
fitnesses <- list(rowfit = row_fitness, colfit = col_fitness)
return(fitnesses)
} else {
x <- outer(row_fitness, col_fitness)
probs <- x/(x+1)
return(probs)
}
}
#### BiCM for conforming matrices ####
if (nonconforming) {
## initialize probability matrix
probs <- matrix(0, nrow = n_rows, ncol = n_cols)
r_bipart <- M
good_rows <- seq(1,n_rows)
good_cols <- seq(1,n_cols)
zero_rows <- which(rows_deg == 0)
zero_cols <- which(cols_deg == 0)
full_rows <- which(rows_deg == n_cols)
full_cols <- which(cols_deg == n_rows)
num_full_rows <- 0
num_full_cols <- 0
## identify non-conforming rows and columns
while ((length(zero_rows)
+ length(zero_cols)
+ length(full_rows)
+ length(full_cols)) > 0){
if (length(zero_rows)>0){
r_bipart <- r_bipart[-zero_rows,]
good_rows <- good_rows[-zero_rows]
}
if (length(zero_cols)>0){
r_bipart <- r_bipart[, -zero_cols]
good_cols <- good_cols[-zero_cols]
}
full_rows = which(rowSums(r_bipart) == dim(r_bipart)[2])
full_cols = which(colSums(r_bipart) == dim(r_bipart)[1])
num_full_rows = num_full_rows + length(full_rows)
num_full_cols = num_full_cols + length(full_cols)
probs[good_rows[full_rows],good_cols] <- 1
probs[good_rows,good_cols[full_cols]] <- 1
if (length(full_rows)>0){
good_rows <- good_rows[-full_rows]
r_bipart <- r_bipart[-full_rows,]
}
if (length(full_cols)>0){
good_cols <- good_cols[-full_cols]
r_bipart <- r_bipart[,-full_cols]
}
zero_rows <- which(rowSums(r_bipart) == 0)
zero_cols <- which(colSums(r_bipart) == 0)
} #end while
nonfixed_rows = good_rows
fixed_rows = seq(1,n_rows)[-good_rows]
nonfixed_cols = good_cols
fixed_cols = seq(1,n_cols)[-good_cols]
## re-compute row and column degrees
if (length(nonfixed_rows)>0){
rows_deg <- rows_deg[nonfixed_rows]
}
if (length(nonfixed_cols)>0){
cols_deg <- cols_deg[nonfixed_cols]
}
## find the unique row and column degrees, and their multiplicity
row_t <- as.data.frame(table(rows_deg-num_full_cols))
r_rows_deg <- as.numeric(as.vector(row_t$Var1))
r_invert_rows_deg <- match((rows_deg-num_full_cols), r_rows_deg)
rows_multiplicity <- as.numeric(as.vector(row_t$Freq))
r_n_rows <- length(r_rows_deg)
col_t <- as.data.frame(table(cols_deg - num_full_rows))
r_cols_deg <- as.numeric(as.vector(col_t$Var1))
r_invert_cols_deg <- match((cols_deg-num_full_rows), r_cols_deg)
cols_multiplicity <- as.numeric(as.vector(col_t$Freq))
r_n_edges <- sum(rows_deg-num_full_cols)
## apply initial guess function
r_x = r_rows_deg/(sqrt(r_n_edges)+1)
r_y = r_cols_deg/(sqrt(r_n_edges)+1)
x = c(r_x,r_y)
## initialize problem
args = list(r_rows_deg, r_cols_deg, rows_multiplicity, cols_multiplicity)
n_steps <- 0
f_x <- .loglikelihood_prime_bicm(x,args)
norm <- norm(as.matrix(f_x), type = "F")
diff <- 1
## solve problem
while ((norm > tol) & (diff > tol) & (n_steps < max_steps)){
x_old <- x
B <- as.array(.loglikelihood_hessian_diag_bicm(x,args))
dx <- -f_x/B
alpha <- 1
i <- 0
while ((!(-.loglikelihood_bicm(x,args)>-.loglikelihood_bicm(x + alpha * dx,args)))&
(i<50)){
alpha <- alpha*.5
i <- i+1
}#end while sdc
x <- x+alpha*dx
f_x <- .loglikelihood_prime_bicm(x,args)
norm <- norm(as.matrix(f_x), type = "F")
diff <- norm(as.matrix(x-x_old), type = "F")
n_steps <- n_steps + 1
}#end while
## format solution
sx <- as.vector(rep(0,n_rows))
sy <- as.vector(rep(0,n_cols))
sr_xy <- x
sr_x <- sr_xy[1:r_n_rows]
sr_y <- sr_xy[-(1:r_n_rows)]
sx[nonfixed_rows] <- sr_x[r_invert_rows_deg]
sy[nonfixed_cols] <- sr_y[r_invert_cols_deg]
## return probability matrix
f <- function(x,y){
z <- x*y
z/(1+z)
}
r_probs <- outer(sx[nonfixed_rows],sy[nonfixed_cols],f)
probs[nonfixed_rows,nonfixed_cols] <- r_probs
return(probs)
}
}
#' Randomize a binary matrix using the fastball algorithm
#'
#' `fastball` randomizes a binary matrix, preserving the row and column sums
#'
#' @param M matrix: a binary matrix (see details)
#' @param trades integer: number of trades; the default is 5R trades (approx. mixing time)
#'
#' @return
#' matrix: A random binary matrix with same row sums and column sums as M.
#'
#' @details
#' Given a matrix `M`, `fastball` randomly samples a new matrix from the space of all matrices with the same row and column sums as `M`.
#'
#' @references fastball: {Godard, Karl and Neal, Zachary P. 2022. fastball: A fast algorithm to sample bipartite graphs with fixed degree sequences. *Journal of Complex Networks* \doi{10.1093/comnet/cnac049}}
#'
#' @export
#' @examples
#' M <- matrix(rbinom(200,1,0.5),10,20) #A random 10x20 binary matrix
#' Mrand <- fastball(M) #Random matrix with same row and column sums
fastball <- function(M, trades = 5 * nrow(M)) {
if (methods::is(M, "matrix")) {
#Convert matrix to adjacency list
if (as.numeric(R.Version()$major)>=4 & as.numeric(R.Version()$minor)>=1) {
L <- apply(M==1, 1, which, simplify = FALSE) #Slightly faster, requires R 4.1.0
} else {
L <- lapply(asplit(M == 1, 1), which) #Slightly slower, works for earlier version of R
}
Lrand <- fastball_cpp(L, trades)
Mrand <- matrix(0,nrow(M),ncol(M))
for (row in 1:nrow(Mrand)) {Mrand[row,Lrand[[row]]] <- 1L}
return(Mrand)
} else {return(fastball_cpp(M, 5 * length(M)))}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.