#' The fixed degree sequence model (fdsm) for backbone probabilities
#'
#' `fdsm` computes the proportion of generated edges above
#' or below the observed value using the fixed degree sequence model.
#' Once computed, use \code{\link{backbone.extract}} to
#' return the backbone matrix for a given alpha value.
#'
#' @param B graph: An unweighted bipartite graph object of class matrix, sparse matrix, igraph, edgelist, or network object.
#' Any rows and columns of the associated bipartite matrix that contain only zeros are automatically removed before computations.
#' @param trials numeric: The number of bipartite graphs generated to approximate the edge weight distribution.
#' @param dyad vector length 2: two row entries i,j. Saves each value of the i-th row and j-th column in each projected B* matrix. This is useful for visualizing an example of the empirical null edge weight distribution generated by the model. These correspond to the row and column indices of a cell in the projected matrix, and can be written as their string row names or as numeric values.
#' @param progress Boolean: If \link[utils]{txtProgressBar} should be used to measure progress
#' @param ... optional arguments
#'
#' @details During each iteration, fdsm computes a new B* matrix using the \link{curveball} algorithm. This is a random bipartite matrix with the same row and column sums as the original matrix B.
#' If a value is supplied for the dyad parameter, when the B* matrix is projected (multiplied by its transpose), the value in the corresponding row and column will be saved.
#' This allows the user to see the distribution of the edge weights for desired row and column.
#' @details The "backbone" S3 class object returned is composed of two matrices, a summary dataframe and (if specified) a 'dyad_values' vector.
#' @return backbone, a list(positive, negative, dyad_values, summary). Here
#' `positive` is a matrix of proportion of times each entry of the projected matrix B is above the corresponding entry in the generated projection,
#' `negative` is a matrix of proportion of times each entry of the projected matrix B is below the corresponding entry in the generated projection,
#' `dyad_values` is a list of edge weight for i,j in each generated projection, and
#' `summary` is a data frame summary of the inputted matrix and the model used including: class, model name, number of rows, number of columns, and running time.
#'
#' @references fixed degree sequence model: {Zweig, Katharina Anna, and Michael Kaufmann. 2011. “A Systematic Approach to the One-Mode Projection of Bipartite Graphs.” Social Network Analysis and Mining 1 (3): 187–218. \doi{10.1007/s13278-011-0021-0}}
#' @references curveball algorithm: {Strona, Giovanni, Domenico Nappo, Francesco Boccacci, Simone Fattorini, and Jesus San-Miguel-Ayanz. 2014. “A Fast and Unbiased Procedure to Randomize Ecological Binary Matrices with Fixed Row and Column Totals.” Nature Communications 5 (June). Nature Publishing Group: 4114. \doi{10.1038/ncomms5114}}
#'
#' @export
#'
#' @examples
#' fdsm_props <- fdsm(davis, trials = 100, dyad=c(3,6))
fdsm <- function(B,
trials = 1000,
dyad = NULL,
progress = FALSE,
...){
#### Argument Checks ####
if (trials < 0) {stop("trials must be a positive integer")}
if ((trials > 1) & (trials%%1!=0)) {stop("trials must be decimal < 1, or a positive integer")}
#### Class Conversion ####
convert <- tomatrix(B)
class <- convert$summary$class
B <- convert$G
if (convert$summary$weighted==TRUE){stop("Graph must be unweighted.")}
if (convert$summary$bipartite==FALSE){
warning("This object is being treated as a bipartite network.")
convert$summary$bipartite <- TRUE
}
#### Bipartite Projection ####
P <- tcrossprod(B)
### Create Positive and Negative Matrices to hold backbone ###
Positive <- matrix(0, nrow(P), ncol(P))
Negative <- matrix(0, nrow(P), ncol(P))
### Dyad save ###
edge_weights <- numeric(trials)
if (length(dyad) > 0){
if (class(dyad[1]) != "numeric"){
vec <- match(c(dyad[1], dyad[2]), rownames(B))
} else {
vec <- dyad
}
}
#### Define Dimensional Variables ####
RC=dim(B)
R=RC[1]
C=RC[2]
hp=list()
#TODO: adjust this address appropriately
sourceCpp("/Users/karlgodard/RProjects/Fastball-SourceCpp/backbone/R/fastball.cpp")
#### Mark Locations of One's ####
for (row in 1:R) {hp[[row]]=(which(B[row,]==1))}
#### Build Null Models ####
for (i in 1:trials){
#Algorithm credit to: Strona, G., Nappo, D., Boccacci, F., Fattorini, S., San-Miguel-Ayanz, J. (2014). A fast and unbiased procedure to randomize ecological binary matrices with fixed row and column totals. Nature Communications, 5, 4114
### Use fastball (an optimized curveball implementation) to create an FDSM hp_star ###
Bstar = fastball_cpp(hp, c(R, C))
### Construct Pstar from Bstar ###
Pstar <- tcrossprod(Bstar)
### Start estimation timer; print message ###
if (i == 1) {
start.time <- Sys.time()
}
### Check whether Pstar edge is larger/smaller than P edge ###
Positive <- Positive + (Pstar >= P)+0
Negative <- Negative + (Pstar <= P)+0
### Save Dyad of P ###
if (length(dyad) > 0){
edge_weights[i] <- Pstar[vec[1], vec[2]]
}
### Report estimated running time, update progress bar ###
if (i==10){
end.time <- Sys.time()
est = (round(difftime(end.time, start.time, units = "auto"), 2) * (trials/10))
message("Estimated time to complete is ", est, " " , units(est), " for ", trials, " trials")
if (progress == "TRUE"){
pb <- utils::txtProgressBar(min = 0, max = trials, style = 3)
}
}
if ((progress == "TRUE") & (i>=10)) {utils::setTxtProgressBar(pb, i)}
} #end for loop
if (progress == "TRUE"){close(pb)}
#### Find Proportions ####
### Proporition of greater than expected and less than expected ###
Positive <- (Positive/trials)
Negative <- (Negative/trials)
rownames(Positive) <- rownames(B)
colnames(Positive) <- rownames(B)
rownames(Negative) <- rownames(B)
colnames(Negative) <- rownames(B)
### Insert NAs for p-values along diagonal
diag(Positive) <- NA
diag(Negative) <- NA
#### Compile Summary ####
r <- rowSums(B)
c <- colSums(B)
a <- c("Model", "Input Class", "Bipartite", "Symmetric", "Weighted", "Number of Rows", "Number of Columns")
b <- c("Fixed Degree Sequence Model", convert$summary$class, convert$summary$bipartite, convert$summary$symmetric, convert$summary$weighted, dim(B)[1], dim(B)[2])
model.summary <- data.frame(a,b, row.names = 1)
colnames(model.summary)<-"Model Summary"
#### Return Backbone Object ####
if (length(dyad) > 0){
bb <- list(positive = Positive, negative = Negative, dyad_values = edge_weights, summary = model.summary)
class(bb) <- "backbone"
return(bb)
} else {
bb <- list(positive = Positive, negative = Negative, summary = model.summary)
class(bb) <- "backbone"
return(bb)
}
} #end fdsm function
#' curveball algorithm
#'
#' @param M matrix
#'
#' @return rm, a matrix with same row sums and column sums as M, but randomized 0/1 entries.
#' @export
#'
#' @references Algorithm and R implementation: Strona, Giovanni, Domenico Nappo, Francesco Boccacci, Simone Fattorini, and Jesus San-Miguel-Ayanz. 2014. “A Fast and Unbiased Procedure to Randomize Ecological Binary Matrices with Fixed Row and Column Totals.” Nature Communications 5 (June). Nature Publishing Group: 4114. \doi{10.1038/ncomms5114}
#' @examples
#' curveball(davis)
curveball<-function(M){
#### Define Variables ####
RC=dim(M)
R=RC[1]
C=RC[2]
hp=list()
#### Mark Locations of One's ####
for (row in 1:dim(M)[1]) {hp[[row]]=(which(M[row,]==1))}
l_hp=length(hp)
#### Curveball Swaps ####
for (rep in 1:(5*l_hp)){
AB=sample(1:l_hp,2)
a=hp[[AB[1]]]
b=hp[[AB[2]]]
ab=intersect(a,b)
l_ab=length(ab)
l_a=length(a)
l_b=length(b)
if ((l_ab %in% c(l_a,l_b))==F){
tot=setdiff(c(a,b),ab)
l_tot=length(tot)
tot=sample(tot, l_tot, replace = FALSE, prob = NULL)
L=l_a-l_ab
hp[[AB[1]]] = c(ab,tot[1:L])
hp[[AB[2]]] = c(ab,tot[(L+1):l_tot])}
}
#### Define and Return Random Matrix ####
rm=matrix(0,R,C)
for (row in 1:R){rm[row,hp[[row]]]=1}
rm
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.