R/fdsm.R

Defines functions curveball fdsm

Documented in curveball fdsm

#' The fixed degree sequence model (fdsm) for backbone probabilities
#'
#' `fdsm` computes the proportion of generated edges above
#'     or below the observed value using the fixed degree sequence model.
#'     Once computed, use \code{\link{backbone.extract}} to
#'     return the backbone matrix for a given alpha value.
#'
#' @param B graph: An unweighted bipartite graph object of class matrix, sparse matrix, igraph, edgelist, or network object.
#'     Any rows and columns of the associated bipartite matrix that contain only zeros are automatically removed before computations.
#' @param trials numeric: The number of bipartite graphs generated to approximate the edge weight distribution.

#' @param dyad vector length 2: two row entries i,j. Saves each value of the i-th row and j-th column in each projected B* matrix. This is useful for visualizing an example of the empirical null edge weight distribution generated by the model. These correspond to the row and column indices of a cell in the projected matrix, and can be written as their string row names or as numeric values.
#' @param progress Boolean: If \link[utils]{txtProgressBar} should be used to measure progress
#' @param ... optional arguments
#'
#' @details During each iteration, fdsm computes a new B* matrix using the \link{curveball} algorithm. This is a random bipartite matrix with the same row and column sums as the original matrix B.
#'     If a value is supplied for the dyad parameter, when the B* matrix is projected (multiplied by its transpose), the value in the corresponding row and column will be saved.
#'     This allows the user to see the distribution of the edge weights for desired row and column.
#' @details The "backbone" S3 class object returned is composed of two matrices, a summary dataframe and (if specified) a 'dyad_values' vector.
#' @return backbone, a list(positive, negative, dyad_values, summary). Here
#'     `positive` is a matrix of proportion of times each entry of the projected matrix B is above the corresponding entry in the generated projection,
#'     `negative` is a matrix of proportion of times each entry of the projected matrix B is below the corresponding entry in the generated projection,
#'     `dyad_values` is a list of edge weight for i,j in each generated projection, and
#'     `summary` is a data frame summary of the inputted matrix and the model used including: class, model name, number of rows, number of columns, and running time.
#'
#' @references fixed degree sequence model: {Zweig, Katharina Anna, and Michael Kaufmann. 2011. “A Systematic Approach to the One-Mode Projection of Bipartite Graphs.” Social Network Analysis and Mining 1 (3): 187–218. \doi{10.1007/s13278-011-0021-0}}
#' @references curveball algorithm: {Strona, Giovanni, Domenico Nappo, Francesco Boccacci, Simone Fattorini, and Jesus San-Miguel-Ayanz. 2014. “A Fast and Unbiased Procedure to Randomize Ecological Binary Matrices with Fixed Row and Column Totals.” Nature Communications 5 (June). Nature Publishing Group: 4114. \doi{10.1038/ncomms5114}}
#'
#' @export
#'
#' @examples
#' fdsm_props <- fdsm(davis, trials = 100, dyad=c(3,6))

fdsm <- function(B,
                 trials = 1000,
                 dyad = NULL,
                 progress = FALSE,
                 ...){

  #### Argument Checks ####
  if (trials < 0) {stop("trials must be a positive integer")}
  if ((trials > 1) & (trials%%1!=0)) {stop("trials must be decimal < 1, or a positive integer")}

  #### Class Conversion ####
  convert <- tomatrix(B)
  class <- convert$summary$class
  B <- convert$G
  if (convert$summary$weighted==TRUE){stop("Graph must be unweighted.")}
  if (convert$summary$bipartite==FALSE){
    warning("This object is being treated as a bipartite network.")
    convert$summary$bipartite <- TRUE
  }

  #### Bipartite Projection ####
  P <- tcrossprod(B)

  ### Create Positive and Negative Matrices to hold backbone ###
  Positive <- matrix(0, nrow(P), ncol(P))
  Negative <- matrix(0, nrow(P), ncol(P))

  ### Dyad save ###
  edge_weights <- numeric(trials)
  if (length(dyad) > 0){
    if (class(dyad[1]) != "numeric"){
      vec <- match(c(dyad[1], dyad[2]), rownames(B))
    } else {
      vec <- dyad
    }
  }

  #### Define Dimensional Variables ####
  RC=dim(B)
  R=RC[1]
  C=RC[2]
  hp=list()

  #TODO: adjust this address appropriately
  sourceCpp("/Users/karlgodard/RProjects/Fastball-SourceCpp/backbone/R/fastball.cpp")

  #### Mark Locations of One's ####
  for (row in 1:R) {hp[[row]]=(which(B[row,]==1))}

  #### Build Null Models ####
  for (i in 1:trials){

    #Algorithm credit to: Strona, G., Nappo, D., Boccacci, F., Fattorini, S., San-Miguel-Ayanz, J. (2014). A fast and unbiased procedure to randomize ecological binary matrices with fixed row and column totals. Nature Communications, 5, 4114
    ### Use fastball (an optimized curveball implementation) to create an FDSM hp_star ###
    Bstar = fastball_cpp(hp, c(R, C))

    ### Construct Pstar from Bstar ###
    Pstar <- tcrossprod(Bstar)

    ### Start estimation timer; print message ###
    if (i == 1) {
      start.time <- Sys.time()
    }

    ### Check whether Pstar edge is larger/smaller than P edge ###
    Positive <- Positive + (Pstar >= P)+0
    Negative <- Negative + (Pstar <= P)+0

    ### Save Dyad of P ###
    if (length(dyad) > 0){
      edge_weights[i] <- Pstar[vec[1], vec[2]]
    }

    ### Report estimated running time, update progress bar ###
    if (i==10){
      end.time <- Sys.time()
      est = (round(difftime(end.time, start.time, units = "auto"), 2) * (trials/10))
      message("Estimated time to complete is ", est, " " , units(est), " for ", trials, " trials")
      if (progress == "TRUE"){
        pb <- utils::txtProgressBar(min = 0, max = trials, style = 3)
      }
    }
    if ((progress == "TRUE") & (i>=10)) {utils::setTxtProgressBar(pb, i)}
  } #end for loop
  if (progress == "TRUE"){close(pb)}

  #### Find Proportions ####
  ### Proporition of greater than expected and less than expected ###
  Positive <- (Positive/trials)
  Negative <- (Negative/trials)
  rownames(Positive) <- rownames(B)
  colnames(Positive) <- rownames(B)
  rownames(Negative) <- rownames(B)
  colnames(Negative) <- rownames(B)

  ### Insert NAs for p-values along diagonal
  diag(Positive) <- NA
  diag(Negative) <- NA

  #### Compile Summary ####
  r <- rowSums(B)
  c <- colSums(B)

  a <- c("Model", "Input Class", "Bipartite", "Symmetric", "Weighted", "Number of Rows", "Number of Columns")
  b <- c("Fixed Degree Sequence Model", convert$summary$class, convert$summary$bipartite, convert$summary$symmetric, convert$summary$weighted, dim(B)[1], dim(B)[2])
  model.summary <- data.frame(a,b, row.names = 1)
  colnames(model.summary)<-"Model Summary"

  #### Return Backbone Object ####
  if (length(dyad) > 0){
    bb <- list(positive = Positive, negative = Negative, dyad_values = edge_weights, summary = model.summary)
    class(bb) <- "backbone"
    return(bb)
  } else {
    bb <- list(positive = Positive, negative = Negative, summary = model.summary)
    class(bb) <- "backbone"
    return(bb)
  }

} #end fdsm function

#' curveball algorithm
#'
#' @param M matrix
#'
#' @return rm, a matrix with same row sums and column sums as M, but randomized 0/1 entries.
#' @export
#'
#' @references Algorithm and R implementation: Strona, Giovanni, Domenico Nappo, Francesco Boccacci, Simone Fattorini, and Jesus San-Miguel-Ayanz. 2014. “A Fast and Unbiased Procedure to Randomize Ecological Binary Matrices with Fixed Row and Column Totals.” Nature Communications 5 (June). Nature Publishing Group: 4114. \doi{10.1038/ncomms5114}
#' @examples
#' curveball(davis)
curveball<-function(M){
  #### Define Variables ####
  RC=dim(M)
  R=RC[1]
  C=RC[2]
  hp=list()

  #### Mark Locations of One's ####
  for (row in 1:dim(M)[1]) {hp[[row]]=(which(M[row,]==1))}
  l_hp=length(hp)

  #### Curveball Swaps ####
  for (rep in 1:(5*l_hp)){
    AB=sample(1:l_hp,2)
    a=hp[[AB[1]]]
    b=hp[[AB[2]]]
    ab=intersect(a,b)
    l_ab=length(ab)
    l_a=length(a)
    l_b=length(b)
    if ((l_ab %in% c(l_a,l_b))==F){
      tot=setdiff(c(a,b),ab)
      l_tot=length(tot)
      tot=sample(tot, l_tot, replace = FALSE, prob = NULL)
      L=l_a-l_ab
      hp[[AB[1]]] = c(ab,tot[1:L])
      hp[[AB[2]]] = c(ab,tot[(L+1):l_tot])}
  }

  #### Define and Return Random Matrix ####
  rm=matrix(0,R,C)
  for (row in 1:R){rm[row,hp[[row]]]=1}
  rm
}
KGodard1/Fastball-SourceCpp documentation built on Dec. 18, 2021, 2:35 a.m.