R/gsbm_mcgd.R
In gsbm: Estimate Parameters in the Generalized SBM

Documented in gsbm_mcgd

#' Fit a Generalized Stochastic Block Model
#'
#' @description Given an adjacency matrix with missing observations, the function \code{gsbm_mgcd}
#' robustly estimates the probabilities of connections between nodes.
#'
#' @param A nxn adjacency matrix
#' @param lambda1 regularization parameter for nuclear norm penalty (positive number)
#' @param lambda2 regularization parameter for 2,1-norm penalty (positive number)
#' @param epsilon regularization parameter for the L2-norm penalty (positive number, if NULL, default method is applied)
#' @param U lower bound on nuclear norm (positive number, if NULL, default method is applied)
#' @param maxit maximum number of iterations (positive integer, if NULL, default method is applied)
#' @param thresh convergence tolerance (positive number, if NULL, default method is applied)
#' @param S0 initial value for the sparse component (if NULL, default method is applied))
#' @param L0 initial value for the low-rank component (if NULL, default method is applied)
#' @param R0 lower bound on nuclear norm of L0 (positive number, if NULL, default method is applied)
#' @param trace.it whether messages about convergence should be printed (boolean, if NULL, default is FALSE)
#'
#' @return The estimate for the nxn matrix of probabilities of connections between nodes. It is
#' given as the sum of a low-rank nxn matrix L, corresponding to connections between inlier
#' nodes, and a column sparse nxn matrix S, corresponding to connections between outlier
#' nodes and the rest of the network. The matrices L and S are such that
#'
#' E(A) = L - diag(L) + S + S'
#'
#' where E(A) is the expectation of the adjacency matrix, diag(L) is a nxn diagonal
#' matrix with diagonal entries equal to those of L, and S' means S transposed.
#'
#' The return value is a list of components
#'   \itemize{
#'   \item{\code{A}}{ the adjacency matrix.}
#'   \item{\code{L}}{ estimate for the low-rank component.}
#'   \item{\code{S}}{ estimate for the column-sparse component.}
#'   \item{\code{objective}}{ the value of the objective function.}
#'   \item{\code{R}}{ a bound on the nuclear norm of the low-rank component.}
#'   \item{\code{iter}}{ number of iterations between convergence of the objective function.}
#'  }
#'
#' @importFrom stats aggregate
#' @importFrom RSpectra eigs_sym
#' @export
#'
#' @examples
#' # Draw a 50x50 adjacency matrix
#' # Generalized SBM with 2 communities and 2 outliers
#' # Create low-rank matrix L
#' L <- matrix(0,50,50) # low-rank component
#' L[1:25, 1:25] <- 0.6 # connection probabilities within community 1
#' L[1:25, 26:48] <- 0.1 # connection probabilities between communities 1 and 2
#' L[26:48, 1:25] <- 0.1 # connection probabilities between communities 1 and 2
#' L[26:48, 26:48] <- 0.6 # connection probabilities within community 2
#'
#' # Create column-sparse matrix S
#' S <- matrix(0,50,50) # column sparse component
#' S[49:50,1:48] <- 0.6 # connection probabilties between outliers and inliers
#'
#' # Draw connections and create the adjacency matrix
#' undir <- rbinom(n=50*(50-1)/2, size=1, prob=(L+S+t(S))[upper.tri(L+S+t(S))]) # draw edges
#' A <- matrix(0,50,50)
#' A[upper.tri(A)] <- undir
#' A <- (A+t(A))
#'
#' # Estimate the probabilities of connection
#' lambda1 <- 7
#' lambda2 <- 7
#' res <- gsbm_mcgd(A, lambda1, lambda2)

gsbm_mcgd <- function(A, lambda1, lambda2, epsilon=0.1, U = NULL, maxit = 100, thresh = 1e-6,
                      S0 = NULL, L0 = NULL, R0 = NULL, trace.it = FALSE){
  n <- nrow(A)
  Omega <- !is.na(A)
  if(is.null(S0)) {
    S0 <- matrix(0,n,n)
    #S0 <- as(Matrix(0,n,n, sparse=T), "dgCMatrix")
  }
  if(is.null(L0)) L0 <- matrix(0,n,n)#L0 <- as(Matrix(0,n,n, sparse=T), "dgCMatrix")
  if(is.null(R0)) R0 <- 0
  if(is.null(U)) {
    U <- (1/2)*sum((A - S0)^2, na.rm = T)/lambda1
  }
  S <- S0
  L <- L0
  R <- R0
  objective <- 1/2*sum(A^2, na.rm=T)
  error <- 1
  iter <- 0
  A0 <- A
  A[is.na(A)] <- 0
  while((error > thresh) && (iter < maxit)){
    iter <- iter + 1
    S.tmp <- S
    L.tmp <- L
    R.tmp <- R
    G_S <- -2*Omega*(A-L-S-t(S))+epsilon*S # gradient wrt S
    obj0 <- (1/2)*sum((A0-S-t(S)-L)^2, na.rm = T)+lambda1*R+lambda2*sum(sqrt(colSums(S^2)))+epsilon*(norm(L, type="F")^2+norm(S, type="F")^2)
    step <- 1
    flag <- TRUE
    while(flag){
      step <- 0.5*step
      #mat <- t(pmax(1-step*lambda2/sqrt(colSums((S-step*G_S)^2, na.rm=T)),0)*t(S-G_S))
      mat <- sapply(1:n, function(j){
        max(1-step*lambda2/sqrt(sum((S[,j]-step*G_S[,j])^2, na.rm=T)),0)*(S[,j]-step*G_S[,j])
      })
      mat <- pmax(mat,0)
      obj <- (1/2)*sum((A0-mat-t(mat)-L)^2, na.rm = T)+lambda1*R+lambda2*sum(sqrt(colSums(mat^2)))+epsilon*(norm(L, type="F")^2+norm(mat, type="F")^2)
      flag <- obj > obj0
    }
    S <- mat
    G_L <- -1*Omega*(A - S - t(S) - L) + epsilon*L
    obj0 <- (1/2)*sum((A0-S-t(S)-L)^2, na.rm = T)+lambda1*R+lambda2*sum(sqrt(colSums(S^2)))+epsilon*(norm(L, type="F")^2+norm(S, type="F")^2)
    step <- 2
    flag <- TRUE
    svdL <- eigs_sym(G_L, 1)
    D_t <- - sign(svdL$value)*svdL$vectors%*%t(svdL$vectors)
    while(flag){
      step <- 0.5*step
      if(lambda1 >= - sum(D_t*G_L)){
        R_tilde <- 0
        L_tilde <- matrix(0,n,n)
        L <- L.tmp + step*(L_tilde - L.tmp)
        L <- (L+t(L))/2 # to avoid propagation of numerical errors
        R <- R.tmp + step*(R_tilde - R.tmp)
      } else{
        R_tilde <- U
        L_tilde <- U*D_t
        #beta <- min(1, (sum((L - L_tilde)*L_tilde)+(R - R_tilde)*lambda1)/norm(L_tilde - L, type = "F")^2)
        L <- L.tmp + step*(L_tilde - L.tmp)
        L <- (L+t(L))/2 # to avoid propagation of numerical errors
        R <- R.tmp + step*(R_tilde - R.tmp)
      }
      obj <- (1/2)*sum((A0-S-t(S)-L)^2, na.rm = T)+lambda1*R+lambda2*sum(sqrt(colSums(S^2)))+epsilon*(norm(L, type="F")^2+norm(S, type="F")^2)
      flag <- obj > obj0
    }
    obj <- (1/2)*sum((A0-S-t(S)-L)^2, na.rm = T)+lambda1*R+lambda2*sum(sqrt(colSums(S^2)))+epsilon*(norm(L, type="F")^2+norm(S, type="F")^2)
    objective <- c(objective, obj)
    U <- obj/lambda1
    if(iter == 1) {
      error <- 1
    } else{
      error <- abs(objective[iter]-objective[iter - 1]) /abs(objective[iter])
    }
    if(trace.it && (iter%%10 == 0) ){
      print(paste("iter ", iter, ": error ", error, " - objective: ", objective[iter]))
    }
  }
  return(list(A = A0, L = L, S = S, objective = objective, R = R,
              iter = iter))
}