R/InitErgmTerm.multinet.R

Defines functions Networks

Documented in Networks

#  File R/InitErgmTerm.multinet.R in package ergm.multi, part of the
#  Statnet suite of packages for network analysis, https://statnet.org .
#
#  This software is distributed under the GPL-3 license.  It is free,
#  open source, and has the attribution requirements (GPL Section 7) at
#  https://statnet.org/attribution .
#
#  Copyright 2003-2024 Statnet Commons
################################################################################
#' A multinetwork network representation.
#'
#' A function for specifying the LHS of a multi-network
#' (a.k.a. multilevel) ERGM. Typically used in conjunction with the
#' [`N()`][N-ergmTerm] term operator.
#'
#' @param ... network specification, in one of two formats:
#' 
#'   1. An (optionally named) list of networks with same directedness and bipartedness (but possibly different sizes).
#'
#'   1. Several networks as (optionally named) arguments.
#'
#' @return A [`network`] object comprising the provided networks, with multinetwork metadata.
#'
#' @templateVar combiner Networks
#' @template combine_networks_readonly
#'
#' @seealso [`ergmTerm`] for specific terms.
#' @seealso `vignette("Goeyvaerts_reproduction")` for a demonstration.
#' @examples
#'
#' data(samplk)
#'
#' # Method 1: list of networks
#' monks <- Networks(list(samplk1, samplk2))
#' ergm(monks ~ N(~edges))
#'
#' # Method 2: networks as arguments
#' monks <- Networks(samplk1, samplk2)
#' ergm(monks ~ N(~edges))
#' 
#' @export
Networks <- function(...){
  args <- list(...)
  if(all(sapply(args, is, "network"))){
    nwl <- args
  }else if(is.list(args[[1]]) && all(sapply(args[[1]], is, "network"))){
    nwl <- args[[1]]
  }else stop("Unrecognized format for multinetwork specification. See help for information.")

  if(!.same_constraints(nwl, "constraints")) stop("Networks have differing constraint structures. This is not supported at this time.")
  if(!.same_constraints(nwl, "obs.constraints")) stop("Networks have differing observation processes. This is not supported at this time.")
  
  nw <- combine_networks(nwl, blockID.vattr=".NetworkID", blockName.vattr=".NetworkName", ignore.nattr = c(eval(formals(combine_networks)$ignore.nattr), "constraints", "obs.constraints", "ergm"), subnet.cache=TRUE)

  nw %n% "ergm" <- combine_ergmlhs(nwl)

  nw %ergmlhs% "constraints" <-
      if(NVL(nwl[[1]] %ergmlhs% "constraints",base_env(~.))==base_env(~.))
        base_env(~blockdiag(".NetworkID"))
      else
        append_rhs.formula(nwl[[1]]%ergmlhs%"constraints", list(call("blockdiag",".NetworkID")), TRUE)
  if(!is.null(nwl[[1]]%ergmlhs%"obs.constraints")) nw %ergmlhs% "obs.constraints" <- nwl[[1]]%ergmlhs%"obs.constraints"

  nw
}

InitErgmTerm..subnets <- function(nw, arglist, ...){
  a <- check.ErgmTerm(nw, arglist,
                      varnames = c("attrname"),
                      vartypes = c("character"),
                      defaultvalues = list(NULL),
                      required = c(TRUE))

  list(name="_subnets", coef.names=c(), iinputs=c(unlist(.block_vertexmap(nw, a$attrname))), dependence=FALSE)
}

#' An `as_tibble` method for combined networks.
#'
#' A method to obtain a network attribute table from a
#' [`combined_networks`] object, falling back to the
#' [network::as_tibble.network()] if vertex or edge attributes are
#' required.
#'
#' @param x  a [`combined_networks`] (inheriting from [`network::network`]).
#' @param attrnames a list (or a selection index) for attributes to obtain; for combined networks, defaults to all.
#' @param unit whether to obtain edge, vertex, or network attributes.
#' @template ergmTerm-NetworkIDName
#' @param store.nid whether to include columns with network ID and network name; the columns will be named with the arguments passed to `.NetworkID` and `.NetworkName`.
#' @param ... additional arguments, currently passed to unlist()].
#'
#' @seealso [network::as_tibble.network()]
#' @export
as_tibble.combined_networks<-function(x,attrnames=(match.arg(unit)%in%c("vertices","networks")), ..., unit=c("edges", "vertices", "networks"), .NetworkID=".NetworkID", .NetworkName=".NetworkName", store.nid=FALSE){
  unit <- match.arg(unit)
  if(unit!="networks") return(NextMethod())

  al <-
    if(is(x, "combined_networks") && !is.null(x %n% ".subnetattr")) (x %n% ".subnetattr")[[.NetworkID]]
    else{
      # FIXME: Probably more efficient to use attrnames earlier, in order to save calls to get.network.attribute().
      xl <- if(is.network(x)) subnetwork_templates(x, .NetworkID, .NetworkName) else x
      nattrs <- Reduce(union, lapply(xl, list.network.attributes))
      lapply(nattrs, function(nattr) lapply(xl, get.network.attribute, nattr)) %>% set_names(nattrs)
    }

  if(is.logical(attrnames) || is.numeric(attrnames)) attrnames <- na.omit(names(al)[attrnames])
  else intersect(attrnames, names(al))

  out <- al[attrnames] %>% lapply(simplify_simple, ...) %>% as_tibble()

  if(store.nid){
    out[[.NetworkID]] <- unique(x %v% .NetworkID)
    out[[.NetworkName]] <- unique(x %v% .NetworkName)
  }

  out
}

get_lminfo <- function(nattrs, lm=~1, subset=TRUE, contrasts=NULL, offset=NULL, weights=1){
  nn <- nrow(nattrs)
  
  subset <-
    if(mode(subset) %in% c("expression", "call")) eval(if(is(subset, "formula")) subset[[2]] else subset, envir = nattrs, enclos = environment(lm))
    else subset
  subset <- unwhich(switch(mode(subset),
                           logical = which(rep(subset, length.out = nn)),
                           numeric = subset),
                    nn)

  weights <- if(mode(weights) %in% c("expression", "call")) eval(if(is(weights, "formula")) weights[[2]] else weights, envir = nattrs, enclos = environment(lm))
             else weights
  weights <- rep(weights, length.out=nn)

  offset <- if(mode(offset) %in% c("expression", "call")) eval(if(is(offset, "formula")) offset[[2]] else offset, envir = nattrs, enclos = environment(lm))
             else offset

  subset[weights==0] <- FALSE
  
  ## model.frame
  # This loop is necessary because if the predictor vector for a
  # network is all 0s as is its offest, it needs to be dropped.
  subset.prev <- NULL
  while(!identical(subset, subset.prev)){
    nm <- sum(subset)
    xf <- do.call(stats::lm, list(lm, data=nattrs,contrast.arg=contrasts, offset = offset, subset=subset, weights=weights, na.action=na.fail, method="model.frame"), envir=environment(lm))
    xm <- model.matrix(attr(xf, "terms"), xf, contrasts=contrasts)
    offset <- model.offset(xf) %>% NVL(numeric(nrow(xm))) %>% matrix(nrow=nrow(xm)) # offset needs to have reliable dimension.
    subset.prev <- subset
    subset[subset] <- subset[subset] & !apply(cbind(xm,offset)==0, 1, all)
  }

  list(xf=xf, xm=xm, subset=subset, offset=offset)  
}

assert_LHS_Networks <- function(nw, nid, term_trace = TRUE, call = if(term_trace) NULL else rlang::caller_env()){
  if(anyNA(.peek_vattrv(nw, nid))){
    msg <- paste0("The LHS of the model is not a multi-network ", sQuote("Networks()"), " construct.")
    if(term_trace) ergm_Init_abort(msg, call=call)
    else abort(msg, call=call)
  }
}

#' @import purrr
#' @import tibble
#' @templateVar name N
#' @title Evaluation on multiple networks
#' @description Evaluates the terms in `formula` on each of the networks joined
#'   using [`Networks`] function, and returns either a weighted
#'   sum or an [`lm`]-style linear model for the ERGM
#'   coefficients \insertCite{KrCo22t}{ergm.multi}. Its syntax follows that of [`lm`] closely,
#'   with sensible defaults.
#'
#'
#' The default formula (`~1`) sums the specified network
#' statistics. If `lm` refers to any network attributes for which some
#' networks have missing values, the term will stop with an
#' error. This can be avoided by pre-filtering with `subset`, which
#' controls which networks are affected by the term.
#'
#' The formula may also reference `.NetworkID` and `.NetworkName`. In
#' particular, `~0+factor(.NetworkID)` will evaluate `formula` on each
#' network individually.
#'
#' @note Care should be taken to avoid multicollinearity when using
#'   this operator. As with the [lm()] function, `lm` formulas have an
#'   implicit intercept, which can be suppressed by specifying `~ 0 +
#'   ...` or `~ -1 + ...` on the formula. When `lm` is given a model
#'   with intercept and a categorical predictor (including a
#'   [`logical`] one), it will use the first level (or `FALSE`) as the
#'   baseline, but if the model is without intercept, it will use all
#'   levels of the first categorical predictor. This is typically what
#'   is wanted in a linear regression, but for the `N` operator, this
#'   can be problematic if the "intercept" effect is added by a
#'   different term. A workaround is to convert the categorical
#'   predictor to dummy variables before putting it into the `lm`
#'   formula.
#'
#' @usage
#' # binary: N(formula, lm=~1, subset=TRUE, weights=1, contrasts=NULL, offset=0, label=NULL,
#' #           .NetworkID=".NetworkID", .NetworkName=".NetworkName")
#' @template ergmTerm-formula
#' @param lm a one-sided [lm()]-style formula whose RHS specifies the network-level predictors for the terms in the [ergm()] formula `formula`.
#' @param subset,contrasts see [lm()].
#' @param offset A constant, a vector of length equal to the number of networks, or a matrix whose number of rows is the number of networks and whose number of columns is the number of free parameters of the ERGM. It can be specified in `lm` as well.
#' @param weights reserved for future use; attempting to change it will cause an
#'   error: at this time, there is no way to assign sampling weights to
#'   networks.
#' @param label An optional parameter which will add a label to model
#'   parameters to help identify the term (which may have similar
#'   predictors but, say, a different network subset) in the output
#'   *or* a function that wraps the names.
#' @template ergmTerm-NetworkIDName
#'
#' @section Offsets and fixing parameters:
#'
#' By default, an `N(formula, lm)` term will add \eqn{p \times q}{p*q} free
#' parameters, where \eqn{p} is the number of free parameters
#' (possibly curved) of the ERGM specified by `formula`, and \eqn{q}
#' is the number of parameters specified by the `lm` formula. That is,
#' there would be one parameter for each combination of an ERGM
#' parameter and a linear model parameter, in an ERGM-major order
#' (i.e., for each ERGM parameter, the linear model parameters will be
#' enumerated). For example, the term `gwesp()` has two free
#' parameters: its coefficient and its decay rate. We can specify a
#' model in which they depend on \eqn{\log(n)} as `N(~gwesp,
#' ~log(n))`, resulting in the following 4 parameters, with the
#' intercept for the linear model being implicit:
#' ```{r, echo=FALSE, warning=FALSE}
#' data(florentine)
#' param_names(ergm_model(Networks(flomarriage,flobusiness) ~ N(~gwesp, ~log(n))))
#' ```
#'
#' If a different linear model is desired for different ERGM terms
#' (e.g., some are to be affected by network size while others are
#' not), multiple `N()` terms can be specified. This covers most such
#' cases, but not all. For example, suppose that for the above model,
#' we wish for its coefficient to depend on `log(n)` but for the decay
#' parameter not to. In this case, one can use the `offset()`
#' decorator with partial offsetting. Then, specifying
#' `offset(N(~gwesp(), ~log(n)), 4)`, we get:
#' ```{r, echo=FALSE, warning=FALSE}
#' data(florentine)
#' param_names(ergm_model(Networks(flomarriage,flobusiness) ~ offset(N(~gwesp(), ~log(n)), 4)))
#' ```
#' Then, setting the corresponding `offset.coef = 0` will fix the
#' coefficient of `log(n)` for the decay parameter at 0, while
#' allowing a constant decay parameter to be estimated.
#'
#' @references \insertAllCited{}
#'
#' @template ergmTerm-general
#'
#' @seealso `vignette("Goeyvaerts_reproduction")` for a demonstration.
#'
#' @concept operator
#' @concept directed
#' @concept undirected
InitErgmTerm.N <- function(nw, arglist, ..., N.compact_stats=TRUE, .NetworkID=".NetworkID", .NetworkName=".NetworkName"){
  a <- check.ErgmTerm(nw, arglist,
                      varnames = c("formula", "lm", "subset", "weights", "contrasts", "offset", "label", ".NetworkID", ".NetworkName"),
                      vartypes = c("formula", "formula", "formula,logical,numeric,expression,call", "formula,logical,numeric,expression,call", "list", "formula,logical,numeric,expression,call", "character,function", "character", "character"),
                      defaultvalues = list(NULL, ~1, TRUE, 1, NULL, NULL, NULL, .NetworkID, .NetworkName),
                      required = c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE))

  assert_LHS_Networks(nw, a$.NetworkID)

  auxiliaries <- eval(substitute(~.subnets(.NetworkID), list(.NetworkID=a$.NetworkID)), baseenv())

  nwl <- subnetwork_templates(nw, a$.NetworkID, a$.NetworkName)
  nwnames <- names(nwl)
  nn <- length(nwl)
  nattrs <- as_tibble(nw, unit="networks", .NetworkID=a$.NetworkID, .NetworkName=a$.NetworkName, store.nid=TRUE)

  lmi <- get_lminfo(nattrs, lm=a$lm, subset=a$subset, contrasts=a$contrasts, offset=a$offset, weights=a$weights)

  xf <- lmi$xf
  xm <- lmi$xm
  subset <- lmi$subset
  weights <- lmi$weights
  offset <- lmi$offset
  nm <- sum(subset)
  nwl <- nwl[subset]
  rm(lmi)

  ms <- lapply(nwl, function(nw1){
    ergm_model(a$formula, nw1, ..., offset.decorate=FALSE)
  })

  nparams <- ms %>% map_int(nparam, canonical=FALSE)
  nstats <-  ms %>% map_int(nparam, canonical=TRUE)

  # Check for MANOVA style matrix.
  if(!all_identical(nparams)) ergm_Init_abort("N() operator only supports models with the same numbers of parameters for every network. This may change in the future.")
  if(!all_identical(ms %>% map(param_names, canonical=FALSE))) ergm_Init_warn("Subnetwork models have different parameter names but the same parameter vector lengths; this may indicate specification problems.")
  nparam <- nparams[1]

  # Extract offsets and weights.
  offset <- model.offset(xf) %>% NVL(numeric(nm)) %>% matrix(nrow=nm, ncol=nparam) # offset is actually an nm*q matrix.
  weights <- model.weights(xf)
  if(!all(weights==1)) ergm_Init_abort("Network-level weights different from 1 are not supported at this time.")

  # Model parameters.
  params <- rep(list(NULL), nparam*ncol(xm))
  parnames <- colnames(xm)
  parnames <- ifelse(parnames=="(Intercept)", "1", parnames)
  names(params) <- (
    if(is.function(a$label)) a$label
    else ergm_mk_std_op_namewrap('N',a$label)
  )(rep(param_names(ms[[1]], canonical=FALSE), each=ncol(xm)), parnames)
  if(any(ms[[1]]$etamap$offsettheta))
    ergm_Init_abort("The ERGM ", sQuote("formula="), " argument of an ", sQuote("N()"),
                    " operator may not have offsets. See ", sQuote("ergmTerm?N"),
                    " section on fixing parameters for details.")
  if(with(ms[[1]]$etamap,
          any(mintheta[!offsettheta]!=-Inf) || any(maxtheta[!offsettheta]!=+Inf))){
    warning("Submodel specified to N() operator with a linear model formula has parameter constraints. They will be ignored.")
  }
  
  nstats.all <- integer(nn)
  nstats.all[subset] <- nstats # So networks not in subset get 0 stats.

  # Get the extended state and empty network stats wrapping. Note that
  # naming, curved models, etc., are handled "in-house" (for now).
  wms <- .mapply(wrap.ergm_model, list(ms, nwl), NULL)
  
  ## An important special case is when all models are linear and have
  ## the same number of stats. lm-offset does not work with this
  ## approach at this time, either *unless* all terms are offset by
  ## the same amount (that may vary over networks). Also,
  ## N.compact_stats can be set to FALSE to force the general case to
  ## be used.
  # TODO: A more refined detection than is.curved(), since currently
  # all operators show up as curved.
  # TODO: Check that multivariate offset is still not allowed in R.
  if(N.compact_stats &&
     all_identical(nstats) &&
     !any(ms%>%map_lgl(is.curved)) &&
     all(apply(offset,1,all_identical))){
    xm.all <- matrix(0, nn, ncol(xm))
    xm.all[subset,] <- xm
    
    if(!all(offset==0)){
      offset.all <- numeric(nn)
      offset.all[subset] <- offset[,1]
    }else offset.all <- NULL

    iinputs <- ncol(xm)+!is.null(offset.all)
    inputs <- c(t(cbind(xm.all,offset.all)))

    if(is.null(offset.all)){
      # This can be represented as a fully linear term.
      coef.names <- names(params)
      params <- etamap <- etagradient <- NULL
    }else{
      # Offset requires a bit of extra work.
      etamap <- function(x, n, ...){
        x %>% matrix(ncol=nparam) %>% rbind(1) %>% c()
      }
      etagradm <- list(cbind(diag(1, ncol(xm)), 0)) %>%
        rep(nparam) %>%
        Matrix::.bdiag() %>%
        as.matrix()
      etagradient <- function(x, n, ...){
        etagradm
      }
      coef.names <- names(params) %>% matrix(ncol=nparam) %>% rbind(paste0("offset",seq_len(nparam))) %>% c()
    }
    
    gss <- map(wms, "emptynwstats")
    gs <-
      if(all(map_lgl(gss, is.null))){ # Linear combination of 0s is 0.
        NULL
      }else{ # All numeric or NULL
        gs0 <- map_lgl(gss, is.null)
        lst(X = (cbind(xm,offset.all[subset]) %>% split(., row(.)))[!gs0],
            Y = gss[!gs0]) %>%
          pmap(outer) %>%
          reduce(`+`) %>%
          c()
    }

    list(name="MultiNet", coef.names = coef.names, inputs=inputs, iinputs=iinputs, submodels=ms, dependence=any(map_lgl(wms, "dependence")), emptynwstats = gs, auxiliaries = auxiliaries, map = etamap, gradient = etagradient, params = params)
        
  }else{
    Xl <- xm %>%
      split(., row(.)) %>% # Rows of xl as a list.
      map(rbind) %>% # List of row vectors.
      map(list) %>% # List of singleton lists of row vectors.
      map(rep, nparam) %>% # List of lists with appropriate parameter numbers.
      map(Matrix::.bdiag) %>% # nm-list of block-diagonal matrices. 
      map(as.matrix) # FIXME: Maintain representation as sparse matrices?
    
    # Xl can now be used as covariates to vectorized MANOVA-style
    # parameters.
    
    ol <- offset %>% split(., row(.)) # Rows of offset as a list.
    
    iinputs <- c(0,cumsum(nstats.all))
    
    etamap <- function(x, n, ...){
      Xl %>%
        map(`%*%`, c(x)) %>% # Evaluate X%*%c(x), where X is the predictor matrix and x is "theta".
        map(c) %>% # Output of the previous step is a matrix; convert to vector.
        map2(ol, `+`) %>% # Add on offset.
        map2(ms %>% map("etamap"), # Obtain the etamap.
             ergm.eta) %>% # Evaluate eta.
        #      map2(weights, `*`) %>% # Weight.
        unlist()
    }
    etagradient <- function(x, n, ...){
      Xl %>%
        map(`%*%`, c(x)) %>% # Evaluate X%*%c(x), where X is the predictor matrix and x is "theta".
        map(c) %>% # Output of the previous step is a matrix; convert to vector.
        map2(ol, `+`) %>% # Add on offset.
        map2(ms %>% map("etamap"), # Obtain the etamap.
             ergm.etagrad) %>% # Evaluate eta gradient.
        map2(Xl, ., crossprod) %>%
        #      map2(weights, `*`) %>% # Weight.
        do.call(cbind,.)
    }

    coef.names <- ergm_mk_std_op_namewrap(paste0('N#',rep(seq_len(nm), nstats)))(unlist(lapply(ms, param_names, canonical=TRUE)))

    # Empty network statistics.
    gss <- map(wms, "emptynwstats")
    gs <-
      if(all(map_lgl(gss, is.null))){ # Linear combination of 0s is 0.
        NULL
      }else{ # All numeric or NULL
        unlist(Map(function(gs, nst) if(is.null(gs)) numeric(nst) else gs, gss, nstats))
      }

    list(name="MultiNets", coef.names = coef.names, iinputs=iinputs, submodels=ms, dependence=any(map_lgl(wms, "dependence")), emptynwstats = gs, auxiliaries = auxiliaries, map = etamap, gradient = etagradient, params = params)
  }
}


#' @templateVar name ByNetDStats
#' @title Evaluate the formula on each individual network
#' @description Evaluates the terms in `formula` on each of the
#'   networks joined using [`Networks`] function, and returns a vector
#'   of its statistics for each network. This term is used internally
#'   for model diagnostics, and should not, generally, be used in models.
#'
#' @usage
#' # binary: N(formula, subset=TRUE)
#' @template ergmTerm-formula
#' @param subset see [lm()].
#'
#' @template ergmTerm-general
#'
#' @concept operator
#' @concept directed
#' @concept undirected
#' @noRd
InitErgmTerm.ByNetDStats <- function(nw, arglist, ..., .NetworkID=".NetworkID"){
  a <- check.ErgmTerm(nw, arglist,
                      varnames = c("formula",  "subset", ".NetworkID"),
                      vartypes = c("formula", "formula,logical,numeric,expression,call", "character"),
                      defaultvalues = list(NULL, TRUE, .NetworkID),
                      required = c(TRUE, FALSE, FALSE))

  assert_LHS_Networks(nw, a$.NetworkID)

  auxiliaries <- eval(substitute(~.subnets(.NetworkID), list(.NetworkID=a$.NetworkID)), baseenv())
  nattrs <- as_tibble(nw, unit="networks")

  lmi <- get_lminfo(nattrs, subset=a$subset)

  subset <- lmi$subset
  nn <- sum(subset)
  rm(lmi)
    
  m <- ergm_model(a$formula, nw, ..., offset.decorate=FALSE)

  coef.names <- ergm_mk_std_op_namewrap(paste0('N#',rep(seq_len(nn)[subset], each=nparam(m, canonical=TRUE))))(param_names(m, canonical=TRUE))
  iinputs <- cumsum(c(-1,subset))*nparam(m, canonical=TRUE)
  wm <- wrap.ergm_model(m, nw, NULL) # Only need a few things from wrap.
  list(name="ByNetDStats", coef.names = coef.names, iinputs=iinputs, submodel=m, auxiliaries=auxiliaries, dependence=wm$dependence, ext.encode=wm$ext.encode)
}
statnet/ergm.multi documentation built on Dec. 4, 2024, 11:24 a.m.