# R/endogenousstats.R In ghypernet: Fit and Simulate Generalised Hypergeometric Ensembles of Graphs

#### Documented in checkGraphtypeget_zero_dummyhomophily_statreciprocity_statsharedPartner_stat

```#' Check graph input type (for whether it's a graph or a edgelist).
#'
#' Returns TRUE if the supplied object \code{graph} is an adjacency matrix. Returns FALSE if the provided object is an edgelist. The function checks whether the edgelist conforms to our standards (sender, target, edgecount).
#'
#' @param graph A graph adjacency matrix or an edgelist.
#' @return TRUE or FALSE. Returns TRUE if the provided object \code{graph} is an adjacency matrix.
checkGraphtype <- function(graph) {
# is matrix?
isMatrix <-  FALSE
if (dim(graph)[1] == dim(graph)[2]) {
isMatrix <- TRUE
}
# if edgelist = check if 3 columns are there, 2 characters/factors, 1 numeric
if (!isTRUE(isMatrix)) {
if (ncol(graph) != 3) {
stop(
"graph needs to be an adjacecy matrix or an edgelist. The edgelist needs to have exactly 3 columns: sender, target, edgecounts."
)
}
if (!is.character(graph[, 1]) & !is.factor(graph[, 1])) {
stop(
"The first row in your edgelist needs to be a factor/character vector with sender node IDs."
)
}
if (!is.character(graph[, 2]) & !is.factor(graph[, 2])) {
stop(
"The second row in your edgelist needs to be a factor/character vector with target node IDs."
)
}
if (!is.numeric(graph[, 3]) & !is.integer(graph[, 3])) {
stop(
"The third row in your edgelist needs to be a numeric/integer vector with edge counts."
)
}
}
# return:
isMatrix
}

################################################################################
#' Calculate weighted reciprocity change statistics for multi-edge graphs.
#'
#' The function takes either an edgelist or an adjacency matrix and returns an
#' adjacency matrix with the reciprocity change statistic. This reciprocity
#' matrix can then be used as a predictor in the gHypEG regression.
#'
#' @param graph A graph adjacency matrix or an edgelist.  The edgelist needs to
#'   have 3 columns: a sender vector, a target vector and an edgecount vector.
#' @param nodes optional character/factor vector. If an edgelist is provided,
#'   you have to provide a list of unique identifiers of your nodes in the graph.
#'   This is because in the edgelist, isolates are usually not recorded.
#'   If you do not specify isolates in your nodes object, they are excluded
#'   from the analysis (falsifies data).
#' @param zero_values optional numeric value. Use this to substitute zero-values
#'   in your reciprocity change statistic matrix. Zero values in the predictors
#'   are recognized in the gHypEG regression as structural zeros. To ensure this
#'   ideally using a dummy variable fitting an optimal value for the zeroes.
#' @return Reciprocity change statistic matrix.
#' @author LB, GC
#' @export
#' @examples
#' recip_stat_dummy <- get_zero_dummy(recip_stat, name = 'reciprocity')
#' nrm(w=recip_stat_dummy, adj_karate, directed = FALSE, selfloops = FALSE)
#'
reciprocity_stat <- function(graph, nodes = NULL, zero_values = NULL){
## preprocess:
# is graph object a matrix or edgelist?
isMatrix <- checkGraphtype(graph) # returns isMatrix==TRUE/FALSE
# if not isMatrix transform edgelist into matrix
if(!isTRUE(isMatrix)){

el <- graph #bc graph will be overwritten

# check if they provided a list of nodes for the graph (matching attribute files)
if(is.null(nodes)){
nodes <- unique(c(el[,1], el[,2]))
}
#

#transform edgelist to matrix
# save it to object "graph"

}else{
nodes = rownames(graph)
}

## calculate reciprocity
recip_mat <- t(graph)

## transform zero values
if(is.null(zero_values)){
# recip_mat[recip_mat == 0] <- min(recip_mat[recip_mat > 0])/10
}else{
recip_mat[recip_mat == 0] <- zero_values
}

return(recip_mat)
}

################################################################################
#' Calculate (un-)weighted shared partner change statistics for multi-edge graphs.
#'
#' The function calculates the change statistic for shared partners for each
#' dyad in the graph. Shared partner statistics count for each dyad involving
#' nodes i and j in the graph, how many nodes k these two nodes have in common
#' (or share). The shared partner \$k\$ counts are weighted by their
#' interactions with the focal nodes \$i\$ and \$j\$. This is necessary in
#' dense multi-edge graphs to ensure that meaningful triadic closure is
#' detected. The statistic can be calculated in 3 different forms: undirected,
#' incoming shared partners (where shared partner k: k->i and k->j) and outgoing
#' shared partners (where shared partner k: k<-i and k<-j).
#'
#' @param graph A graph adjacency matrix or an edgelist. The edgelist needs to
#'   have 3 columns: a sender vector, a target vector and an edgecount vector.
#' @param weighted set to TRUE.
#' @param triad.type set to \code{undirected}. Can be set to \code{incoming}
#'   in the multi-edge graph.
#' @param nodes optional character/factor vector. If an edgelist is provided,
#'   you have to provide a list of unique identifiers of your nodes in the graph.
#'   This is because in the edgelist, isolates are usually not recorded.
#'   If you do not specify isolates in your nodes object, they are excluded
#'   from the analysis (falsifies data).
#' @param zero_values optional numeric value. Use this to substitute zero-values
#'   in your shared partner change statistic matrix. Zero values in the predictors
#'   are recognized in the gHypEG regression as structural zeros. To ensure this
#'   ideally using a dummy variable fitting an optimal value for the zeroes.
#' @param directed boolean. Is the graph directed?
#' @return Shared partner change statistic matrix.
#' @author LB, GC, GV
#' @export
#' @import dplyr
#' @importFrom utils setTxtProgressBar txtProgressBar
#' @examples
#' tri_stat <- sharedPartner_stat(adj_karate, directed = FALSE)
#' tri_stat_dummy <- get_zero_dummy(tri_stat, name = 'shared_partners')
#' nrm(w=tri_stat_dummy, adj_karate, directed = FALSE, selfloops = FALSE)
sharedPartner_stat <- function(graph,
directed,
weighted = TRUE,
nodes = NULL,
zero_values = NULL) {
## preprocess:
# is graph object a matrix or edgelist?
isMatrix <- checkGraphtype(graph) # returns isMatrix==TRUE/FALSE
# transform edgelist into matrix
if (isTRUE(isMatrix)) {
} else{
}
if (is.null(nodes))

stop(
'triad.type needs to be one of three: "undirected", "directed.incoming" or "directed.outgoing".
Check help files for additional infos.'
)
}

# progressbar
pb = txtProgressBar(
min = 1,
initial = 0,
style = 3
)

## now count triangles
partners <- sapply(nodes, findPartners_target, el = el)
partners <- sapply(nodes, findPartners_sender, el = el)
if (triad.type == 'undirected' | isFALSE(directed))
partners <- sapply(nodes, findPartners_all, el = el, simplify = FALSE)

tri <-

for (i in nodes[-length(nodes)]) {
for (k in nodes[(which(nodes == i) + 1):length(nodes)]) {
js <- intersect(partners[[i]], partners[[k]])
if(isFALSE(weighted)){
tri[i, k] <- length(js)
next
}
if (length(js) > 0) {
tri[i, k] <-
tri[i, k] + sum(sapply(js, function(j)
}
if (directed & (triad.type == 'directed.incoming' |
tri[i, k] <-
tri[i, k] + sum(sapply(js, function(j)
}
}
}
setTxtProgressBar(pb, which(nodes==i))
}
tri <- tri + t(tri)

## transform zero values
if (!is.null(zero_values)) {
tri[tri == 0] <- zero_values
}

return(tri)
}

findPartners_sender <- function(node, el){
colnames(el) <- c('sender','target')
return(el\$target[el\$sender == node])
}

findPartners_target <- function(node, el){
colnames(el) <- c('sender','target')
return(el\$sender[el\$target == node])
}

findPartners_all <- function(node, el){
unique(c(findPartners_sender(node,el),findPartners_target(node,el)))
}

################################################################################
## graph statistic: homophily
#Function: Creates a matrix for categorical attribute matches between two nodes
# as well as absolute difference effects.

#' Calculate homophily in multi-edge graphs.
#'
#' The function calculates homophily matrices. If you supply a categorical
#' variable (factor, character), the function returns attribute matches for
#' dyads from the same group. If you supply a continuous variable (numeric,
#' integers), the function returns absolute difference effects for each dyad in
#' the graph.
#'
#' @param variable A attribute variable. Can be categorical (attribute matches)
#'   or continuous (absolute difference effects).
#' @param type set to \code{categorical}. Can be set to \code{absdiff} instead.
#'   If set to \code{categorical}, the homophily statistic calculates matches
#'   between dyads from the same group (analogous to dummy variables measuring
#'   attribute match between two nodes (=10) and attribute mismatch (=1)). If
#'   set to \code{absdiff} it calculates the difference in values from variable
#'   for each dyad in the graph.
#' @param nodes optional character/factor vector. If an edgelist is provided, you
#'   have to provide a list of unique identifiers of your nodes in the graph.
#'   This is because in the edgelist, isolates are usually not recorded. If you
#'   do not specify isolates in your nodes object, they are excluded from the
#'   analysis (falsifies data).
#' @param zero_values optional numeric value. Use this to substitute zero-values
#'   in your homophily change statistic matrix. Zero values in the predictors
#'   are recognized in the gHypEG regression as structural zeroes. To ensure this
#'   ideally using a dummy variable fitting an optimal value for the zeroes.
#'   Only useful with absdiff type.
#' @param these.categories.only optional vector specifying the categories to be
#'   used, if only a subset of factor(variable) is needed.
#' @return Homophily change statistic matrix.
#' @author LB, GC
#' @export
#' @examples
#' homop_stat <- homophily_stat(variable = vertexlabels, nodes = rownames(adj_karate))
#' nrm(w=list('homophily'= homop_stat), adj_karate, directed = FALSE, selfloops = FALSE)
homophily_stat <- function(variable = variable,
type = 'categorical',
#type = categorical, absdiff
nodes = nodes,
these.categories.only = NULL,
zero_values = NULL) {
# check inputs
if (is.null(variable)) {
stop(
"You need to specify a homophily variable. Either a categorical (=character, factor) or continous (=numeric, integer) variable."
)
} else{
if (is.character(variable) | is.factor(variable)) {
# check type
if (type == 'absdiff') {
stop("Please use type = categorical for chacacter/factor homophily variables.")
}
# check categories
if (!is.null(these.categories.only)) {
if(!all(these.categories.only %in% unique(variable))){
stop(
"The values you provided in these.categories.only are not subsets of the variable object."
)
}
}
}
}
if (is.null(nodes)) {
stop(
"You need to specify a nodes object. This object lists the nodes in the graph (e.g., their ID)."
)
}
if (type != 'categorical' & type != 'absdiff') {
stop("Homophily type can either be categorical or absolute difference")
}

### Categorical homophily
if (type == 'categorical') {
## create block IDs
labels <- levels(factor(variable))
blockids <- as.numeric(plyr::mapvalues(
variable,
from = labels,
to = c(1, numbers::Primes(length(variable) * 200))[1:(length(unique(variable)))]
))
#TODO: package dependence here!!
#verbose = TRUE:
#print(unique(blockids))
#print(unique(labels))
## create block matrix
blocks <- blockids %*% t(blockids)

## now sort the blocks (according to which values are specified in these.categories.only)
if (is.null(these.categories.only)) {
# create homophily matrix
homophily_mat <-
ifelse(matrix(blocks %in% blockids ^ 2, nrow = length(nodes)), exp(1), 1) #hardcoded e, 1
} else{
#these.categories.only = specified
# which labels should be selected?
selectedIDs <-
numbers::Primes(length(variable))[labels %in% these.categories.only]
homophily_mat <-
ifelse(matrix(blocks %in% selectedIDs ^ 2, nrow = length(nodes)), exp(1), 1) #hardcoded e, 1
}
## label matrix
rownames(homophily_mat) <- nodes
colnames(homophily_mat) <- nodes

} else{
# type = absdiff
### Absolute difference
homophily_mat <-
matrix(0, nrow = length(nodes), ncol = length(nodes))
rownames(homophily_mat) <- nodes
colnames(homophily_mat) <- nodes
##TODO: this is super inefficient (2-for loops), move this to Rcpp
for (i in 1:nrow(homophily_mat)) {
for (j in 1:ncol(homophily_mat)) {
homophily_mat[i, j] <-
abs(variable[nodes %in% nodes[i]] - variable[nodes %in% nodes[j]])
}
}
}

## treat zero-values
if (is.null(zero_values)) {
# homophily_mat[homophily_mat == 0] <-
#   min(homophily_mat[homophily_mat > 0]) / 10
} else{
homophily_mat[homophily_mat == 0] <- zero_values
}

## return matrix with 1/10 or absolute difference
return(homophily_mat)
}

#' Create a dummy variable to encode zero values of another variable.
#'
#' Use this to substitute zero-values in your nrm values. Zero values in the predictors
#' are recognized in the gHypEG regression as structural zeroes. To ensure this
#' ideally using a dummy variable fitting an optimal value for the zeroes.
#' This function takes a predictor that needs to be recoded and returns a list
#' containing two matrices. The first one contains the original predictor
#' recoded such that all zero values are 1 (and thus do not impact the model).
#' The second one consist of a matrix with 1 where the original predictor was
#' different from 0, and `zero_values` where the original predictor was 0.
#' If `zero_values` is not specified, it is fixed to e to simplify the interpretation
#' of the results.
#'
#' @param dat matrix, the predictor for which the zero values should be recoded.
#' @param zero_values optional numeric, the value to assign to the zero values of `dat`
#' in the dummy variable. It defaults to e to simplify the interpretation of the results.
#' @param name optional character, the name of the predictor to create a named list
#'
#' @return a possibly named list of two matrices. The first one is the recoded version of `dat` where all zeroes
#' are changed to 1. The second is the dummy variable such that dummy[dat==0] <- zero_values and 1
#' otherwise.
#' @export
get_zero_dummy <- function(dat, name = NULL, zero_values = NULL){
zero_dummy <- matrix(1, nrow(dat), ncol(dat))
if(all(is.null(zero_values))){
zero_values <- exp(1)
}
zero_dummy[dat==0] <- zero_values
dat[dat==0] <- 1
w <- list(dat,zero_dummy)
if(is.null(name))
return(w)
names(w) <- c(name, paste0(name,'_zeroes'))
return(w)
}
```

## Try the ghypernet package in your browser

Any scripts or data that you put into this service are public.

ghypernet documentation built on Oct. 15, 2021, 5:14 p.m.