Nothing
#' Check graph input type (for whether it's a graph or a edgelist).
#'
#' Returns TRUE if the supplied object \code{graph} is an adjacency matrix. Returns FALSE if the provided object is an edgelist. The function checks whether the edgelist conforms to our standards (sender, target, edgecount).
#'
#' @param graph A graph adjacency matrix or an edgelist.
#' @return TRUE or FALSE. Returns TRUE if the provided object \code{graph} is an adjacency matrix.
checkGraphtype <- function(graph) {
#TODO: add igraph compatibility
# is matrix?
isMatrix <- FALSE
if (dim(graph)[1] == dim(graph)[2]) {
isMatrix <- TRUE
}
# if edgelist = check if 3 columns are there, 2 characters/factors, 1 numeric
if (!isTRUE(isMatrix)) {
if (ncol(graph) != 3) {
stop(
"graph needs to be an adjacecy matrix or an edgelist. The edgelist needs to have exactly 3 columns: sender, target, edgecounts."
)
}
if (!is.character(graph[, 1]) & !is.factor(graph[, 1])) {
stop(
"The first row in your edgelist needs to be a factor/character vector with sender node IDs."
)
}
if (!is.character(graph[, 2]) & !is.factor(graph[, 2])) {
stop(
"The second row in your edgelist needs to be a factor/character vector with target node IDs."
)
}
if (!is.numeric(graph[, 3]) & !is.integer(graph[, 3])) {
stop(
"The third row in your edgelist needs to be a numeric/integer vector with edge counts."
)
}
}
# return:
isMatrix
}
################################################################################
#' Calculate weighted reciprocity change statistics for multi-edge graphs.
#'
#' The function takes either an edgelist or an adjacency matrix and returns an
#' adjacency matrix with the reciprocity change statistic. This reciprocity
#' matrix can then be used as a predictor in the gHypEG regression.
#'
#' @param graph A graph adjacency matrix or an edgelist. The edgelist needs to
#' have 3 columns: a sender vector, a target vector and an edgecount vector.
#' @param nodes optional character/factor vector. If an edgelist is provided,
#' you have to provide a list of unique identifiers of your nodes in the graph.
#' This is because in the edgelist, isolates are usually not recorded.
#' If you do not specify isolates in your nodes object, they are excluded
#' from the analysis (falsifies data).
#' @param zero_values optional numeric value. Use this to substitute zero-values
#' in your reciprocity change statistic matrix. Zero values in the predictors
#' are recognized in the gHypEG regression as structural zeros. To ensure this
#' does not happen, please recode your zero-values in all your predictors,
#' ideally using a dummy variable fitting an optimal value for the zeroes.
#' @return Reciprocity change statistic matrix.
#' @author LB, GC
#' @seealso \code{\link{sharedPartner_stat}} or \code{\link{homophily_stat}}
#' @export
#' @examples
#' recip_stat <- reciprocity_stat(adj_karate)
#' recip_stat_dummy <- get_zero_dummy(recip_stat, name = 'reciprocity')
#' nrm(w=recip_stat_dummy, adj_karate, directed = FALSE, selfloops = FALSE)
#'
reciprocity_stat <- function(graph, nodes = NULL, zero_values = NULL){
## preprocess:
# is graph object a matrix or edgelist?
isMatrix <- checkGraphtype(graph) # returns isMatrix==TRUE/FALSE
# if not isMatrix transform edgelist into matrix
if(!isTRUE(isMatrix)){
el <- graph #bc graph will be overwritten
# check if they provided a list of nodes for the graph (matching attribute files)
if(is.null(nodes)){
nodes <- unique(c(el[,1], el[,2]))
}
#
#transform edgelist to matrix
# save it to object "graph"
graph <- el2adj(el, nodes)
}else{
nodes = rownames(graph)
}
## calculate reciprocity
recip_mat <- t(graph)
## transform zero values
if(is.null(zero_values)){
# recip_mat[recip_mat == 0] <- min(recip_mat[recip_mat > 0])/10
}else{
recip_mat[recip_mat == 0] <- zero_values
}
return(recip_mat)
}
################################################################################
#' Calculate (un-)weighted shared partner change statistics for multi-edge graphs.
#'
#' The function calculates the change statistic for shared partners for each
#' dyad in the graph. Shared partner statistics count for each dyad involving
#' nodes i and j in the graph, how many nodes k these two nodes have in common
#' (or share). The shared partner $k$ counts are weighted by their
#' interactions with the focal nodes $i$ and $j$. This is necessary in
#' dense multi-edge graphs to ensure that meaningful triadic closure is
#' detected. The statistic can be calculated in 3 different forms: undirected,
#' incoming shared partners (where shared partner k: k->i and k->j) and outgoing
#' shared partners (where shared partner k: k<-i and k<-j).
#'
#' @param graph A graph adjacency matrix or an edgelist. The edgelist needs to
#' have 3 columns: a sender vector, a target vector and an edgecount vector.
#' @param weighted set to TRUE.
#' @param triad.type set to \code{undirected}. Can be set to \code{incoming}
#' or \code{outgoing} instead. This then corresponds to directed triadic closure
#' in the multi-edge graph.
#' @param nodes optional character/factor vector. If an edgelist is provided,
#' you have to provide a list of unique identifiers of your nodes in the graph.
#' This is because in the edgelist, isolates are usually not recorded.
#' If you do not specify isolates in your nodes object, they are excluded
#' from the analysis (falsifies data).
#' @param zero_values optional numeric value. Use this to substitute zero-values
#' in your shared partner change statistic matrix. Zero values in the predictors
#' are recognized in the gHypEG regression as structural zeros. To ensure this
#' does not happen, please recode your zero-values in all your predictors,
#' ideally using a dummy variable fitting an optimal value for the zeroes.
#' @param directed boolean. Is the graph directed?
#' @return Shared partner change statistic matrix.
#' @author LB, GC, GV
#' @seealso \code{\link{reciprocity_stat}} or \code{\link{homophily_stat}}
#' @export
#' @import dplyr
#' @importFrom utils setTxtProgressBar txtProgressBar
#' @examples
#' tri_stat <- sharedPartner_stat(adj_karate, directed = FALSE)
#' tri_stat_dummy <- get_zero_dummy(tri_stat, name = 'shared_partners')
#' nrm(w=tri_stat_dummy, adj_karate, directed = FALSE, selfloops = FALSE)
sharedPartner_stat <- function(graph,
directed,
weighted = TRUE,
triad.type = 'undirected',
nodes = NULL,
zero_values = NULL) {
## preprocess:
# is graph object a matrix or edgelist?
isMatrix <- checkGraphtype(graph) # returns isMatrix==TRUE/FALSE
# transform edgelist into matrix
if (isTRUE(isMatrix)) {
adj <- graph
el <- adj2el(adj, directed = directed)
} else{
adj <- el2adj(graph, nodes)
}
if (is.null(nodes))
nodes <- rownames(adj)
# check triad.type
if (!triad.type == 'undirected' &
!triad.type == 'directed.incoming' &
!triad.type == 'directed.outgoing') {
stop(
'triad.type needs to be one of three: "undirected", "directed.incoming" or "directed.outgoing".
Check help files for additional infos.'
)
}
if (!directed & !isSymmetric(adj))
adj <- adj + t(adj)
diag(adj) <- 0
# progressbar
pb = txtProgressBar(
min = 1,
max = nrow(adj) - 1,
initial = 0,
style = 3
)
## now count triangles
if (triad.type == 'directed.outgoing')
partners <- sapply(nodes, findPartners_target, el = el)
if (triad.type == 'directed.incoming')
partners <- sapply(nodes, findPartners_sender, el = el)
if (triad.type == 'undirected' | isFALSE(directed))
partners <- sapply(nodes, findPartners_all, el = el, simplify = FALSE)
tri <-
matrix(0, nrow(adj), ncol(adj), dimnames = list(rownames(adj), rownames(adj)))
for (i in nodes[-length(nodes)]) {
for (k in nodes[(which(nodes == i) + 1):length(nodes)]) {
js <- intersect(partners[[i]], partners[[k]])
if(isFALSE(weighted)){
tri[i, k] <- length(js)
next
}
if (length(js) > 0) {
if (triad.type == 'directed.outgoing' |
triad.type == 'undirected') {
tri[i, k] <-
tri[i, k] + sum(sapply(js, function(j)
adj[j, i] * adj[j, k] / max(adj[j, i], adj[j, k], 1)))
}
if (directed & (triad.type == 'directed.incoming' |
triad.type == 'undirected')) {
tri[i, k] <-
tri[i, k] + sum(sapply(js, function(j)
adj[i, j] * adj[k, j] / max(adj[i, j], adj[k, j], 1)))
}
}
}
setTxtProgressBar(pb, which(nodes==i))
}
tri <- tri + t(tri)
## transform zero values
if (!is.null(zero_values)) {
tri[tri == 0] <- zero_values
}
return(tri)
}
findPartners_sender <- function(node, el){
colnames(el) <- c('sender','target')
return(el$target[el$sender == node])
}
findPartners_target <- function(node, el){
colnames(el) <- c('sender','target')
return(el$sender[el$target == node])
}
findPartners_all <- function(node, el){
unique(c(findPartners_sender(node,el),findPartners_target(node,el)))
}
################################################################################
## graph statistic: homophily
#Function: Creates a matrix for categorical attribute matches between two nodes
# as well as absolute difference effects.
#' Calculate homophily in multi-edge graphs.
#'
#' The function calculates homophily matrices. If you supply a categorical
#' variable (factor, character), the function returns attribute matches for
#' dyads from the same group. If you supply a continuous variable (numeric,
#' integers), the function returns absolute difference effects for each dyad in
#' the graph.
#'
#' @param variable A attribute variable. Can be categorical (attribute matches)
#' or continuous (absolute difference effects).
#' @param type set to \code{categorical}. Can be set to \code{absdiff} instead.
#' If set to \code{categorical}, the homophily statistic calculates matches
#' between dyads from the same group (analogous to dummy variables measuring
#' attribute match between two nodes (=10) and attribute mismatch (=1)). If
#' set to \code{absdiff} it calculates the difference in values from variable
#' for each dyad in the graph.
#' @param nodes optional character/factor vector. If an edgelist is provided, you
#' have to provide a list of unique identifiers of your nodes in the graph.
#' This is because in the edgelist, isolates are usually not recorded. If you
#' do not specify isolates in your nodes object, they are excluded from the
#' analysis (falsifies data).
#' @param zero_values optional numeric value. Use this to substitute zero-values
#' in your homophily change statistic matrix. Zero values in the predictors
#' are recognized in the gHypEG regression as structural zeroes. To ensure this
#' does not happen, please recode your zero-values in all your predictors,
#' ideally using a dummy variable fitting an optimal value for the zeroes.
#' Only useful with absdiff type.
#' @param these.categories.only optional vector specifying the categories to be
#' used, if only a subset of factor(variable) is needed.
#' @return Homophily change statistic matrix.
#' @author LB, GC
#' @seealso \code{\link{reciprocity_stat}} or \code{\link{sharedPartner_stat}}
#' @export
#' @examples
#' homop_stat <- homophily_stat(variable = vertexlabels, nodes = rownames(adj_karate))
#' nrm(w=list('homophily'= homop_stat), adj_karate, directed = FALSE, selfloops = FALSE)
homophily_stat <- function(variable = variable,
type = 'categorical',
#type = categorical, absdiff
nodes = nodes,
these.categories.only = NULL,
zero_values = NULL) {
# check inputs
if (is.null(variable)) {
stop(
"You need to specify a homophily variable. Either a categorical (=character, factor) or continous (=numeric, integer) variable."
)
} else{
if (is.character(variable) | is.factor(variable)) {
# check type
if (type == 'absdiff') {
stop("Please use type = categorical for chacacter/factor homophily variables.")
}
# check categories
if (!is.null(these.categories.only)) {
if(!all(these.categories.only %in% unique(variable))){
stop(
"The values you provided in these.categories.only are not subsets of the variable object."
)
}
}
}
}
if (is.null(nodes)) {
stop(
"You need to specify a nodes object. This object lists the nodes in the graph (e.g., their ID)."
)
}
if (type != 'categorical' & type != 'absdiff') {
stop("Homophily type can either be categorical or absolute difference")
}
### Categorical homophily
if (type == 'categorical') {
## create block IDs
labels <- levels(factor(variable))
blockids <- as.numeric(plyr::mapvalues(
variable,
from = labels,
to = c(1, numbers::Primes(length(variable) * 200))[1:(length(unique(variable)))]
))
#TODO: package dependence here!!
#verbose = TRUE:
#print(unique(blockids))
#print(unique(labels))
## create block matrix
blocks <- blockids %*% t(blockids)
## now sort the blocks (according to which values are specified in these.categories.only)
if (is.null(these.categories.only)) {
# create homophily matrix
homophily_mat <-
ifelse(matrix(blocks %in% blockids ^ 2, nrow = length(nodes)), exp(1), 1) #hardcoded e, 1
} else{
#these.categories.only = specified
# which labels should be selected?
selectedIDs <-
numbers::Primes(length(variable))[labels %in% these.categories.only]
homophily_mat <-
ifelse(matrix(blocks %in% selectedIDs ^ 2, nrow = length(nodes)), exp(1), 1) #hardcoded e, 1
}
## label matrix
rownames(homophily_mat) <- nodes
colnames(homophily_mat) <- nodes
} else{
# type = absdiff
### Absolute difference
homophily_mat <-
matrix(0, nrow = length(nodes), ncol = length(nodes))
rownames(homophily_mat) <- nodes
colnames(homophily_mat) <- nodes
##TODO: this is super inefficient (2-for loops), move this to Rcpp
for (i in 1:nrow(homophily_mat)) {
for (j in 1:ncol(homophily_mat)) {
homophily_mat[i, j] <-
abs(variable[nodes %in% nodes[i]] - variable[nodes %in% nodes[j]])
}
}
}
## treat zero-values
if (is.null(zero_values)) {
# homophily_mat[homophily_mat == 0] <-
# min(homophily_mat[homophily_mat > 0]) / 10
} else{
homophily_mat[homophily_mat == 0] <- zero_values
}
## return matrix with 1/10 or absolute difference
return(homophily_mat)
}
#' Create a dummy variable to encode zero values of another variable.
#'
#' Use this to substitute zero-values in your nrm values. Zero values in the predictors
#' are recognized in the gHypEG regression as structural zeroes. To ensure this
#' does not happen, please recode your zero-values in all your predictors,
#' ideally using a dummy variable fitting an optimal value for the zeroes.
#' This function takes a predictor that needs to be recoded and returns a list
#' containing two matrices. The first one contains the original predictor
#' recoded such that all zero values are 1 (and thus do not impact the model).
#' The second one consist of a matrix with 1 where the original predictor was
#' different from 0, and `zero_values` where the original predictor was 0.
#' If `zero_values` is not specified, it is fixed to e to simplify the interpretation
#' of the results.
#'
#' @param dat matrix, the predictor for which the zero values should be recoded.
#' @param zero_values optional numeric, the value to assign to the zero values of `dat`
#' in the dummy variable. It defaults to e to simplify the interpretation of the results.
#' @param name optional character, the name of the predictor to create a named list
#'
#' @return a possibly named list of two matrices. The first one is the recoded version of `dat` where all zeroes
#' are changed to 1. The second is the dummy variable such that dummy[dat==0] <- zero_values and 1
#' otherwise.
#' @export
#' @seealso \code{\link{reciprocity_stat}} or \code{\link{sharedPartner_stat}}
get_zero_dummy <- function(dat, name = NULL, zero_values = NULL){
zero_dummy <- matrix(1, nrow(dat), ncol(dat))
if(all(is.null(zero_values))){
zero_values <- exp(1)
}
zero_dummy[dat==0] <- zero_values
dat[dat==0] <- 1
w <- list(dat,zero_dummy)
if(is.null(name))
return(w)
names(w) <- c(name, paste0(name,'_zeroes'))
return(w)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.