#
# Copyright (C) 2017 José Tomás Atria <jtatria at gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Functions for creating and manipulating semantic networks #
# Construction and pruning --------------------------------------------------------------------
#' @export
#' @importFrom Matrix isSymmetric
#' @importFrom igraph graph_from_adjacency_matrix E edge_attr
graph_make <- function(
X, ls=rep( TRUE, nrow( X ) ), fs=rep( TRUE, ncol( X ) ), sim.func=NULL,
edge.mode=c( 'auto', 'directed', 'undirected' ), edge.normalize=TRUE, allow.loops=TRUE,
prune.tol=1, prune.sort=NULL,
vertex.data=NULL,
cluster=TRUE, cluster.func=function( G ) igraph::cluster_louvain( igraph::as.undirected( G ) ),
cluster.contribs=FALSE,
verbose=FALSE,
...
) {
X <- if( is.null( sim.func ) ) {
X[ls,ls]
} else {
X[ls,fs] %>% sim.func()
}
edge.mode <- match.arg( edge.mode )
edge.mode <- ifelse( edge.mode == 'auto',
ifelse( Matrix::isSymmetric( unname( X ) ),
'undirected',
'directed'
),
edge.mode
)
G <- X %>% igraph::graph_from_adjacency_matrix(
mode=edge.mode, diag=allow.loops, weighted=TRUE
)
edges <- if( is.null( prune.sort ) ) {
igraph::E( G )[ order( igraph::edge_attr( G, 'weight' ), decreasing=TRUE ) ]
} else if( is.function( prune.sort ) ) {
igraph::E( G )[ order( prune.sort( G, ... ), decreasing=TRUE ) ]
}
G %<>% graph_prune( edges=edges, tol=prune.tol, desc=desc, drop.vertices=TRUE, verbose=verbose )
if( edge.normalize ) {
if( !is.null( igraph::edge_attr( G, 'weight' ) ) ) {
w <- igraph::edge_attr( G, 'weight' )
igraph::edge_attr( G, 'weight' ) <- w / max( w )
} else {
warning( 'Can\'t normalize weights in unweighted graph!' )
}
}
if( !is.null( vertex.data ) ) G %<>% graph_add_vertex_data( vertex.data )
if( cluster ) {
K <- graph_cluster( G, cluster.func=cluster.func )
G %<>% graph_add_cluster_data( K, contribs=cluster.contribs, verbose=verbose )
}
obj <- structure( list(
X=X, G=G, K=K,
params=list(
prune.sort = prune.sort,
sim.func = sim.func,
prune.tol = prune.tol,
edge.normalize = edge.normalize,
allow.loops = allow.loops
)
), class='semnet' )
return( obj )
}
#' Threshold pruning maintaining connectivity
#'
#' This function will attempt to remove as many edges as possible following some order of edge
#' significance, while maintaining total graph connectivity up to the given vertex dettachment
#' tolerance.
#'
#' Different prunning strategies can be implemented via different edge sorting orders according to
#' some edge significance criterion. Naive thresholding will consider each edge's weight as its
#' significance.
#'
#' Internally, this function will try to find an optimum significance threshold through a pivot
#' search over the entire significance range. If \code{verbose} is \code{TRUE}, the progress of the
#' pivot search is printed to output to show the algorithm's optimization process; this is
#' sometimes useful to analyze degenerate results.
#'
#' NB: be advised that this strategy may fail for some graphs, e.g. if all vertices dettach from
#' the main component in components smaller than the accepted tolerance, the algorithm will happily
#' proceed to delete all edges in the graph. Lowering the connectivity threshold may sometimes help,
#' but in most cases a different edge sorting strategy is needed. See \link{graph_edge_score}.
#'
#' @param g An igraph graph.
#' @param edges A vector of edges, sorted according to their significance.
#' @param tol An integer indicating the maximum allowed disconnected component size.
#' @param attr An edge attribute name, used to sort edges if no edge list provided. Ignored if
#' edges is not NULL. Deafults to 'weight' if it is.
#' @param desc Logical, indicating whether edges should be sorted in descending order according to
#' attr. Ignored if edges is not NULL, defaults to TRUE if it is.
#' @param drop.vertices Logical. Whether vertices in disconnected components below \code{tol}
#' should be dropped from the graph. Defaults to TRUE.
#'
#' @return A subgraph of g with the least significant edges removed.
#'
#' @export
#' @importFrom igraph V E edge_attr components
graph_prune <- function(
g, edges=NULL, tol=1, attr='weight', desc=TRUE, drop.vertices=TRUE, verbose=FALSE
) {
chk_igraph( g )
if( is.null( edges ) ) {
edges <- E( g )[ order( edge_attr( g, attr ), decreasing=desc ) ]
}
nv <- length( igraph::V( g ) )
ne <- length( edges )
if( verbose ) message( sprintf( 'Pruning graph to max dettached component size %d', tol ) )
head <- 1
tail <- length( edges )
while( tail - head >= 1 ) { # run until there are no more edges between tail and head
pivot <- ceiling( head + ( tail - head ) / 2 ) # find middle edge between tail and head
# test for connectedness after deleting all edges above pivot
if( graph_connected( g - edges[ pivot:length( edges ) ], tol=tol ) ) {
# if connected, set pivot to tail. if tail is aleady pivot, set head to pivot.
if( verbose ) bins_prog( head, tail, pivot, length( edges ), lab=' connected ' )
if( tail == pivot ) head <- pivot
tail <- pivot
} else {
# if not connected, set pivot to head. if head is aleady pivot, set tail to pivot.
if( verbose ) bins_prog( head, tail, pivot, length( edges ), lab='not connected' )
if( head == pivot ) tail <- pivot
head <- pivot
}
}
out <- g - edges[ tail:length( edges ) ]
if( drop.vertices ) {
cmps <- components( out )
out <- out - V( out )[ cmps$membership %in% which( cmps$csize <= tol ) ]
}
if( verbose ) message( sprintf(
"Edge pruning removed %4.2f%% of edges and dropped %4.2f%% of vertices",
( ( ne - length( igraph::E( out ) ) ) / ne ) * 100,
( ( nv - length( igraph::V( out ) ) ) / nv ) * 100
) )
return( out )
}
#' Determine graph connectivity up to the given tolerance
#'
#' This function will return true if the maximum component size of all components detached from the
#' largest component is greater or equal to the given tolerance. E.g. a tolerance of one implies
#' that a graph will be considered connected if all minor components are at most orphan nodes,
#' a tolerance of 2 implies the same if all minor components are at most only dyads, etc.
#'
#' This function will call \code{igraph::components( g )} if no value is given for \code{cmps}.
#' \emph{This can be extremely slow}, but so far I have not been able to find or produce
#' a faster component determination strategy.
#' If you know of one, please contact me (e.g. if you know how to compute matrix kernels in less
#' than \eqn{O(n^3)} )
#'
#' @param g An igraph graph.
#' @param tol An integer indicating the maximum size of detached components. Defaults to 1.
#' @param cmps An igraph components object. Will be computed over g if none given.
#'
#' @return \code{TRUE} if the given graph has no detached component larger than tol, \code{FALSE}
#' otherwise.
#'
#' @export
graph_connected <- function( g, tol=1, cmps=igraph::components( g ) ) {
if( cmps$no == 1 ) return( TRUE )
if( cmps$no > 1 && tol == 0 ) return( FALSE )
if( max( cmps$csize[ -which.max( cmps$csize ) ] ) > tol ) {
return( FALSE )
} else {
return( TRUE )
}
}
#' Add vertex data from the given data frame
#'
#' This function will add all the values contained in the given data frame as vertex attributes,
#' retaining variable names as attribute names, optionally prefixed by the given \code{pref} string.
#'
#' Igraph does not accept all of \R's data types as attribute values, so some types are coerced
#' without information loss: logical values are converted to integers and factors are replaced with
#' their level names as strings. All other values are left unchanged.
#'
#' @param g An igraph object.
#' @param vdf A valid lexical data frame with vertex data. See \link{lexical_dataset}.
#' @param pref An optional prefix for attribute names.
#'
#' @return \code{g}, with data from vdf added as vertex attributes.
#'
#' @export
graph_add_vertex_data <- function( g, vdf, pref='' ) {
data <- term_data( vdf, names( V( g ) ) )
for( n in names( data ) ) {
igraph::vertex_attr( g, pref %.% n ) <- if( is.logical( ( v <- data[[n]] ) ) ) {
as.integer( v )
} else if( is.factor( v ) ) {
as.character( v )
} else {
v
}
}
return( g )
}
#' Add cluster data to vertices and edges
#'
#' This function will add cluster data from the given \code{cms} igraph communities object as
#' vertex and edge attributes.
#'
#' Edge attributes added by this function consist of a logical value named 'xing' indicating wether
#' the respective edge crosses community boundaries or not, as well as a numeric "cluster weight"
#' value named the same as the given \code{eweight} name with a '_c' suffix, equal to the product
#' of the respective edge's value in the attribute \code{eweight} attribute and the value of
#' \code{intra_factor} if the edge is internal to a community and 1 otherwise.
#'
#' Vertex attributes added by this function consist of the vertex's community membership as an
#' integer value named 'comm'. If \code{contrib} is \code{TRUE}, then cluster-vertex contribution
#' scores will be computed and also added as numeric values named 'wgt_c2v' and 'wgt_v2c' for
#' cluster-vertex contribution and vertex-cluster contribution scores, respectively. NB: this
#' computation can be very slow for large networks.
#'
#' @param g An igraph object.
#' @param cms An igraph communities object.
#' @param intra_factor Factor to multiply edge weights for non-community crossing edges.
#' Defaults to 10.
#' @param eweight Name for the input edge weight attribute. Defaults to 'weight'.
#' @param pref An optional prefix that will be added to all attribute names.
#' @param contribs Logical. Add cluster-vertex contribution scores. Defaults to \code{FALSE}
#' @param quiet Logical. Suppress all messages. Defaults to \code{FALSE}.
#'
#' @return \code{g} with vertex and edge attributes containing relevant data from \code{cms}.
#'
#' @export
graph_add_cluster_data <- function(
g, cms, intra_factor=10, eweight='weight', pref=NULL, contribs=FALSE, verbose=FALSE
) {
pref <- if( is.null( pref ) || pref == '' ) '' else pref %.% '_'
# edges
x <- igraph::crossing( cms, g )
igraph::edge_attr( g, pref %.% 'xing' ) <- x %>% as.integer()
w <- igraph::edge_attr( g, eweight )
igraph::edge_attr( g, pref %.% eweight %.% '_c' ) <- w * ifelse( x, intra_factor, 1 )
# vertices
k <- igraph::membership( cms )
igraph::vertex_attr( g, pref %.% 'comm' ) <- k
if( contribs ) {
if( verbose ) message( 'Computing cluster-vertex weights. This may a take a bit...' )
igraph::vertex_attr( g, pref %.% 'wgt_v2c' ) <- graph_cluster_contribs( g, k, mode='vc' )
igraph::vertex_attr( g, pref %.% 'wgt_c2v' ) <- graph_cluster_contribs( g, k, mode='cv' )
}
return( g )
}
#' Create signature for vertex sets.
#'
#' This function will create a unique value for each of the vertex sets defined in the given vector
#' k, by applying the given signature function to the vertex attributes contained in the given
#' graph to the set of topn elements in each set in k, filtered by fltr, sorted according to the
#' results of the given score function.
#'
#' By default, this function will filter by pos to select only nouns, will rank vertices according
#' to the product of their cluster contribution scores, and concantenate the term of the top 10
#' vertices in each cluster.
#'
#' @param g An igraph graph
#' @param fltr A vector to filter vertices in \code{g} before computing scores or sorting.
#' Defaults to \code{ pos == 'NN' | pos == 'NP'}, i.e. nouns.
#' @param k A membership vector with sets to compute signatures for. Defaultf to 'comm',
#' i.e. communities.
#' @param score.func A function to compute vertex scores in each set in \code{k} from the
#' vertex attributes available in \code{g}. Defaults to the product of the
#' cluster-vertex contribution scores. See \link{graph_cluster_contribs}.
#' @param desc Logical. Sort by the value of \code{score.func} in descending order. Defaults
#' to TRUE.
#' @param topn Integer. The number of vertices to include in the signature. Defaults to 10.
#' Set to NA to include all vertices.
#' @param sig.func A signature function that will receive the vertex attribute values for the
#' \code{topn} members in each set, and should return a character value to use as
#' set signature. Defaults to the concatenation of the vertices' terms.
#'
#' @export
graph_make_set_sigs <- function( g,
fltr=( igraph::vertex_attr( g )$pos == 'NN' | igraph::vertex_attr( g )$pos == 'NP' ),
k=igraph::vertex_attr( g, 'comm' ), score=function( df ) df$wgt_v2c * df$wgt_c2v, desc=TRUE,
topn=10, sig.func=function( df ) df$term %>% paste( collapse=' ' )
) {
df <- g %>% gr$vattr() %>% as.data.frame()
df <- df[ fltr, ]
df$crank <- vapply( 1:nrow( df ), function( r ) {
score( df[ r, ] )
}, 0.0 )
topn <- ifelse( is.na( topn ), Inf, topn )
sigs <- vapply( unique( k ), function( ki ) {
rows <- df[ k[fltr] == ki, ][ order( df[ k[fltr] == ki, ]$crank, decreasing=desc ), ]
rows[ 1:min( nrow( rows ), topn ), ] %>%
sig.func() %>% return()
}, '' )
return( sigs )
}
# Clustering and cluster scores ---------------------------------------------------------------
#' Detect communities using the given clustering function.
#'
#' Constructs an igraph communities object by applying the given \code{clust_func} community
#' detection function over the given \code{g} igraph object.
#'
#' \code{clust_func} must follow igraph's clustering functions API, i.e. take a graph object as
#' input and produce an igraph 'communities' object as output.
#'
#' @param g An igraph graph.
#' @param cluster_func A function implementing a clustering algorithm. Defaults to
#' \link{igraph::clustr_louvain}.
#' @param undirected Logical. Ignore edge directionality, Defalts to TRUE.
#' @param quiet Logical. Suppress progress messages. Defaults to FALSE.
#' @param ... Additional parameters passed to \code{clust_func}.
#'
#' @return An igraph communities object with the clustering results.
#'
#' @export
graph_cluster <- function(
g, cluster.func=igraph::cluster_louvain, undirected=TRUE, verbose=FALSE, ...
) {
if( undirected ) {
g <- igraph::as.undirected( g )
}
s <- proc.time()
c <- cluster.func( g, ... )
e <- proc.time()
if( verbose ) message( sprintf(
"Communities extracted in %8.4f seconds: %d groups, %6.4f mod.",
( e - s )[3], length( c ), igraph::modularity( c )
) )
return( c )
}
#' Community and neighbourhood contributions
#'
#' This function computes the contributions of either a vertex's neighbourhood to its enclosing
#' community or a vertex's community to its sorrounding neighbourhood.
#'
#' Contributions are computed for each vertes as a ratio of a measure function of a numerator
#' set equal to a vertex's non-community crossing edges over a measure function over a denominator
#' set equal to a) all of its incident edges in the case of cluster to neighbourhood contribution
#' and b) all of its enclosing community in the case of neighbourhood to cluster contributions.
#'
#' The mode parameters controls which direction is computed, 'vc' computes negihborhood to cluster
#' contribution, 'cv' computes cluster to neighbourhood contributions.
#'
#' @param g An igraph graph or dense adjacency matrix.
#' @param k A vertex-cluster membership vector.
#' @param mode The direction of the measure; one of 'vc' or 'cv'. See details.
#' @param matrix Logical. Return vertex-cluster matrix instead of a score vector. Useful for
#' cluster similarity computations. Defaults to \code{FALSE}
#'
#' @return If \code{matrix} is \code{FALSE}, a vector of length equal to the number of vertices in
#' the given vset with the requested contribution measure values for each vertex in its
#' assigned community.
#' If \code{matrix} is \code{TRUE}, a matrix with as many rows as vertices in \code{vset}
#' and as many columns as unique values in \code{k}.
#'
#' @export
graph_cluster_contribs <- function( ... ) {
UseMethod( "graph_cluster_contribs" )
}
#' @export
graph_cluster_contribs.igraph <- function( g, ..., weight='weight' ) {
m <- igraph::as_adj( g, type='both', attr=weight, sparse=FALSE )
graph_cluster_contribs( m, ... )
}
#' @export
graph_cluster_contribs.matrix <- function( m, k, mode=c('cv','vc'), as.matrix=FALSE ) {
mode <- match.arg( mode )
cwt <- switch( match.arg( mode ), cv=c2v_contrib( m, k ), vc=v2c_contrib( m, k ) )
if( as.matrix ) {
ks <- unique( k )
out <- matrix( NA, nrow=length( V( g ) ), ncol=length( ks ) )
for( i in 1:length( ks ) ) {
out[,i] <- ifelse( k == ks[i], cwt, 0 )
}
} else {
out <- cwt
}
return( out )
}
# Edge scoring --------------------------------------------------------------------------------
#' Compute edge significance scores with the given function
#'
#' This function will apply \code{score.func} over all edges in a graph, passing as parameters each
#' edge's weight, the total incoming strength of its head vertex, the total outgoing strength of
#' its tail vertex and the total weight in the graph, for e.g. compuation of some probabilistic
#' edge significance measure. See \link{graph_edge_score_ident} for a null implementation that
#' simply returns the weight; See \link{graph_edge_score_mlf} for an implemenation of Dianati's
#' Marginal Likelihood Filter.
#'
#' TODO: fast matrix -> igraph edge attribute mappings would allow for considerable speed-up.
#' TODO: get rid of igraph
#'
#' @param g An igraph graph.
#' @param score.func An edge scoring function with signature
#' \code{function( w, head_s, tail_S, total_w )} giving edge's weight, total head
#' vertex strength, total tail vertex strength and total graph weigth.
#' @param src.attr An attribute to read edge weights from. Defaults to \code{'weight'}.
#' @param tgt.attr A target attribute to write scores to. Defaults to \code{NULL}: don't write
#' scores back to graph.
#'
#' @return A vector of edge scores.
#'
#' @export
graph_edge_score <- function(
g, score.func=graph_edge_score_mlf, src.attr='weight', tgt.attr=NULL
) {
edges <- igraph::E( g )
heads <- igraph::head_of( g, edges )
tails <- igraph::tail_of( g, edges )
head_s <- igraph::strength( g, heads, mode='in' )
tail_s <- igraph::strength( g, heads, mode='out' )
weights <- igraph::edge_attr( g, src.attr )
res <- score.func( weights, head_s, tail_s, sum( weights ) )
if( !is.null( tgt.attr ) ) {
edge_attr( g, tgt.attr ) <- res
}
return( res )
}
#' No-op edge scoring function.
#'
#' Does nothing: score edges according to their given weight \code{w}.
#'
#' @param w A vector of edge weights. Returned as is.
#' @param head_s A vector of edge's head vertices' total strength. Ignored.
#' @param tail_s A vector of edge's tail vertices' total strength. Ignored.
#' @param total_w The total weight of edges in the source graph. Ignored.
#'
#' @return This is a no-op function that returns \code{w}.
#'
#' @export
graph_edge_score_ident <- function( w, head_s, tail_s, total ) {
return( w )
}
#' Navid Dianati's Marginal Likelihood Filter.
#'
#' Computes edge significance as the p-value associated to the given edge weights as the
#' outcome of a set of \code{total_w} Bernoulli trials with probability equal to
#' \eqn{head_s * tail_s / total_w ^2}.
#'
#' Integer-valued edge weight vectors \code{w} will use the binomial distribution with parameters
#' \eqn{n} equal to \code{w}, \eqn{p} equal to \code{(h_s*t_s)/total} and \eqn{n} equal to
#' \code{total}.
#'
#' Numeric-valued edge weight vectors \code{w} will use the continuity corrected approximation with
#' a normal distribution with parameters \eqn{x} equal to \code{w+1/2}, \eqn{\mu} equal to
#' \code{(h_s*t_s)} and \eqn{\sigma} equal to \code{((h_s*t_s)/total)*(1-(h_s*t_s)/total)}.
#'
#' TODO: Citation needed.
#'
#' @param w A vector of edge weights.
#' @param h_s A vector of edge's head vertices' total strength.
#' @param t_s A vector of edge's tail vertices' total strength.
#' @param total The total weight of edges in the source graph.
#'
#' @return A real-valued vector with the significance score associated to each vertex by the
#' marginal likelihood filter model.
#'
#' @export
graph_edge_score_mlf <- function( w, h_s, t_s, total ) {
UseMethod( 'graph_edge_score_mlf' )
}
#' @export
graph_edge_score_mlf.integer <- function( w, h_s, t_s, total ) {
p <- ( h_s / total ) * ( t_s / total )
n <- sum( w )
r <- pbinom( w, n, p, lower.tail=FALSE )
return( r )
}
#' @export
graph_edge_score_mlf.numeric <- function( w, h_s, t_s, total ) {
p <- ( h_s / total ) * ( t_s / total )
n <- sum( w )
r <- pnorm( w + 1/2, mean=n*p, sd=sqrt( n*p*( 1 - p ) ) )
return( r )
}
# Cluster alignment ---------------------------------------------------------------------------
#' Cluster alignment
#'
#' This function will compute a distance matrix for the clusters found in two different community
#' extraction results by first computing a vector of vertex weights for each cluster in each
#' solution and then computing a distance matrix between the clusters on both solutions based on
#' these vertex weight vectors.
#'
#' The contrib_func parameter must be a weighting function of the form f( g, c ) -> v, where g is
#' a graph, c is a communities object and the returned value v is a vector of weights for all
#' vertices in g. See graph_cluster_contrib for an example.
#'
#' The dist_func parameter must be a vector distance function of the form f( v1, v2 ) -> x, where
#' v1 and v2 are weight vectors, and x is a scalae value. See any of the similarity and divergence
#' functions in this package for examples.
#'
#' If the given cms1, cms2 objects come from different graphs (i.e. from different time periods,
#' etc), then both graphs must be supplied. If no second graph is supplied, it will be assumed
#' that both community solutions come from the same graph.
#'
#' The label_format parameter must be a format string containing two "\%d" format specs, no more
#' and no less. The first will be used for the cluster solution index (1 or 2), while the second
#' will be used for the cluster index within each solution. See the default value for an example.
#'
#' @param g1 An igraph object.
#' @param cms1 An igraph communities object, produced from g1.
#' @param cms2 An igraph communities object, produced from g1 or from g2 if given.
#' @param g2 An optional, second igraph object if cms2 is not from g1. NULL by default.
#' @param label_format A format sting to name rows and cols in the returned matrix. Defaults to
#' "c\%d_k\%d".
#' @param vid Vertex attribute name for vertex ids, required if aligning across different
#' graphs.
#' @param fill Logical indicating how to align cluster vertex vectors if aligning across
#' different graphs. TRUE (the default) means use vertex union. FALSE will use
#' vertex intersection.
#' @param contrib_func A function to compute vertex weights for a cluster.
#' @param dist_func A function to compute distances between vertex weight vectors.
#' @param ... Additional parameters passed to contrib_func.
#'
#' @return A matrix with as many rows as clusters in cms1 and as many columns as clusters in
#' cms2 with the results of the pairwise application of dist_func.
#'
#' @export
graph_align_clusters <- function(
g1, cms1, cms2, g2=NULL,
label_format="c%d_k%d", vid="name", fill=TRUE,
contrib_func=graph_cluster_contrib_matrix, dist_func=dist_hellinger, ...
) {
filt <- if( !is.null( g2 ) ) {
if( is.null( igraph::vertex_attr( g1, vid ) ) || is.null( igraph::vertex_attr( g2, vid ) ) ) {
stop( "Invalid name for vertex id attr, needed for cross-graph alignment" )
}
if( fill ) { # union
unique( c( igraph::vertex_attr( g1, vid ), igraph::vertex_attr( g2, vid ) ) )
} else { # intersection
igraph::vertex_attr( g1, vid ) %in% igraph::vertex_attr( g2, vid )
}
} else {
igraph::V( g1 ) %>% as.integer()
}
g2 <- if( is.null( g2 ) ) g1 else g2
m1 <- contrib_func( g1, cms1, ... )
m2 <- contrib_func( g2, cms2, ... )
rownames( m1 ) <- igraph::vertex_attr( g1, vid )
rownames( m2 ) <- igraph::vertex_attr( g2, vid )
out <- matrix( NA, nrow=ncol( m1 ), ncol=ncol( m2 ) )
for( i in 1:nrow( out ) ) {
for( j in 1:ncol( out ) ) {
v1 <- m1[,i][filt] %>% ifelse( is.na( . ), 0, . )
v1 <- m2[,j][filt] %>% ifelse( is.na( . ), 0, . )
out[i,j] = dist_func( v1, v2 )
}
}
if( !is.na( label_format ) ) {
rownames( out ) <- sprintf( label_format, 1, 1:ncol( m1 ) )
colnames( out ) <- sprintf( label_format, 2, 1:ncol( m2 ) )
}
return( out )
}
#' Create bipartite graph from a cluster alignment matrix.
#'
#' This function will transform a matrix containing the result of a cluster alignment call into a
#' bipartite graph.
#'
#' @param m A cluster alignment matrix.
#'
#' @return An igraph object containing a bipartite graph
#'
#' @export
#'
#' @importFrom igraph graph_from_adjacency_matrix vertex_attr
graph_alignment_graph <- function( m ) {
k1 = nrow( m ); k2 = ncol( m )
gm <- matrix( 0.0, nrow=k1+k2, ncol=k1+k2 )
rownames( gm ) <- c( rownames( m ), colnames( m ) )
colnames( gm ) <- c( rownames( m ), colnames( m ) )
gm[ 1:k1, ( k1 + 1 ):ncol( gm ) ] <- m
gm[ ( k1 + 1 ):ncol( gm ), 1:k1 ] <- t( m )
g <- igraph::graph_from_adjacency_matrix( gm, weighted=TRUE )
igraph::vertex_attr( g, "type" ) <- c( rep( TRUE, k1 ), rep( FALSE, k2 ) )
return( g )
}
# Non-export utility functions ----------------------------------------------------------------
#' @importFrom igraph is.igraph
chk_igraph <- function( g ) {
if( !igraph::is.igraph( g ) ) {
stop( 'g is not an igraph graph!' )
}
}
chk_comm <- function( comm ) {
if( !'communities' %in% class( comm ) ) {
stop( 'comm is not an igraph communities!' )
}
}
bins_prog <- function( head, tail, pivot, range, lab, w=60 ) {
lo <- ( ( ( 0 + head ) / range ) * w ) %>% floor
s1 <- ( ( ( pivot - head ) / range ) * w ) %>% floor
s2 <- ( ( ( tail - pivot ) / range ) * w ) %>% floor
hi <- ( ( ( range - tail ) / range ) * w ) %>% floor
message( sprintf( sprintf( "[%%s]: |%%%ds|", w + 1 ),
lab,
paste( c(
rep( ' ', lo ),
rep( '-', s1 ),
"|",
rep( '-', s2 ),
rep( ' ', hi )
), collapse='' )
) )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.