#' Vertex Nomination via Seeded Graph Matching
#'
#' Finds the seeds in an \eqn{h}-hop induced nbd of \eqn{G_1} around the
#' VOI, x,that is, finds induced subgraph generated by vertices that are
#' within a path of length \eqn{h} the VOI, and then finds an \eqn{ell}-hop
#' induced nbd of \eqn{G_1} around the seeds within the \eqn{h}-hop nbd of
#' x, and an \eqn{ell}-hop induced nbd of \eqn{G_2} around the corresponding
#' seeds. Then, matches these induced subgraphs via \code{multiStart}.
#'
#' @param x vector of indices for vertices of interest (voi) in \eqn{G_1}
#' @param seeds a numeric matrix, the \eqn{m \times 2}{m*2} matching vertex table,
#' where m is the number of seeds.
#' @param g1 \eqn{G_1} in \code{igraph} object where voi is known
#' @param g2 \eqn{G_2} in \code{igraph}
#' @param h \eqn{h}-hop for distance from voi to other vertices to create
#' \eqn{h}-hop induced subgraph of \eqn{G_1}
#' @param ell \eqn{ell}-hop for distance from seeds to other vertices
#' to create \eqn{ell}-hop induced subgraph of \eqn{G_1}
#' @param R number of restarts for \code{multiStart}
#' @param gamma to be used with \code{multiStart}, max tolerance
#' for alpha, how far away from the barycenter user is willing to go for
#' the initialization of \code{sgm} on any given iteration
#' @param maxiter the number of maxiters for the Frank-Wolfe algorithm.
#' @param pad a scalar value for padding for sgm (cases where two graphs
#' have different number of vertices); defaulted to 0
#' @param verbose logical verbose outputs
#' @param plotF boolean to plot the probability matrix
#'
#' @return \code{VOI} Vertex of Interest
#' @return \code{seeds} \code{s} seeds
#' @return \code{Sx1} \eqn{S_x} the seeds within an h-path, i.e. in the
#' h-neighborhood, of VOI x in \eqn{G_1}
#' @return \code{Sx2} \eqn{S'_x} the corresponding seeds of \eqn{S_x} in \eqn{G_2}
#' @return \code{candidates_for_matching} labels for the candidates for
#' matching VOIs in \eqn{G_2}
#' @return \code{G1_vertices} vertices within ell-neighborhood of S_x in \eqn{G_1}
#' (vertices used in vertex nomination for VOIs)
#' @return \code{G2_vertices} vertices within ell-neighborhood of S'_x in \eqn{G_2}
#' (including candidates x' for matching VOIs x in \eqn{G_1})
#' @return \code{P} matrix \eqn{P(i,j)} is the proportion of times that vertex \eqn{j} in
#' the induced subgraph of \eqn{G_2} was mapped to vertex \eqn{i} in the induced subgraph of \eqn{G_1}.
#' Then the \eqn{i-th} and \eqn{j-th} elements of the labels vector tells you which vertices these actually were
#' in \eqn{G_1} and \eqn{G_2}, respectively.
#'
#' @import igraph
#' @import plyr
#' @author Youngser Park <youngser@jhu.edu>, Kemeng Zhang <kzhang@jhu.edu>
#'
#' @references Patsolic, Heather G.; Park, Youngser; Lyzinski, Vince; Priebe, Carey E. (2017).
#' Vertex Nomination Via Local Neighborhood Matching
#' Online: \url{https://arxiv.org/abs/1705.00674}
#'
#' Fishkind, D. E., Adali, S., Priebe, C. E. (2012). Seeded Graph Matching
#' Online: \url{http://arxiv.org/abs/1209.0367}
#' @export
# Matches Graphs given a seeding of vertex correspondences
vnsgm <- function(x,seeds,g1,g2,h,ell,R,gamma,maxiter=20,pad=0,verbose=FALSE,plotF=FALSE) {
vn.validateInput(x,seeds,g1,g2,h,ell,R,gamma,maxiter,pad,verbose,plotF)
A <- as.matrix(get.adjacency(g1))
B <- as.matrix(get.adjacency(g2))
nv1<-nrow(A)
nv2<-nrow(B)
nv<-max(nv1,nv2)
nsx1 <- setdiff(1:nv1,c(seeds[,1],x))
vec <- c(seeds[,1],x,nsx1)
AA <- A[vec,vec]
ga <- graph_from_adjacency_matrix(AA,mode="undirected")
ns2 <- setdiff(1:nv2,seeds[,2])
vec2 <- c(seeds[,2],ns2)
BB <- B[vec2,vec2]
gb <- graph_from_adjacency_matrix(BB,mode="undirected")
s <- nrow(seeds)
voi <- (s+1):(s+length(x))
vn <- vnsgm.ordered(voi,s,ga,gb,h,ell,R,gamma,maxiter,pad,verbose,plotF=FALSE)
Sx1 = vec[vn$Sx1]
Sx2 = vec2[vn$Sx2]
Cx2 = vec2[vn$candidates_for_matching]
ind1 = vec[sort(vn$G1_vertices)]
ind2 = vec2[sort(vn$G2_vertices)]
P_matrix = vn$P
row.names(P_matrix) = ind1
colnames(P_matrix) = ind2
matched.seed = intersect(seeds[,1],ind1)
P_matrix = P_matrix[order(ind1),order(ind2)]
if (plotF == TRUE) {
gg = plotP(P_matrix,matched.seed,ind1,ind2,x)
print(gg)
}
return(list(VOI=x, seeds=seeds, Sx1=Sx1, Sx2=Sx2, candidates_for_matching=Cx2, G1_vertices=ind1, G2_vertices=ind2, P=P_matrix))
}
#' Vertex Nomination via Seeded Graph Matching
#'
#' Finds the seeds in an \eqn{h}-hop induced nbd of \eqn{G_1} around the
#' VOI, x,that is, finds induced subgraph generated by vertices that are
#' within a path of length \eqn{h} the VOI, and then finds an \eqn{ell}-hop
#' induced nbd of \eqn{G_1} around the seeds within the \eqn{h}-hop nbd of
#' x, and an \eqn{ell}-hop induced nbd of \eqn{G_2} around the corresponding
#' seeds. Then, matches these induced subgraphs via \code{multiStart}.
#' Assume first \eqn{s} vertices in two graphs are matched seeds.
#'
#' @param x vector of indices for vertices of interest (voi) in \eqn{G_1}
#' @param s the number of seeds.
#' @param g1 \eqn{G_1} in \code{igraph} object where voi is known
#' @param g2 \eqn{G_2} in \code{igraph}
#' @param h \eqn{h}-hop for distance from voi to other vertices to create
#' \eqn{h}-hop induced subgraph of \eqn{G_1}
#' @param ell \eqn{ell}-hop for distance from seeds to other vertices
#' to create \eqn{ell}-hop induced subgraph of \eqn{G_1}
#' @param R number of restarts for \code{multiStart}
#' @param gamma to be used with \code{multiStart}, max tolerance
#' for alpha, how far away from the barycenter user is willing to go for
#' the initialization of \code{sgm} on any given iteration
#' @param maxiter the number of maxiters for the Frank-Wolfe algorithm.
#' @param pad a scalar value for padding for sgm (cases where two graphs
#' have different number of vertices); defaulted to 0
#' @param verbose logical verbose outputs
#' @param plotF boolean to plot the probability matrix
#'
#' @return \code{VOI} Vertex of Interest
#' @return \code{seeds} \code{s} seeds
#' @return \code{Sx1} \eqn{S_x} the seeds within an h-path, i.e. in the
#' h-neighborhood, of VOI x in \eqn{G_1}
#' @return \code{Sx2} \eqn{S'_x} the corresponding seeds of \eqn{S_x} in \eqn{G_2}
#' @return \code{candidates_for_matching} labels for the candidates for
#' matching VOIs in \eqn{G_2}
#' @return \code{G1_vertices} vertices within ell-neighborhood of S_x in \eqn{G_1}
#' (vertices used in vertex nomination for VOIs)
#' @return \code{G2_vertices} vertices within ell-neighborhood of S'_x in \eqn{G_2}
#' (including candidates x' for matching VOIs x in \eqn{G_1})
#' @return \code{P} matrix \eqn{P(i,j)} is the proportion of times that vertex \eqn{j} in
#' the induced subgraph of \eqn{G_2} was mapped to vertex \eqn{i} in the induced subgraph of \eqn{G_1}.
#' Then the \eqn{i-th} and \eqn{j-th} elements of the labels vector tells you which vertices these actually were
#' in \eqn{G_1} and \eqn{G_2}, respectively.
#'
#' @author Youngser Park <youngser@jhu.edu>, Kemeng Zhang <kzhang@jhu.edu>
#'
#' @references Patsolic, Heather G.; Park, Youngser; Lyzinski, Vince; Priebe, Carey E. (2017).
#' Vertex Nomination Via Local Neighborhood Matching
#' Online: \url{https://arxiv.org/abs/1705.00674}
#'
#' Fishkind, D. E., Adali, S., Priebe, C. E. (2012). Seeded Graph Matching
#' Online: \url{http://arxiv.org/abs/1209.0367}
#' @export
vnsgm.ordered <- function(x,s,g1,g2,h,ell,R,gamma,maxiter=20,pad=0,verbose=FALSE,plotF=FALSE) {
vn.validateInput.ordered(x,s,g1,g2,h,ell,R,gamma,maxiter,pad,verbose,plotF)
# Note, V = {x,S,W,J}
# sanity check
# W <- intersect(V(g1),V(g2))
# J1 <- setdiff(V(g1),W); m1 <- length(J1)
# J2 <- setdiff(V(g2),W); m2 <- length(J2)
# W <- setdiff(W,x) # exclude x from W
# W <- setdiff(W,S)
# stopifnot(1+s+length(W)+m1 == vcount(g1))
# stopifnot(1+s+length(W)+m2 == vcount(g2))
# end of sanity check
S = 1:s
if (length(x) == 1) {
min.hop = min(distances(g1,x)[,S])
} else {
min.hop = min(apply(distances(g1,x)[,S],1,min))
}
possible = TRUE
if (!is.finite(min.hop)) {
possible = FALSE
warning("The seeds is not within reach from vertex of interest. vnsgm failed to nominate matches for x.")
}
if (h < min.hop) {
h = min.hop
warning("The seeds is not within h neighborhood from voi. Please choose a value of h no smaller than ", min.hop)
}
if (possible) {
# make h-hop nbhd for x in A
Nh <- unlist(ego(g1,h,nodes=x,mindist=1)) # mindist=0: close, 1: open
Sx1 <- Sx2 <- NULL
Sx1 <- sort(intersect(Nh,S)); sx1 <- length(Sx1)
Sx2 <- sort(intersect(V(g2),Sx1)); sx2 <- length(Sx2)
# Find induced subgraph which is ell-neighbors of Sx1 & Sx2
Gx1 <- sort(unique(unlist(ego(g1,ell,nodes=Sx1,mindist=0)))) # mindist=0: close, 1: open
Gx2 <- sort(unique(unlist(ego(g2,ell,nodes=Sx2,mindist=0)))) # mindist=0: close, 1: open
# Candidates for matching VOI in the second graph
Cx2 <- setdiff(Gx2,S)
#Vertices in both graphs for nominating VOIs
ind1 <- c(Sx1,x,setdiff(Gx1,c(Sx1,x)))
ind2 <- c(Sx2,setdiff(Gx2,Sx2))
if (verbose) {
cat("seed = ", Sx1, ", matching ", ind1, " and ", ind2, "\n")
}
A <- as.matrix(g1[][ind1,ind1])
B <- as.matrix(g2[][ind2,ind2])
P <- multiStart(A,B,R,length(Sx1),gamma,pad=pad,iter=maxiter)
matched.seed = intersect(S,ind1)
num.matched.seed = length(matched.seed)
m = length(ind1)
n = length(ind2)
P_trunc = P[1:m,1:n]
P.df = as.data.frame(P_trunc)
row.names(P.df) = ind1
colnames(P.df) = ind2
P_matrix = P.df[order(ind1),order(ind2)]
P_matrix[1:num.matched.seed,] = 0
P_matrix[,1:num.matched.seed] = 0
P_matrix[1:num.matched.seed,1:num.matched.seed] = diag(num.matched.seed)
if (plotF) {
gg = plotP(P_matrix,matched.seed,ind1,ind2,x)
print(gg)
}
} else {
ind1 <- ind2 <- P <- Cx2 <- NULL
}
return(list(VOI=x, seeds=S, Sx1=Sx1, Sx2=Sx2, candidates_for_matching=Cx2, G1_vertices=ind1, G2_vertices=ind2, P=P_matrix))
}
plotP <- function(P,Seed,G1_vertices,G2_vertices,x)
{
P$vertex = sort(G1_vertices)
P$vertex = as.factor(sort(G1_vertices))
x.f = as.factor(x)
P.m <- suppressMessages(melt(P))
P.s <- plyr::ddply(P.m, .(variable), transform,
rescale = scale(value))
P.s$Category <- P.s$vertex
seed.set = as.factor(Seed)
set1 = setdiff(G1_vertices,x.f)
set2 = setdiff(set1,seed.set)
set3 = setdiff(set1,set2)
set4 = setdiff(G1_vertices,set1)
levels(P.s$Category) <- list("VOI" = set4,
"Seed" = set3,
"Other" = set2)
P.s$rescale = (P.s$rescale-min(P.s$rescale))/(max(P.s$rescale)-min(P.s$rescale))
gg = ggplot(P.s, aes(variable, vertex)) +
geom_tile(aes(alpha = rescale,fill = Category), colour = "white") +
scale_alpha(range=c(0,1)) +
xlab("Vertices of Graph 2") + ylab("Vertices of Graph 1") +
labs(title = "Proportion of times Vertices in Graph 1 Matched to Vertices in Graph 2",
alpha = "Proportion") +
scale_x_discrete(expand = c(0, 0)) +
scale_y_discrete(expand = c(0, 0)) +
theme_classic(base_size = 9) +
theme(axis.ticks = element_blank(),
axis.text.x = element_text(angle = 0, hjust = 0))
return(gg)
}
# Unordered function validation.
vn.validateInput <- function(x,seeds,g1,g2,h,ell,R,gamma,maxiter,pad,verbose,plotF) {
# VOIs x
if (!is.vector(x)) { stop("Error: Input 'x' must be a numeric vector.") }
if (class(x) != "numeric") { stop("Error: Input 'x' must be a numeric vector.") }
# Graphs
if (class(g1) == "dgCMatrix") { g1 = igraph::graph_from_adjacency_matrix(g1) }
if (class(g1) == "matrix") { g1 = igraph::graph_from_adjacency_matrix(g1) }
if (class(g1) != 'igraph') { stop("Input object 'g1' is not an igraph object.") }
if (class(g2) == "dgCMatrix") { g2 = igraph::graph_from_adjacency_matrix(g2) }
if (class(g2) == "matrix") { g2 = igraph::graph_from_adjacency_matrix(g2) }
if (class(g2) != 'igraph') { stop("Input object 'g2' is not an igraph object.") }
# Seeds
if (class(seeds) != "matrix" || ncol(seeds) != 2 || !all(seeds%%1 == 0)) {
stop("Error: Input 'seeds' must be an m by 2 matrix of corresponding seed indices.")
}
if (nrow(seeds) > vcount(g1)) { stop("Error: Number of seeds is greater than number of vertices in g1.") }
if (nrow(seeds) > vcount(g2)) { stop("Error: Number of seeds is greater than number of vertices in g2.") }
# VOIs x and Seeds non-overlap
if (sum(is.element(x, seeds[,1])) != 0) { stop("Error: Input 'x' contains elements from 'seeds'.")}
# h
if (!is.numeric(h)) { stop("Error: Input 'h' must be a positive number.")}
if (length(h) != 1) { stop("Error: Input 'h' must be a positive number.")}
if (h <= 0) { stop("Error: Input 'h' must be a positive number.")}
# ell
if (!is.numeric(ell)) { stop("Error: Input 'ell' must be a positive number.")}
if (length(ell) != 1) { stop("Error: Input 'ell' must be a positive number.")}
if (ell <= 0) { stop("Error: Input 'ell' must be a positive number.")}
# R
if (!is.numeric(R)) { stop("Error: Input 'R' must be a number.")}
if (length(R) != 1) { stop("Error: Input 'R' must be a number.")}
if (R <= 0) { stop("Error: Input 'R' must be a positive number.")}
# gamma
if (!is.numeric(gamma)) { stop("Error: Input 'gamma' must be a number.")}
if (length(gamma) != 1) { stop("Error: Input 'gamma' must be a number.")}
if (gamma < 0 || gamma > 1) { stop("Error: Input 'gamma' must be between 0 and 1.")}
# maxiter
if (!is.numeric(maxiter)) { stop("Error: Input 'maxiter' must be a positive number.")}
if (length(maxiter) != 1) { stop("Error: Input 'maxiter' must be a positive number.")}
if (maxiter <= 0) { stop("Error: Input 'maxiter' must be a positive number.")}
# Padding
if (!is.numeric(pad)) { stop("Error: Input 'pad' must be a number.")}
# Verbosity
if (!is.logical(verbose)) { stop("Error: Input 'verbose' must be a logical.")}
# plotF
if (!is.logical(plotF)) { stop("Error: Input 'plotF' must be a logical.")}
}
# Ordered function validation.
vn.validateInput.ordered <- function(x,s,g1,g2,h,ell,R,gamma,maxiter,pad,verbose,plotF) {
# VOIs x
if (!is.vector(x)) { stop("Error: Input 'x' must be a numeric vector.") }
if (!(class(x) == "numeric" || class(x) == "integer")) { stop("Error: Input 'x' must be a numeric vector.") }
# s
if (!is.numeric(s)) { stop("Error: Input 's' must be a positive number.")}
if (length(s) != 1) { stop("Error: Input 's' must be a positive number.")}
if (s <= 0) { stop("Error: Input 's' must be a positive number.")}
# VOIs x and Seeds non-overlap
if (sum(x < s) != 0) { stop("Error: Input 'x' contains elements from 'seeds'.")}
# Graphs
if (class(g1) == "dgCMatrix") { g1 = igraph::graph_from_adjacency_matrix(g1) }
if (class(g1) == "matrix") { g1 = igraph::graph_from_adjacency_matrix(g1) }
if (class(g1) != 'igraph') { stop("Input object 'g1' is not an igraph object.") }
if (class(g2) == "dgCMatrix") { g2 = igraph::graph_from_adjacency_matrix(g2) }
if (class(g2) == "matrix") { g2 = igraph::graph_from_adjacency_matrix(g2) }
if (class(g2) != 'igraph') { stop("Input object 'g2' is not an igraph object.") }
# h
if (!is.numeric(h)) { stop("Error: Input 'h' must be a positive number.")}
if (length(h) != 1) { stop("Error: Input 'h' must be a positive number.")}
if (h <= 0) { stop("Error: Input 'h' must be a positive number.")}
# ell
if (!is.numeric(ell)) { stop("Error: Input 'ell' must be a positive number.")}
if (length(ell) != 1) { stop("Error: Input 'ell' must be a positive number.")}
if (ell <= 0) { stop("Error: Input 'ell' must be a positive number.")}
# R
if (!is.numeric(R)) { stop("Error: Input 'R' must be a number.")}
if (length(R) != 1) { stop("Error: Input 'R' must be a number.")}
if (R <= 0) { stop("Error: Input 'R' must be a positive number.")}
# gamma
if (!is.numeric(gamma)) { stop("Error: Input 'gamma' must be a number.")}
if (length(gamma) != 1) { stop("Error: Input 'gamma' must be a number.")}
if (gamma < 0 || gamma > 1) { stop("Error: Input 'gamma' must be between 0 and 1.")}
# maxiter
if (!is.numeric(maxiter)) { stop("Error: Input 'maxiter' must be a positive number.")}
if (length(maxiter) != 1) { stop("Error: Input 'maxiter' must be a positive number.")}
if (maxiter <= 0) { stop("Error: Input 'maxiter' must be a positive number.")}
# Padding
if (!is.numeric(pad)) { stop("Error: Input 'pad' must be a number.")}
# Verbosity
if (!is.logical(verbose)) { stop("Error: Input 'verbose' must be a logical.")}
# plotF
if (!is.logical(plotF)) { stop("Error: Input 'plotF' must be a logical.")}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.