VN: Vertex Nomination via Seeded Graph Matching

Documented in vnsgm vnsgm.ordered

#' @rdname vnsgm
#' Finds the seeds in an \eqn{h}-hop induced nbd of \eqn{G_1} around the
#' VOI, x,
#'                      that is, finds induced subgraph generated by
#'                      vertices that are within a path of length \eqn{h}
#'                      the VOI,
#' and then finds an \eqn{ell}-hop induced nbd of \eqn{G_1} around the
#' seeds within the \eqn{h}-hop nbd of x, and an \eqn{ell}-hop induced
#' nbd of \eqn{G_2} around the corresponding seeds.
#' Then, matches these induced subgraphs via \code{multiStart}.
#'
#'
#' @aliases localnbd
#' @param x vector of indices for vertices of interest (voi) in \eqn{G_1}
#' @param seeds vector of a set of seeds
#' @param g1 \eqn{G_1} in \code{igraph} object where voi is known
#' @param g2 \eqn{G_2} in \code{igraph}
#' @param h \eqn{h}-hop for distance from voi to other vertices
#'          to create \eqn{h}-hop induced subgraph of \eqn{G_1}
#' @param ell \eqn{ell}-hop for distance from seeds to other vertices
#'          to create \eqn{ell}-hop induced subgraph of \eqn{G_1}
#' @param R number of restarts for \code{multiStart}
#' @param g gamma to be used with \code{multiStart}, max tolerance for alpha, how far away from the barycenter user is willing to go for
#' the initialization of \code{sgm} on any given iteration
#' @param pad a scalar value for padding for sgm
#' @param sim boolean: default FALSE (no, this is not a simulation) --
#' if TRUE, assumes x = x' (i.e. indices for VOI in \eqn{G_1} are the same as
#' the indices for the corresponding matches in \eqn{G_2})
#' @param verb verbose outputs
#' @param plotF boolean to plot the probability matrix
#'
#' @return \code{seed} \code{s} seeds
#' @return \code{cand} labels for the candidates in \eqn{G_2}
#' @return \code{ind1} labels for the vertices in \eqn{G_1}
#' @return \code{ind2} labels for the vertices in \eqn{G_2}
#' @return \code{P} matrix \eqn{P(i,j)} is the proportion of times that vertex \eqn{j} in
#' the induced subgraph of \eqn{G_2} was mapped to vertex \eqn{i} in the induced subgraph of \eqn{G_1}.
#' Then the \eqn{i-th} and \eqn{j-th} elements of the labels vector tells you which vertices these actually were
#' in \eqn{G_1} and \eqn{G_2}, respectively.
#'
#' @author Youngser Park <youngser@jhu.edu>
#' @export


# Matches Graphs given a seeding of vertex correspondences
vnsgm <- function(x,seeds,g1,g2,h,ell,R,g,pad=0,sim=FALSE,verb=FALSE,plotF=FALSE) {
    A <- as.matrix(get.adjacency(g1))
    B <- as.matrix(get.adjacency(g2))
    nv1<-nrow(A)
    nv2<-nrow(B)
    nv<-max(nv1,nv2)

    nsx1 <- setdiff(1:nv1,c(seeds[,1],x))
    vec <- c(seeds[,1],x,nsx1)

    AA <- A[vec,vec]
    ga <- graph_from_adjacency_matrix(AA,mode="undirected")

    ns2 <- setdiff(1:nv2,seeds[,2])
    vec2 <- c(seeds[,2],ns2)

    BB <- B[vec2,vec2]
    gb <- graph_from_adjacency_matrix(BB,mode="undirected")

    S <- 1:nrow(seeds)
    voi <- (nrow(seeds)+1):(nrow(seeds)+length(x))

    P <- vnsgm.ordered(voi,S,ga,gb,h,ell,R,g,pad,sim,verb,plotF)
    P1 <- P
    P1$x   <- x
    P1$S   <- seeds
    P1$Sx  <- seeds[P$Sx,1]
    P1$Sxp <- seeds[P$Sxp,2]
    P1$Cxp <- vec2[P$Cxp]
    lb2 <- vec[P$labelsGx]
    names(lb2) <- NULL
    P1$labelsGx  <- lb2
    P1$labelsGxp <- vec2[P$labelsGxp]
    return(P1)
}

#' @rdname vnsgm
#' Nominates vertices in second graph to match to VOI in first graph --
#' first reducing the problem, structurally, using provided seeds

#' Finds the seeds in an \eqn{h}-hop induced nbd of \eqn{G_1} around the
#' VOI, x,
#'                      that is, finds induced subgraph generated by
#'                      vertices that are within a path of length \eqn{h}
#'                      the VOI,
#' and then finds an \eqn{ell}-hop induced nbd of \eqn{G_1} around the
#' seeds within the \eqn{h}-hop nbd of x, and an \eqn{ell}-hop induced
#' nbd of \eqn{G_2} around the corresponding seeds.
#' Then, matches these induced subgraphs via \code{multiStart}.
#'
#'
#' @param x vector of indices for vertices of interest (voi) in \eqn{G_1}
#' @param S vector of a set of seeds
#' @param g1 \eqn{G_1} in \code{igraph} object where voi is known
#' @param g2 \eqn{G_2} in \code{igraph}
#' @param h \eqn{h}-hop for distance from voi to other vertices
#'          to create \eqn{h}-hop induced subgraph of \eqn{G_1}
#' @param ell \eqn{ell}-hop for distance from seeds to other vertices
#'          to create \eqn{ell}-hop induced subgraph of \eqn{G_1}
#' @param R number of restarts for \code{multiStart}
#' @param g gamma to be used with \code{multiStart}, max tolerance for alpha, how far away from the barycenter user is willing to go for
#' the initialization of \code{sgm} on any given iteration
#' @param pad a scalar value for padding for sgm
#' @param sim boolean: default FALSE (no, this is not a simulation) --
#' if TRUE, assumes x = x' (i.e. indices for VOI in \eqn{G_1} are the same as
#' the indices for the corresponding matches in \eqn{G_2})
#' @param verb verbose outputs
#' @param plotF boolean to plot the probability matrix
#'
#' @author Youngser Park <youngser@jhu.edu>
#' @export
#'
#'
vnsgm.ordered <- function(x,S,g1,g2,h,ell,R,g,pad=0,sim=TRUE,verb=FALSE,plotF=FALSE) {

### note: may need to fix later: assumes dimension of A is larger ###
### also assumes that B aligns with first nrow(B) vertices of A ###
#g1 <- graph_from_adjacency_matrix(A,mode="undirected")
#g2 <- graph_from_adjacency_matrix(B,mode="undirected")

# Note, V = {x,S,W,J}
# sanity check
#    W <- intersect(V(g1),V(g2))
#    J1 <- setdiff(V(g1),W); m1 <- length(J1)
#    J2 <- setdiff(V(g2),W); m2 <- length(J2)
#    W <- setdiff(W,x) # exclude x from W
#    W <- setdiff(W,S)
    s <- length(S)
#    stopifnot(1+s+length(W)+m1 == vcount(g1))
#    stopifnot(1+s+length(W)+m2 == vcount(g2))
    # end of sanity check

    # make h-hop nbhd for x in A
    Nh <- unlist(ego(g1,h,nodes=x,mindist=1)) # mindist=0: close, 1: open

    # Find S_x = all seeds in Nh, == Sx2
    Sx1 <- Sx2 <- NULL
    Sx1 <- sort(intersect(Nh,S)); sx <- length(Sx1)
    Sx2 <- sort(intersect(V(g2),Sx1)); sx2 <- length(Sx2)
#    stopifnot(identical(Sx1,Sx2))
    case <- ifelse((sx2>0), "possible", "impossible1")

    if (case == "possible") {
        # Find Candidates
        Cx2 <- sort(unique(unlist(ego(g2,ell,nodes=Sx2,mindist=1)))) # mindist=0: close, 1: open
        # make sure seeds aren't included (open: mindist=1)
        Cx2 <- setdiff(Cx2,Sx2)

        if(sim){
            case <- ifelse((x %in% Cx2), "possible", "impossible2")
        }

        # Find induced subgraph from Sx1 & Sx2
        if (any(case=="possible")) {
            Nx1 <- sort(unique(unlist(ego(g1,ell,nodes=Sx1,mindist=0)))) # mindist=0: close, 1: open
            Nx2 <- sort(unique(unlist(ego(g2,ell,nodes=Sx2,mindist=0)))) # mindist=0: close, 1: open
    #subg1 <- induced_subgraph(g1,Nx1); #summary(subg1)
    #subg2 <- induced_subgraph(g2,Nx2); #summary(subg2)
#        stopifnot(x %in% Nx1)

            if(sim){
                wxp <- which(case=="possible")
                xp <- x[wxp]
                (ind1 <- c(Sx1,x,setdiff(Nx1,c(Sx1,xp)))) # is this x or xp??
                (ind2 <- c(Sx2,xp,setdiff(Nx2,c(Sx2,xp))))
            }else{
                (ind1 <- c(Sx1,x,setdiff(Nx1,c(Sx1,x))))
                (ind2 <- c(Sx2,setdiff(Nx2,Sx2)))
            }

            if (verb) {
                cat("seed = ", Sx1, ", matching ", ind1, " and ", ind2, "\n")
            }

            iter <- 20 # The number of iterations for the Frank-Wolfe algorithm
            A <- as.matrix(g1[][ind1,ind1])
            B <- as.matrix(g2[][ind2,ind2])
            P <- multiStart(A,B,R,length(Sx1),g,pad=pad,iter)
            #seeds are assumed to be vertices 1:s in both graphs
            if (plotF) {
                plotP(P,ind2,ind1,Sx1)
            }
        } else {
            ind1 <- ind2 <- P <- NULL
        }
    } else {
        ind1 <- ind2 <- P <- Cx2 <- NULL
    }

    return(list(case=case, x=x, S=S, Sx=Sx1, Sxp=Sx2, Cxp=Cx2, labelsGx=ind1, labelsGxp=ind2, P=P))
}

#function generator
#defunct <- function(msg = "This function is depreciated") function(...) return(warning(msg))

#' @export
#localnbd <- defunct("localnbd changed name to vnsgm")

localnbd <- vnsgm

localnbd2 <- function(x,S,g1,g2,h,R,g,verb=FALSE){

    s <- length(S)
    s1 <- S[1]

    # make h-hop nbhd for x in A
    Nh1 <- unlist(ego(g1,h,nodes=s1,mindist=1)) # mindist=0: close, 1: open
    Nh2 <- unlist(ego(g2,h,nodes=s1,mindist=1)) # mindist=0: close, 1: open
    case <- ifelse((x %in% Nh2 & s>0) , "possible", "impossible")

    # Find S_x = all seeds in Nh, == Sx2
    Sx1 <- S; sx <- length(Sx1)
    Sx2 <- S; sx2 <- length(Sx2)

    if (case=="possible") {
        # Find Candidates
        Cx1 <- setdiff(Nh1,c(Sx1,x))
        Cx2 <- setdiff(Nh2,c(Sx2,x))

        (ind1 <- c(Sx1,x,Cx1))
        (ind2 <- c(Sx2,x,Cx2))

        if (verb) {
            cat("seed = ", Sx1, ", matching ", ind1, " and ", ind2, "\n")
        }

        iter <- 20 # The number of iterations for the Frank-Wolfe algorithm
        A <- as.matrix(g1[][ind1,ind1])
        B <- as.matrix(g2[][ind2,ind2])
        matchnbdD <- multiStart(A,B,R,length(Sx1),g,iter)
        #seeds are assumed to be vertices 1:s in both graphs
    } else {
        Cx2 <- ind1 <- ind2 <- matchnbdD <- NULL
    }

    return(list(case=case, S=S, Sx=Sx1, Cxp=Cx2, labelsGx=ind1, labelsGxp=ind2, matchnbdD=matchnbdD))
}

#' @export
plotP <- function(P,labelsGxp,labelsGx,Sx)
{
    require(Matrix)
    require(lattice)

    p <- image(Matrix(P[,1:length(labelsGxp)]),xlab=expression(V(G*minute[x])), ylab=expression(V(G[x])),
          scales=list(
              #          tck=c(1,0),
              #          alternating=c(3),
              x=list(
                  at=1:length(labelsGxp),
                  labels=as.character(labelsGxp)
              ),
              y=list(
                  at=1:length(labelsGx),
                  labels=as.character(labelsGx)
              )
          ))
    print(p)
    trellis.focus("panel", 1, 1, highlight=FALSE)
    s <- length(Sx)
    lrect((s+0.5),(s+0.5),length(labelsGxp)+0.5,(s+1)+0.5,col="red",alpha=0.2)
    trellis.unfocus()
}