source("~/Dropbox/Worm/Codes/Connectome/mbstructure/R/structure-utils.R") #source("http://www.cis.jhu.edu/~parky/Semipar_vs_Nonpar/utils.r") source("~/Dropbox/RFiles/ccc_utils.R") suppressMessages(library(knitr)) #opts_knit$set(animation.fun = hook_scianimator) #suppressMessages(library(tourr)) #suppressMessages(library(animint)) suppressMessages(library(RColorBrewer))
GM250_seed is an instance of the Graph Matching problem.
In this problem two graphs are given. G1
and G2
.
A partial map between nodes of G1
and G2
are provided in the train data.
The task is to predict the mapping between the unmapped nodes in the test data.
Data for GM250_seed consists of two graphs in raw_data dir:
G1.gml
: attributed undirected graph; ~~250~~ 755 nodes; 5138 edges; ~~12~~ 4 features for each nodeG2.gml
: attributed undirected graph; ~~124~~ 755 nodes; ~~324~~ 5138 edges; ~~12~~ 4 features for each nodetrainData.csv
contains G1
nodes and trainTargets.csv
contains G2
nodes. Together, they constitute 151 known mappings.testData.csv
contains G2
nodes, for which the mappings have to be predicted in testTargets.csv
~~suppressMessages(library(tidyverse)) suppressMessages(library(igraph)) suppressMessages(library(Matrix)) suppressMessages(library(VN)) dtifile <- "~/Dropbox/D3M/D3M/connectome_stats/dwimean_3216.edgelist" mrifile <- "~/Dropbox/D3M/D3M/connectome_stats/fmrimean_2039.edgelist" dtidat <- read.table(dtifile) mridat <- read.table(mrifile) gdti <- graph_from_edgelist(as.matrix(dtidat)[,-3], directed=FALSE); E(gdti)$weight <- dtidat[,3]; summary(gdti); is.connected(gdti) gmri <- graph_from_edgelist(as.matrix(mridat)[,-3], directed=FALSE); E(gmri)$weight <- mridat[,3]; summary(gmri); is.connected(gmri)
out1 <- gmmase(gdti, dmax = 20, embed = "ASE", Kmax = 10, clustering = "GMM", verbose=FALSE) #out1 <- gmmase(g, dmax = 100, embed = "LSE", Kmax = 10, clustering = "GMM", verbose=FALSE) #out1 <- gmmase(g, dmax = 100, embed = "ASE", Kmax = 10, clustering = "Kmeans", verbose=FALSE) Xhat1 <- out1$mc$data Yhat <- out1$Y df2 <- data.frame(Xhat=Xhat1, cluster=as.factor(Yhat)) ggpairs(df2, columns=1:(ncol(df2)-2), mapping=aes(color=cluster, alpha=0.5))
# rearrange the graphs so that seeds are the first m vertices matched.id1 <- match(train$G1.nodeID, V(g1)$nodeID) # 151 perm.g1 <- invPerm(c(matched.id1, (1:vcount(g1))[-matched.id1])) matched.id2 <- unique(match(train$G2.nodeID, V(g2)$nodeID)) # 145 perm.g2 <- invPerm(c(matched.id2, (1:vcount(g2))[-matched.id2])) g1.new <- permute.vertices(g1, perm.g1) g2.new <- permute.vertices(g2, perm.g2) g2.sub <- induced.subgraph(g2.new, 1:nrow(train)); summary(g2.sub)
So, m
= r nrow(train)
correspondence are given.
We will use the first $s = {0,30,60,90,120, 150}$ vertices as seeds and repeat the process 100 times to see the matching performance.
On Sep 9, 2017, at 12:18 PM, Vince Lyzinski vincelyzinski@gmail.com wrote:
hard seeding enforces the seeds throughout the problem (they can't change), while soft seeding just initializes at the seeds, but allows them to change in the course of the optimization
A1 <- as.matrix(g1.new[]) A2 <- as.matrix(g2.new[]) n <- nrow(A1) m <- nrow(A2) gamma <- 1 s <- 10 # numseed M <- rsp(n-s,gamma) S <- diag(n); S[(s+1):n,(s+1):n] <- M system.time(out <- sgm(A2, A1, 0, start=S, pad=0, iteration=30)) newA2 <- out$P %*% A1 %*% t(out$P) (f <- norm(A2[1:m,1:m]-newA2[1:m,1:m], "F")) (matchV <- sum(out$corr[1:m] == 1:m)) #The accuracy for the first `m` = `r m` vertices is `r matchV`, which is `r matchV/m`%.
set.seed(12345) A1 <- as.matrix(g1.new[]) A2 <- as.matrix(g2.new[]) n <- nrow(A1) m <- nrow(A2) gamma <- 1 nmc <- 100 niter <- 30 svec <- seq(0, 150, by=30)
method <- "hard" for (s in svec) { cat("Working on s = ", s, "\n") mc <- foreach (i=1:nmc) %dopar% { ## S is a starting point for softseeding if (method=="soft") { M <- rsp(n-s,gamma) S <- diag(n); S[(s+1):n,(s+1):n] <- M out <- sgm(A2,A1,0,start=S,pad=0,iteration=niter) } else { # "hard" S <-matrix(1/(n-s), n-s, n-s) out <- sgm(A2,A1,s,start=S,pad=0,iteration=niter) if (s > 0) { out$corr <- c(1:s,out$corr) } } newA2 <- out$P %*% A1 %*% t(out$P) f <- norm(A2[1:m,1:m]-newA2[1:m,1:m], "F") matchV <- sum(out$corr[1:m] == 1:m) matchE <- sum((A2[1:m,1:m]+newA2[1:m,1:m])>=2) / 2 c(matchV, matchE, f) } save(mc,n,m,nmc,s,gamma,file=paste0("mc-r49-s",s,"-nmc",nmc,"-niter",niter,"-",method,".Rbin")) }
matchV <- matchE <- f <- NULL for (s in svec) { load(paste0("~/Dropbox/D3M/D3M/r49/mc-r49-s",s,"-nmc",nmc,"-niter",niter,".Rbin")) matchV <- c(matchV, unlist(sapply(mc,"[",1))) matchE <- c(matchE, unlist(sapply(mc,"[",2))/2) f <- c(f, unlist(sapply(mc,"[",3))) } df <- data.frame(init=rep(1:nmc,times=length(svec)), f=f, matchV=matchV, matchE=matchE, s=factor(rep(svec, each=nmc))) df3 <- df %>% group_by(s) %>% arrange(f) %>% mutate(init2=order(f)) %>% select(-init) # "hard" niter <- 30 matchV <- matchE <- f <- NULL for (s in svec) { load(paste0("~/Dropbox/D3M/D3M/r49/mc-r49-s",s,"-nmc1-niter",niter,"-hard.Rbin")); nmc=100 matchV <- c(matchV, rep(unlist(sapply(mc,"[",1)),nmc)) matchE <- c(matchE, rep(unlist(sapply(mc,"[",2)),nmc)) f <- c(f, rep(unlist(sapply(mc,"[",3)),nmc)) } df <- data.frame(init=rep(1:nmc,times=length(svec)), f=f, matchV=matchV, matchE=matchE, s=factor(rep(svec, each=nmc))) df32 <- df %>% group_by(s) %>% arrange(f) %>% mutate(init2=order(f)) %>% select(-init) df33 <- rbind(data.frame(method="soft", df3), data.frame(method="hard", df32)) df4 <- gather(df33, key="variable", value="value", -c(s,init2,method)) ggplot(df4, aes(x=init2,y=value)) + facet_grid(variable~., scales="free") + xlab("initialization") + # geom_point(aes(col=s, shape=method)) + geom_line(aes(col=s, linetype=method), size=1.2) + # scale_color_brewer(palette = "Set1") + theme(text=element_text(size=12)) + theme(strip.text=element_text(size=rel(1.1))) + theme(legend.title=element_text(size=rel(1.1),face="bold")) + labs(color="seeds") + theme(legend.text=element_text(size=rel(1.0),face="plain"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.