require(graphstats) require(igraph) getElbows <- function(dat, n = 3, threshold = FALSE, plot = TRUE, main="", ...) { ## Given a decreasingly sorted vector, return the given number of elbows ## ## Args: ## dat: a input vector (e.g. a vector of standard deviations), or a input feature matrix. ## n: the number of returned elbows. ## threshold: either FALSE or a number. If threshold is a number, then all ## the elements in d that are not larger than the threshold will be ignored. ## plot: logical. When T, it depicts a scree plot with highlighted elbows. ## ## Return: ## q: a vector of length n. ## ## Reference: ## Zhu, Mu and Ghodsi, Ali (2006), "Automatic dimensionality selection from ## the scree plot via the use of profile likelihood", Computational ## Statistics & Data Analysis, Volume 51 Issue 2, pp 918-930, November, 2006. # if (is.unsorted(-d)) if (is.matrix(dat)) { d <- sort(apply(dat,2,sd), decreasing=TRUE) } else { d <- sort(dat,decreasing=TRUE) } if (!is.logical(threshold)) d <- d[d > threshold] p <- length(d) if (p == 0) stop(paste("d must have elements that are larger than the threshold ", threshold), "!", sep="") lq <- rep(0.0, p) # log likelihood, function of q for (q in 1:p) { mu1 <- mean(d[1:q]) mu2 <- mean(d[-(1:q)]) # = NaN when q = p sigma2 <- (sum((d[1:q] - mu1)^2) + sum((d[-(1:q)] - mu2)^2)) / (p - 1 - (q < p)) lq[q] <- sum( dnorm( d[1:q ], mu1, sqrt(sigma2), log=TRUE) ) + sum( dnorm(d[-(1:q)], mu2, sqrt(sigma2), log=TRUE) ) } q <- which.max(lq) if (n > 1 && q < (p-1)) { q <- c(q, q + getElbows(d[(q+1):p], n-1, plot=FALSE)) } if (plot==TRUE) { if (is.matrix(dat)) { sdv <- d # apply(dat,2,sd) plot(sdv,type="b",xlab="dim",ylab="stdev",main=main,...) points(q,sdv[q],col=2,pch=19) } else { plot(dat, type="b",main=main,...) points(q,dat[q],col=2,pch=19) } } return(q) }
num_class1 <- 90 num_class2 <- 50 assignments <- c(rep(1, num_class1), rep(2, num_class2)) p <- 0.5 B_er <- matrix(c(p), nrow = 1) g_er <- sample_sbm(num_class1 + num_class2, pref.matrix=B_er, block.sizes=c(num_class1 + num_class2)) gs.plot.heatmap(g_er, title="ER", src.label="Vertex", tgt.label="Vertex") ## Embed both with ASE; get singular values from adjacency matrix; ## select dimenstion with dimselect. A_er <- igraph::as_adj(g_er) sigma_er <- svd(A_er)$d[1:10] getElbows(sigma_er, plot=TRUE)
num_class1 <- 90 num_class2 <- 50 assignments <- c(rep(1, num_class1), rep(2, num_class2)) B_sbm <- matrix(c(0.8, 0.6, 0.6, 0.8), nrow = 2) g_sbm <- sample_sbm(num_class1 + num_class2, pref.matrix=B_sbm, block.sizes=c(num_class1, num_class2)) gs.plot.heatmap(g_sbm, title="2-Block SBM", src.label="Vertex", tgt.label="Vertex") ## Embed both with ASE; get singular values from adjacency matrix; ## select dimenstion with dimselect. A_sbm <- igraph::as_adj(g_sbm) sigma_sbm <- svd(A_sbm)$d[1:10] getElbows(sigma_sbm, plot=TRUE)
num_class1 <- 90 num_class2 <- 50 assignments <- c(rep(1, num_class1), rep(2, num_class2)) B_sbm <- matrix(c(0.8, 0.4, 0.4, 0.8), nrow = 2) g_sbm <- sample_sbm(num_class1 + num_class2, pref.matrix=B_sbm, block.sizes=c(num_class1, num_class2)) gs.plot.heatmap(g_sbm) ## Embed both with ASE; get singular values from adjacency matrix; ## select dimenstion with dimselect. A_sbm <- igraph::as_adj(g_sbm) sigma_sbm <- svd(A_sbm)$d[1:gorder(g_sbm)] getElbows(sigma_sbm, plot=TRUE)
gs.plot.heatmap(fly.left, title="Fly Left Hemisphere", src.label="Source Vertex", tgt.label="Target Vertex") A_fly <- igraph::as_adj(fly.left) sigma_fly <- rARPACK::svds(A_fly, k=10)$d getElbows(sigma_fly, plot=TRUE)
A_fly <- igraph::as_adj(fly.left) sigma_fly <- rARPACK::svds(A_fly, k=gorder(fly.left)-1)$d getElbows(sigma_fly, plot=TRUE)
Problem: when we check over a large number of dimensions, the function does not work properly, despite the dimensions to choose being visually fairly obvious.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.