require(graphstats)
require(igraph)


getElbows <- function(dat, n = 3, threshold = FALSE, plot = TRUE, main="", ...) {
    ## Given a decreasingly sorted vector, return the given number of elbows
    ##
    ## Args:
    ##   dat: a input vector (e.g. a vector of standard deviations), or a input feature matrix.
    ##   n: the number of returned elbows.
    ##   threshold: either FALSE or a number. If threshold is a number, then all
    ##   the elements in d that are not larger than the threshold will be ignored.
    ##   plot: logical. When T, it depicts a scree plot with highlighted elbows.
    ##
    ## Return:
    ##   q: a vector of length n.
    ##
    ## Reference:
    ##   Zhu, Mu and Ghodsi, Ali (2006), "Automatic dimensionality selection from
    ##   the scree plot via the use of profile likelihood", Computational
    ##   Statistics & Data Analysis, Volume 51 Issue 2, pp 918-930, November, 2006.

    #  if (is.unsorted(-d))

    if (is.matrix(dat)) {
        d <- sort(apply(dat,2,sd), decreasing=TRUE)
    } else {
        d <- sort(dat,decreasing=TRUE)
    }

    if (!is.logical(threshold))
        d <- d[d > threshold]

    p <- length(d)
    if (p == 0)
        stop(paste("d must have elements that are larger than the threshold ",
                   threshold), "!", sep="")

    lq <- rep(0.0, p)                     # log likelihood, function of q
    for (q in 1:p) {
        mu1 <- mean(d[1:q])
        mu2 <- mean(d[-(1:q)])              # = NaN when q = p
        sigma2 <- (sum((d[1:q] - mu1)^2) + sum((d[-(1:q)] - mu2)^2)) /
            (p - 1 - (q < p))
        lq[q] <- sum( dnorm(  d[1:q ], mu1, sqrt(sigma2), log=TRUE) ) +
            sum( dnorm(d[-(1:q)], mu2, sqrt(sigma2), log=TRUE) )
    }

    q <- which.max(lq)
    if (n > 1 && q < (p-1)) {
        q <- c(q, q + getElbows(d[(q+1):p], n-1, plot=FALSE))
    }

    if (plot==TRUE) {
        if (is.matrix(dat)) {
            sdv <- d # apply(dat,2,sd)
            plot(sdv,type="b",xlab="dim",ylab="stdev",main=main,...)
            points(q,sdv[q],col=2,pch=19)
        } else {
            plot(dat, type="b",main=main,...)
            points(q,dat[q],col=2,pch=19)
        }
    }

    return(q)
}

Simulated ER Data

num_class1 <- 90
num_class2 <- 50
assignments <- c(rep(1, num_class1), rep(2, num_class2))
p <- 0.5
B_er  <- matrix(c(p), nrow = 1)

g_er <- sample_sbm(num_class1 + num_class2, pref.matrix=B_er, block.sizes=c(num_class1 + num_class2))

gs.plot.heatmap(g_er, title="ER", src.label="Vertex", tgt.label="Vertex")

## Embed both with ASE; get singular values from adjacency matrix;
## select dimenstion with dimselect.
A_er <- igraph::as_adj(g_er)
sigma_er <- svd(A_er)$d[1:10]
getElbows(sigma_er, plot=TRUE)

Simulated 2-block Data, Small k

num_class1 <- 90
num_class2 <- 50
assignments <- c(rep(1, num_class1), rep(2, num_class2))
B_sbm <- matrix(c(0.8, 0.6,
                  0.6, 0.8), nrow = 2)

g_sbm <- sample_sbm(num_class1 + num_class2, pref.matrix=B_sbm, block.sizes=c(num_class1, num_class2))

gs.plot.heatmap(g_sbm, title="2-Block SBM", src.label="Vertex", tgt.label="Vertex")

## Embed both with ASE; get singular values from adjacency matrix;
## select dimenstion with dimselect.
A_sbm <- igraph::as_adj(g_sbm)
sigma_sbm <- svd(A_sbm)$d[1:10]
getElbows(sigma_sbm, plot=TRUE)

Simulated 2-block Data, Big k

num_class1 <- 90
num_class2 <- 50
assignments <- c(rep(1, num_class1), rep(2, num_class2))
B_sbm <- matrix(c(0.8, 0.4,
                  0.4, 0.8), nrow = 2)

g_sbm <- sample_sbm(num_class1 + num_class2, pref.matrix=B_sbm, block.sizes=c(num_class1, num_class2))

gs.plot.heatmap(g_sbm)

## Embed both with ASE; get singular values from adjacency matrix;
## select dimenstion with dimselect.
A_sbm <- igraph::as_adj(g_sbm)
sigma_sbm <- svd(A_sbm)$d[1:gorder(g_sbm)]
getElbows(sigma_sbm, plot=TRUE)

Binarized Real Data

Fly Left Hemisphere, Small k

gs.plot.heatmap(fly.left, title="Fly Left Hemisphere", src.label="Source Vertex", tgt.label="Target Vertex")
A_fly <- igraph::as_adj(fly.left)
sigma_fly <- rARPACK::svds(A_fly, k=10)$d
getElbows(sigma_fly, plot=TRUE)

Fly Left Hemisphere, Big k

A_fly <- igraph::as_adj(fly.left)
sigma_fly <- rARPACK::svds(A_fly, k=gorder(fly.left)-1)$d
getElbows(sigma_fly, plot=TRUE)

Problem: when we check over a large number of dimensions, the function does not work properly, despite the dimensions to choose being visually fairly obvious.



neurodata/graphstats documentation built on May 14, 2019, 5:19 p.m.