#'
#' Given a decreasingly sorted vector, return the given number of elbows
#'
#' @param dat a input vector (e.g. a vector of standard deviations), or a input feature matrix
#' @param n the number of returned elbows
#' @param threshold either FALSE or a number. If threshold is a number, then all the elements in d that are not larger than the threshold will be ignored.
#' @param plot logical. When T, it depicts a scree plot with highlighted elbows
#' @param main a string of the plot title
#'
#' @return a vector of length \eqn{n}
#'
#' @references Zhu, Mu and Ghodsi, Ali (2006), Automatic dimensionality selection from
#' the scree plot via the use of profile likelihood, Computational
#' Statistics & Data Analysis, Volume 51 Issue 2, pp 918-930, November, 2006.
#'
#' @author Youngser Park <youngser@jhu.edu>
#' @export
#' @importFrom graphics plot points
#' @importFrom stats sd dnorm
#'
getElbows <- function(dat, n = 3, threshold = FALSE, plot = TRUE, main="", ...) {
## Given a decreasingly sorted vector, return the given number of elbows
##
## Args:
## dat: a input vector (e.g. a vector of standard deviations), or a input feature matrix.
## n: the number of returned elbows.
## threshold: either FALSE or a number. If threshold is a number, then all
## the elements in d that are not larger than the threshold will be ignored.
## plot: logical. When T, it depicts a scree plot with highlighted elbows.
##
## Return:
## q: a vector of length n.
##
## Reference:
## Zhu, Mu and Ghodsi, Ali (2006), "Automatic dimensionality selection from
## the scree plot via the use of profile likelihood", Computational
## Statistics & Data Analysis, Volume 51 Issue 2, pp 918-930, November, 2006.
# if (is.unsorted(-d))
if (is.matrix(dat)) {
d <- sort(apply(dat,2,sd), decreasing=TRUE)
} else {
d <- sort(dat,decreasing=TRUE)
}
if (!is.logical(threshold))
d <- d[d > threshold]
p <- length(d)
if (p == 0)
stop(paste("d must have elements that are larger than the threshold ",
threshold), "!", sep="")
lq <- rep(0.0, p) # log likelihood, function of q
for (q in 1:p) {
mu1 <- mean(d[1:q])
mu2 <- mean(d[-(1:q)]) # = NaN when q = p
sigma2 <- (sum((d[1:q] - mu1)^2) + sum((d[-(1:q)] - mu2)^2)) /
(p - 1 - (q < p))
lq[q] <- sum( dnorm( d[1:q ], mu1, sqrt(sigma2), log=TRUE) ) +
sum( dnorm(d[-(1:q)], mu2, sqrt(sigma2), log=TRUE) )
}
q <- which.max(lq)
if (n > 1 && q < (p-1)) {
q <- c(q, q + getElbows(d[(q+1):p], n-1, plot=FALSE))
}
if (plot==TRUE) {
if (is.matrix(dat)) {
sdv <- d # apply(dat,2,sd)
plot(sdv,type="b",xlab="dim",ylab="stdev",main=main,...)
points(q,sdv[q],col=2,pch=19)
} else {
plot(dat, type="b",main=main,...)
points(q,dat[q],col=2,pch=19)
}
}
return(q)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.