# Global knitr chunk options for this vignette: collapse each chunk's source
# and output into a single block, and prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(vimix)

## Simulate 2000 one-dimensional data points: 1000 draws from a normal
## distribution with mean -3 followed by 1000 draws with mean 3 (both use
## rnorm's default standard deviation of 1)
data <- c(rnorm(1000, mean = -3), rnorm(1000, mean = 3))

## Use variational inference for mixture of Gaussians to find clusters,
## fitting the model with K = 10
output <- vimix(data, K = 10)
## Plot cluster labels
library(ggplot2)
library(broom)
library(gridExtra)

## Convert the data vector and cluster labels to a data.frame.
## The column is built directly with data.frame() because broom's tidy()
## method for plain numeric vectors is deprecated; the column is still
## called `x`, which is what the plot below maps to the x axis.
data <- data.frame(x = data)
data$label <- as.factor(output$label)

## Plot clusters: histogram of the observations, outlined by assigned label.
## bins = 30 is ggplot2's default, stated explicitly to avoid the
## "pick a better value" message.
ggplot(data, aes(x = x, colour = label)) +
  geom_histogram(bins = 30)
## Check that the lower bound is monotonically increasing.
## The first element of output$L and of output$Cl is dropped before plotting.
elbo_trace <- output$L[-1]
lb <- data.frame(
  ELBO = elbo_trace,
  # seq_along() stays empty for an empty trace, unlike 1:length(x)
  iter = seq_along(elbo_trace),
  number_clusters = output$Cl[-1]
)

## Plot the lower bound against the iteration index
plot_lb <- ggplot(lb, aes(x = iter, y = ELBO)) +
  geom_line(linetype = "dashed") +
  geom_point()

## Plot number of non-empty clusters
plot_nc <- ggplot(lb, aes(x = iter, y = number_clusters)) +
  geom_line(linetype = "dashed") +
  geom_point()

## Show both plots side by side
grid.arrange(plot_lb, plot_nc, ncol = 2)
## Compare the final lower bound across numbers of clusters and repeated runs.
## Simulate 200 one-dimensional data points: 100 draws with mean -3 followed
## by 100 draws with mean 3
data <- c(rnorm(100, mean = -3), rnorm(100, mean = 3))

maxK <- 10
n_random_starts <- 30
k_values <- 2:maxK

## One row per value of K, one column per run. Cells start as NA so that a
## run that was never stored is visible instead of looking like an ELBO of 0.
elbo_by_k <- matrix(
  NA_real_,
  nrow = length(k_values),
  ncol = n_random_starts
)
for (i in seq_along(k_values)) {
  for (j in seq_len(n_random_starts)) {
    output <- vimix(data, K = k_values[i])
    ## Keep only the last recorded value of the lower bound for this run
    elbo_by_k[i, j] <- output$L[length(output$L)]
  }
}

## Long format: one row per (run, K) pair, runs varying fastest within each K.
## Built with base R, so the superseded reshape package is not needed.
ELBO <- data.frame(
  start_n = rep(seq_len(n_random_starts), times = length(k_values)),
  K = rep(k_values, each = n_random_starts),
  ELBO = as.vector(t(elbo_by_k))
)

## A single jittered layer: stacking geom_point() and geom_jitter() would draw
## every run twice. height = 0 keeps the ELBO values exact and spreads the
## points horizontally only.
ggplot(ELBO, aes(x = K, y = ELBO)) +
  geom_jitter(height = 0)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.