knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 5, fig.height = 4 )
tagmEM is a function which employs generalised t-augmented Gaussian mixture modelling to identify sets of observations which cluster (i.e. have common mean and variance components) using an expectation maximisation algorithm. Currently the software can accommodate univariate data only. In a future release this will be upgraded to allow for multi-variate data to be analysed. Below we give a quick guide to installation and an example of how to use the function - including a comparison with the very popular Gaussian mixture modelling package 'Mclust'.
options(warn = -1)
install.packages("devtools", repos='http://cran.us.r-project.org') library(devtools) install_github("cnfoley/tagmEM", build_vignettes = TRUE)
library(tagmEM); library(mclust);
set.seed(314); cluster_sizes = c(rep(1,10),25,35,30); N = sum(cluster_sizes); K = length(cluster_sizes); centres = c(runif(10,-1,1), runif(K-10,-10,10)); std_devs = c(rep(5,10),sqrt(runif(K-1,0.1,1))); data = NULL; truth = NULL; count = 1; for(i in cluster_sizes){ data = c(data, rnorm(i, mean = centres[count], sd = std_devs[count])); truth = c(truth, rep(count,i)); count = count + 1; } # cluster centres table(cluster_size = cluster_sizes, cluster_centre = round(centres,3));
rng = range(data); sig = sd(data) + sqrt(rng[2]-rng[1]); res_tagm = tagmEM(data, junk_mixture = TRUE, df = 4, junk_mean = mean(data), junk_sd = sig); # # Summarise number of observations in each cluster table(res_tagm$results$best$cluster_class); # # Summarise cluster centres: table(res_tagm$results$best$cluster_mean); # # Assess quality of identified clusters against ground truth using adjusted Rand index ## All obs adjustedRandIndex(x = res_tagm$results$best$cluster, y = truth); ## Remove junk jnk_obs = which(res_tagm$results$best$cluster_class=="Junk"); adjustedRandIndex(x = res_tagm$results$best$cluster[-jnk_obs], y = truth[-jnk_obs]);
res_mclust = mclust::Mclust(data); # # Summarise number of observations in each cluster table(res_mclust$classification); # # Summarise cluster centres: mn = round(res_mclust$parameters$mean,3); table(mn[res_mclust$classification]); # Assess quality of identified clusters against ground truth using adjusted Rand index adjustedRandIndex(x = res_mclust$classification, y = truth);
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.