#' Cluster time series using time series features and model based clustering
#'
#' Computes a fixed set of time series features for every series in `data`,
#' standardises them with `scale()`, and fits a Gaussian mixture model with
#' `Mclust()`. `Mclust()` selects the number of mixture components among the
#' candidates `1, ..., max_clust`.
#'
#' @param data Data to be used for clustering. Has to include columns: `id`,
#'   `date` and `outcome`. Rows containing missing values are dropped before
#'   the features are computed.
#' @param max_clust Maximum number of clusters. A single positive number `k`
#'   makes `Mclust()` consider `1:k` components. A vector of length greater
#'   than one is passed to `Mclust()` unchanged as the set of candidate
#'   component counts.
#'
#' @return A list with two tibbles:
#'   * `cluster`: columns `id` (character, taken from the row names of the
#'     feature matrix) and `cluster` (the classification reported by
#'     `Mclust()`).
#'   * `prob`: columns `id` and `value`, where `value` is the largest
#'     membership probability of that series (one row per `id`).
#'
get_gmm_clusters <- function(data, max_clust) {
  # library() rather than require(): a missing package has to fail right here
  # instead of returning FALSE and surfacing later as a confusing
  # "could not find function" error. The packages are attached, exactly as the
  # previous require() calls did.
  library(mclust)
  library(feasts)

  if (!is.numeric(max_clust) || length(max_clust) == 0L ||
      anyNA(max_clust) || any(max_clust < 1)) {
    stop("`max_clust` must be a positive number.", call. = FALSE)
  }
  # `G` in Mclust() is the set of component counts to evaluate, not an upper
  # bound. A scalar maximum therefore has to be expanded to 1:max_clust;
  # a vector is assumed to already be the candidate set.
  n_components <- if (length(max_clust) == 1L) {
    seq_len(max_clust)
  } else {
    max_clust
  }

  # One row of features per series; ids become row names so that they survive
  # the conversion to a numeric matrix done by scale().
  data_features <- data %>%
    as.data.frame() %>%
    drop_na() %>%
    as_tsibble(key = id, index = date) %>%
    features(
      outcome,
      feature_set(
        tags = c("tile", "acf", "stl", "spectral", "lumpiness", "roll", "count")
      )
    ) %>%
    select(
      id, trend_strength, spikiness, linearity, curvature, acf1,
      n_crossing_points, longest_flat_spot, shift_level_index, shift_kl_index,
      spectral_entropy, var_tiled_var, var_tiled_mean
    ) %>%
    column_to_rownames(var = "id") %>%
    # Series for which any feature could not be computed are excluded.
    drop_na() %>%
    scale()

  mc <- Mclust(data_features, G = n_components)
  # Guard against an empty result so the failure is reported here rather than
  # as an obscure error further down.
  if (is.null(mc)) {
    stop("Mclust() did not return a fitted model.", call. = FALSE)
  }
  message("Optimal number of clusters found: ", mc$G)

  # Hard assignment: one cluster label per series.
  gmm_cluster_tbl <- mc$classification %>%
    as.data.frame() %>%
    rownames_to_column("id") %>%
    set_names("id", "cluster") %>%
    as_tibble()

  # Soft assignment: keep only the highest membership probability per series.
  # which.max() returns the first maximum, so exact ties cannot produce more
  # than one row per id.
  gmm_cluster_prob_tbl <- mc$z %>%
    as.data.frame() %>%
    rownames_to_column("id") %>%
    as_tibble() %>%
    pivot_longer(-id) %>%
    group_by(id) %>%
    filter(row_number() == which.max(value)) %>%
    ungroup() %>%
    select(-name)

  list(
    cluster = gmm_cluster_tbl,
    prob = gmm_cluster_prob_tbl
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.