# Nothing
#' Cluster items based on k-means across features
#'
#' Given a tidy table of features describing each item, perform k-means
#' clustering using [kmeans()] and retidy the data into
#' one-row-per-item, with the cluster each item was assigned to.
#'
#' @param tbl Table
#' @param item Item to cluster (as a bare column name)
#' @param feature Feature column (dimension in clustering)
#' @param value Value column
#' @param k Number of clusters
#' @param fill What to fill in for missing values
#' @param ... Other arguments passed on to [kmeans()]
#'
#' @return A tibble with one row per item and two columns: the item column
#'   (named after `item`, holding the values from `tbl`) and `cluster`, a
#'   factor with the cluster assigned by [kmeans()]. Rows are sorted by
#'   `cluster`.
#'
#' @seealso [widely_hclust()]
#'
#' @importFrom rlang :=
#'
#' @examples
#'
#' library(gapminder)
#' library(dplyr)
#'
#' clusters <- gapminder %>%
#'   widely_kmeans(country, year, lifeExp, k = 5)
#'
#' clusters
#'
#' clusters %>%
#'   count(cluster)
#'
#' # Examine a few clusters
#' clusters %>% filter(cluster == 1)
#' clusters %>% filter(cluster == 2)
#'
#' @export
widely_kmeans <- function(tbl, item, feature, value, k, fill = 0, ...) {
  # Capture the bare column names supplied by the caller as strings
  item_str <- as.character(substitute(item))
  feature_str <- as.character(substitute(feature))
  value_str <- as.character(substitute(value))

  # Cast to a wide matrix: one row per item, one column per feature,
  # with `fill` used for item/feature combinations absent from `tbl`
  form <- stats::as.formula(paste(item_str, "~", feature_str))
  m <- reshape2::acast(tbl, form, value.var = value_str, fill = fill)

  clustered <- stats::kmeans(m, k, ...)

  # Add the clusters to the original items. The matrix row names are
  # character, so look up the first matching row of `tbl` for each one in
  # order to return the item values with their original type.
  i <- match(rownames(m), as.character(tbl[[item_str]]))

  # `!!item_str :=` injects the string as the column name directly, so no
  # symbol-construction helper has to be in scope.
  assignments <- tibble::tibble(
    !!item_str := tbl[[item_str]][i],
    cluster = factor(clustered$cluster)
  )

  dplyr::arrange(assignments, cluster)
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.