#' Plot clusters of an umapscan object
#'
#' @param us umapscan object
#' @param parent name of the parent cluster
#' @param noise_inherit_parent if TRUE, 'Noise' points are given their parent cluster
#' membership
#' @param alpha point transparency for clusters plot
#' @param ellipses if TRUE, plot confidence ellipses around clusters
#' @param fixed if TRUE, force coord_fixed on plot
#' @param labels if TRUE, use cluster label instead of id if available
#'
#' @seealso
#' [clust_compute()], [clust_describe()]
#' @export
#'
#' @examples
#' library(dplyr)
#' iris_num <- iris %>% select_if(is.numeric)
#' us <- new_umapscan(iris_num, n_neighbors = 25, min_dist = 0.1, seed = 1337)
#' us <- clust_compute(us, minPts = 3, eps = 0.5, graph = FALSE)
#' clust_plot(us, alpha = 1)
#' us <- clust_compute(us, minPts = 3, eps = 0.45, graph = FALSE, parent = "3")
#' clust_plot(us, alpha = 0.5, ellipses = FALSE, parent = "3")
#' clust_plot(us)
#'
#' @import ggplot2
#' @importFrom tidyr drop_na
clust_plot <- function(us, parent = "", noise_inherit_parent = FALSE, alpha = 1, ellipses = TRUE, fixed = FALSE, labels = TRUE) {
clust <- clust_membership(us, parent,
noise_inherit_parent = noise_inherit_parent, labels = labels)
if (all(is.na(clust))) stop("No defined clusters in umapscan object.")
d_clust <- us$umap[!is.na(clust),]
clust <- clust[!is.na(clust)]
clust[clust == "<Noise>"] <- NA
d_clust$cluster <- clust
color_scale <- qualitative_palette(clust, label = "Cluster")
g <- ggplot(d_clust, aes(x = .data$.umap_x, y = .data$.umap_y, color = factor(.data$cluster))) +
geom_point(size = 1, alpha = alpha) +
color_scale +
guides(colour = guide_legend(override.aes = list(alpha = 1))) +
xlab("") +
ylab("")
if (ellipses) {
d_ellipses <- d_clust %>% tidyr::drop_na(.data$cluster)
g <- g + stat_ellipse(data = d_ellipses, na.rm = TRUE)
}
if (fixed) {
g <- g + coord_fixed()
}
g
}
#' Describe clusters of an umapscan object
#'
#' @param us umapscan object to describe clusters
#' @param parent name of the parent cluster
#' @param labels if TRUE, use cluster label instead of id if available
#' @param type plot type, either `"boxplot"`, `"ridges"`, `"barplot"` or `"keyness"`
#' @param position if type = "barplot", `position` argument to add to `geom_bar`
#' @param keyness_measure if type = "keyness", passed as `measure` argument to `quanteda::textstat_keyness`
#' @param n_terms if type = "keyness", number of terms to show
#' @param text_size if type = "keyness", plot text size
#' @param free_scale if type = "keyness", don't use the same scale on each barplot
#' @param show_negative if TRUE and type = "keyness", show negative keyness features
#'
#' @details
#' "boxplot" and "ridges" are suitable for continuous variables, "barplot" is better for
#' binary categorical ones. "keyness" is suitable when the original data is a document-feature
#' matrix.
#'
#' @seealso
#' [clust_compute()], [clust_get_data()], [clust_membership()],
#' [clust_rename()]
#'
#'
#'
#' @export
#' @importFrom tidyr pivot_longer
#' @importFrom ggridges geom_density_ridges
#' @import dplyr
#'
#' @examples
#' library(dplyr)
#' iris_num <- iris %>% select_if(is.numeric)
#' us <- new_umapscan(iris_num, n_neighbors = 25, min_dist = 0.1, seed = 1337)
#' us <- clust_compute(us, minPts = 3, eps = 0.5)
#' clust_describe(us)
#' us <- clust_compute(us, minPts = 3, eps = 0.45, parent = "3")
#' clust_describe(us, type = "ridge")
#' clust_describe(us, parent = "3")
clust_describe <- function(
us, parent = "", labels = TRUE,
type = c("boxplot", "ridges", "barplot", "keyness"),
position = "fill",
keyness_measure = c("chi2", "lr", "exact", "pmi"),
n_terms = 20,
text_size = 10,
free_scale = TRUE,
show_negative = TRUE) {
type <- match.arg(type)
keyness_measure <- match.arg(keyness_measure)
clusters <- clust_membership(us, parent, noise_inherit_parent = FALSE, labels = labels)
clusters[clusters == "<Noise>"] <- NA
select <- !is.na(clusters)
d <- us$data %>% dplyr::filter(select)
clusters <- clusters[select]
if (type != "keyness") {
d$cluster <- clusters
d_long <- d %>%
tidyr::pivot_longer(-.data$cluster) %>%
dplyr::mutate(
cluster = factor(.data$cluster)
)
}
if (type == "boxplot") {
fill_scale <- qualitative_palette(d_long$cluster, "Cluster", type = "fill")
g <- ggplot(d_long, aes(x = .data$cluster, y = .data$value, fill = .data$cluster)) +
geom_boxplot(outlier.shape = NA) +
fill_scale +
facet_wrap(~name, scales = "free_y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab("") +
ylab("")
}
if (type == "ridges") {
fill_scale <- qualitative_palette(d_long$cluster, "Cluster", type = "fill")
g <- ggplot(d_long, aes(x = .data$value, y = .data$cluster, fill = .data$cluster)) +
ggridges::geom_density_ridges(alpha = .7) +
fill_scale +
facet_wrap(~name, scales = "free_x") +
guides(fill = guide_legend(override.aes = list(alpha = 1)))
}
if (type == "barplot") {
fill_scale <- qualitative_palette(d_long$value, "Value", type = "fill")
g <- ggplot(d_long, aes(x = .data$cluster, fill = factor(.data$value))) +
geom_bar(position = position) +
fill_scale +
facet_wrap(~name, scales = "free_y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab("") +
ylab("")
}
if (type == "keyness") {
g <- clust_describe_keyness(
d, clusters,
keyness_measure = keyness_measure,
n_terms = n_terms,
text_size = text_size,
free_scale = free_scale,
show_negative = show_negative
)
return(invisible(g))
}
g
}
## Keyness-based clusters description
clust_describe_keyness <- function(
d, clusters,
keyness_measure = c("chi2", "lr", "exact", "pmi"),
n_terms = 20,
text_size = 10,
free_scale = TRUE,
show_negative = TRUE) {
if (!requireNamespace("gridExtra", quietly = TRUE)) {
stop("Package \"gridExtra\" needed for this function to work. Please install it.",
call. = FALSE)
}
tabs <- keyness_stats(
clusters,
d,
keyness_measure = keyness_measure,
n_terms = n_terms,
show_negative = show_negative
)
stat_col <- switch(
keyness_measure,
"chi2" = "chi2",
"lr" = "G2",
"exact" = "or",
"pmi" = "pmi"
)
if (free_scale) {
maxs <- purrr::map_dbl(tabs, ~{max(abs(.x[[stat_col]]), na.rm = TRUE)})
range <- c(0, max(maxs))
} else {
range <- NULL
}
plots <- purrr::imap(tabs, function(tab, name) {
keyness_barplot(tab, range = range, title = name,
stat_col = stat_col, n_terms = n_terms, text_size = text_size)
})
lay <- matrix(seq_along(tabs), nrow = 1)
g <- gridExtra::grid.arrange(grobs = plots, layout_matrix = lay)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.