inst/doc/usage.R

## ----echo = FALSE, message = FALSE--------------------------------------------
library(dplyr)
library(ggplot2)
library(tglkmeans)
theme_set(theme_classic())
set.seed(60427)

## -----------------------------------------------------------------------------
data <- simulate_data(n = 100, sd = 0.3, nclust = 5, dims = 2)
data

## ----fig.show='hold'----------------------------------------------------------
data %>% ggplot(aes(x = V1, y = V2, color = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "true cluster")

## -----------------------------------------------------------------------------
rownames(data) <- data$id
data_for_clust <- data %>% select(starts_with("V"))
km <- TGL_kmeans_tidy(data_for_clust,
    k = 5,
    metric = "euclid",
    verbose = TRUE
)

## -----------------------------------------------------------------------------
names(km)

## -----------------------------------------------------------------------------
km$centers

## -----------------------------------------------------------------------------
km$cluster

## -----------------------------------------------------------------------------
km$size

## -----------------------------------------------------------------------------
d <- tglkmeans:::match_clusters(data, km, 5)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))

## ----fig.show='hold'----------------------------------------------------------
d %>% ggplot(aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster") +
    geom_point(data = km$centers, size = 7, color = "black", shape = "X")

## -----------------------------------------------------------------------------
km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
    k = 5,
    metric = "euclid",
    verbose = FALSE,
    reorder_func = median
)
km$centers

## -----------------------------------------------------------------------------
data$V1[sample(1:nrow(data), round(nrow(data) * 0.2))] <- NA
data

## -----------------------------------------------------------------------------
km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
    k = 5,
    metric = "euclid",
    verbose = FALSE
)
d <- tglkmeans:::match_clusters(data, km, 5)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))

## ----fig.show='hold'----------------------------------------------------------
d %>% ggplot(aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster") +
    geom_point(data = km$centers, size = 7, color = "black", shape = "X")

## -----------------------------------------------------------------------------
data <- simulate_data(n = 100, sd = 0.3, nclust = 30, dims = 300)
km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
    k = 30,
    metric = "euclid",
    verbose = FALSE,
    id_column = TRUE
)

## -----------------------------------------------------------------------------
d <- tglkmeans:::match_clusters(data, km, 30)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))

## -----------------------------------------------------------------------------
km_standard <- kmeans(data %>% select(starts_with("V")), 30)
km_standard$clust <- tibble(id = 1:nrow(data), clust = km_standard$cluster)

d <- tglkmeans:::match_clusters(data, km_standard, 30)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))

## -----------------------------------------------------------------------------
km1 <- TGL_kmeans_tidy(data %>% select(starts_with("V")),
    k = 30,
    metric = "euclid",
    verbose = FALSE,
    seed = 60427
)
km2 <- TGL_kmeans_tidy(data %>% select(starts_with("V")),
    k = 30,
    metric = "euclid",
    verbose = FALSE,
    seed = 60427
)
all(km1$centers[, -1] == km2$centers[, -1])

Try the tglkmeans package in your browser

Any scripts or data that you put into this service are public.

tglkmeans documentation built on May 29, 2024, 10:58 a.m.