# Global knitr chunk options: collapse source and output into a single block
# and prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(celery)
library(palmerpenguins)
library(tidymodels)

Fit

# Model specification: k-means with k = 5, using the "stats" engine.
kmeans_spec <- set_engine_celery(k_means(k = 5), "stats")

# Recipe 1: the formula has no outcome; species and island are given the
# "demographic" role, and sex is passed to step_dummy().
penguins_rec_1 <- recipe(~ ., data = penguins)
penguins_rec_1 <- update_role(
  penguins_rec_1,
  species, island,
  new_role = "demographic"
)
penguins_rec_1 <- step_dummy(penguins_rec_1, sex)

# Recipe 2: species is the left-hand side of the formula; sex and island are
# passed to step_dummy().
penguins_rec_2 <- step_dummy(
  recipe(species ~ ., data = penguins),
  sex, island
)

# One workflow per recipe; both share the same k-means specification.
wflow_1 <- add_recipe(add_model(workflow(), kmeans_spec), penguins_rec_1)

wflow_2 <- add_recipe(add_model(workflow(), kmeans_spec), penguins_rec_2)

We need workflows!

# Drop incomplete rows up front so that the rows of pen_sub line up with
# `penguins %>% drop_na()` later on (clunky), then remove the species, island
# and sex columns.
pen_sub <- select(drop_na(penguins), -species, -island, -sex)

# k-means starts from randomly chosen centers, so fix the RNG seed to keep the
# fitted clusters (and every table derived from them below) reproducible
# from one build of this document to the next.
set.seed(2022)
kmeans_fit <- kmeans_spec %>% fit(~ ., pen_sub)

# Cluster predictions for the same rows the model was fitted on.
kmeans_fit %>%
  predict(new_data = pen_sub)

### try my new version
extract_cluster_assignment(kmeans_fit)

# Cross-tabulate predicted cluster against sex and species. The predictions
# come from pen_sub, which was built from the same complete-case rows.
count(
  mutate(
    drop_na(penguins),
    preds = predict(kmeans_fit, new_data = pen_sub)$.pred_cluster
  ),
  preds, sex, species
)

Diagnostics

talk to Emil

# Design sketch for a possible diagnostics interface.
# NOTE(review): this looks like pseudo-code rather than runnable code --
# demo1, predictor1, Cluster, v1 and v2 are not defined anywhere in the
# visible file, and none of the recipe() calls is given a data argument.
# Confirm that this chunk is not meant to be evaluated.
recipe( ~ demo1 + predictor1) %>%
  step_celery(kmeans_fit) # doesn't quite make sense
# NOTE(review): `get_SS` here and `get_ss` below differ only in case; R names
# are case-sensitive, so presumably only one spelling is intended -- confirm.
get_SS(Cluster ~ v1 + v2)
recipe(Cluster ~ v1 + v2) %>%
  step_pca() %>%
  get_ss()

... or maybe fit enrichment() on a recipe and it automatically uses the variables with the "enrich" role?
How would PCA fit into this?

How does fit() access the right variables?

... Is this even worth it, given that we can attach cluster assignments? I say yes, because what if we are trying to "cross-validate"?

# Complete-case penguins with the predicted cluster attached as `preds`.
# Relies on pen_sub holding the same rows, in the same order, as
# drop_na(penguins).
penguins_2 <- mutate(
  drop_na(penguins),
  preds = predict(kmeans_fit, new_data = pen_sub)$.pred_cluster
)

# Call enrichment() on the cluster assignments (`preds`) and `species`.
# A leftover `debugonce(enrichment)` was removed from this spot: it makes the
# next call to enrichment() enter the interactive browser, which does not
# belong in rendered documentation. Set it by hand when debugging.
penguins_2 %>%
  enrichment(preds, species)


kbodwin/celery documentation built on March 26, 2022, 12:33 a.m.