inst/doc/clustra_vignette.R

## ----setup, echo = FALSE------------------------------------------------------
knitr::opts_knit$set(
  collapse = TRUE,
  comment = "#>"
)
start_knit = proc.time()
data.table::setDTthreads(1) # manage data.table threads

## ----gendata------------------------------------------------------------------
library(clustra)
mc = 2 # If running on a unix or a Mac platform, increase up to # cores
if (.Platform$OS.type == "windows") mc = 1
init = "random"
set.seed(12345)
data = gen_traj_data(n_id = c(500, 1000, 1500, 2000), types = c(2, 1, 3, 2), 
                     intercepts = c(70, 130, 120, 130), m_obs = 25, 
                     s_range = c(-365, -14), e_range = c(0.5*365, 2*365),
                     noise = c(0, 15))
head(data)

## ----plotdata, fig.width = 7, fig.height = 9----------------------------------
plot_sample(data[id %in% sample(unique(data[, id]), 9)], group = "true_group")

## ----clustra4-----------------------------------------------------------------
set.seed(12345)
cl4 = clustra(data, k = 4, maxdf = 10, conv = c(10, 0), mccores = mc,
             verbose = TRUE)

## ----smooths, fig.width = 7, fig.height = 7-----------------------------------
plot_smooths(data, group = NULL)
plot_smooths(data, cl4$tps)

## ----rand4--------------------------------------------------------------------
MixSim::RandIndex(cl4$data_group, data[, true_group])

## ----gen2_clustra, fig.width = 7, fig.height = 9------------------------------
set.seed(12345)
data2 = gen_traj_data(n_id = c(500, 1000, 1500, 2000), types = c(2, 1, 3, 2), 
                     intercepts = c(70, 130, 120, 130), m_obs = 25,
                     s_range = c(-365, -14), e_range = c(60, 2*365), 
                     noise = c(0, 30))
plot_sample(data2[id %in% sample(unique(data2[, id]), 9)], group = "true_group")

cl4a = clustra(data2, k = 4, maxdf = 10, conv = c(10, 0), mccores = mc,
             verbose = TRUE)
MixSim::RandIndex(cl4a$data_group, data2[, true_group])

## ----smooths2, fig.width = 7, fig.height = 7----------------------------------
plot_smooths(data2, group = NULL)
plot_smooths(data2, cl4a$tps)

## ----sil, fig.width = 7-------------------------------------------------------
set.seed(12345)
sil = clustra_sil(data, kv = c(2, 3, 4, 5), mccores = mc, maxdf = 10,
                  conv = c(7, 1), verbose = TRUE)
lapply(sil, plot_silhouette)

## ----sil2, fig.width = 7------------------------------------------------------
sil = clustra_sil(cl4)
lapply(sil, plot_silhouette)

## ----rand_plot, fig.width = 7, fig.height=7, eval = FALSE---------------------
#  set.seed(12345)
#  ran = clustra_rand(data, k = c(2, 3, 4, 5), mccores = mc,
#                     replicates = 10, maxdf = 10, conv = c(7, 1), verbose = TRUE)
#  rand_plot(ran)

## ----rand_plot_d, fig.width = 7, fig.height=7, eval = FALSE-------------------
#  set.seed(12345)
#  ran = clustra_rand(data, k = c(2, 3, 4, 5), starts = "distant", mccores = mc,
#                     replicates = 10, maxdf = 10, conv = c(7, 1), verbose = TRUE)
#  rand_plot(ran)

## ----hclust, fig.width = 7, fig.height = 7------------------------------------
set.seed(12345)
cl30 = clustra(data, k = 40, maxdf = 10, conv = c(10, 0), mccores = mc,
             verbose = TRUE)
gpred = function(tps, newdata) 
  as.numeric(mgcv::predict.bam(tps, newdata, type = "response",
                               newdata.guaranteed = TRUE))
resp = do.call(rbind, lapply(cl30$tps, gpred, newdata = data.frame(
  time = seq(min(data2$time), max(data2$time), length.out = 100))))
plot(hclust(dist(resp)))

## ----hclust_d, fig.width = 7, fig.height = 7----------------------------------
set.seed(12345)
cl30 = clustra(data, k = 40, starts = "distant", maxdf = 10, conv = c(10, 0), mccores = mc,
             verbose = TRUE)
gpred = function(tps, newdata) 
  as.numeric(mgcv::predict.bam(tps, newdata, type = "response",
                               newdata.guaranteed = TRUE))
resp = do.call(rbind, lapply(cl30$tps, gpred, newdata = data.frame(
  time = seq(min(data2$time), max(data2$time), length.out = 100))))
plot(hclust(dist(resp)))

## ----finish-------------------------------------------------------------------
cat("clustra vignette run time:\n")
print(proc.time() - start_knit)

Try the clustra package in your browser

Any scripts or data that you put into this service are public.

clustra documentation built on Oct. 14, 2023, 9:15 a.m.