knitr::opts_chunk$set(echo = TRUE) library(ParetoTI) library(ggplot2) library(cowplot)
First, let's simulate a triangle in 3D.
# set random seed set.seed(4355) # generate archetype positions archetypes = generate_arc(arc_coord = list(c(5, 0, 4), c(-10, 15, 0), c(-30, -20, -5)), mean = 0, sd = 1) # generate 1000 data points that are a convex combination (weighted sum) of archetypes data = generate_data(archetypes$XC, N_examples = 1e3, jiiter = 0.04, size = 0.99) colnames(data) = paste0("cell", seq_len(ncol(data))) # plot plot_arc(arc_data = archetypes, data = data, which_dimensions = 1:2) + ylim(-19, 17) + ggtitle("Ground truth archetypes")
# find archetypes arc = fit_pch(data, noc = 3) # find k-means clusters clusters = fit_pch(data, noc = 3, method = "kmeans") # find Louvain clusters lou_clusters = fit_pch(data, noc = 3, method = "louvain") # show both side-by-side plot_grid(plot_arc(arc_data = arc, data = data, which_dimensions = 1:2) + ylim(-18, 17) + ggtitle("Detected archetypes (PCHA)"), plot_arc(arc_data = clusters, data = data, which_dimensions = 1:2, data_lab = as.character(apply(clusters$S, 2, which.max))) + ylim(-18, 17) + ggtitle("K-means clusters"), plot_arc(arc_data = lou_clusters, data = data, which_dimensions = 1:2, data_lab = as.character(apply(lou_clusters$S, 2, which.max))) + ylim(-18, 17) + ggtitle("Louvain clusters"), align = "vh", nrow = 1)
# bootstrap archetypes arc_rob = fit_pch_bootstrap(data, n = 200, sample_prop = 0.65, seed = 2543, noc = 3) # bootstrap kmeans clusters_rob = fit_pch_bootstrap(data, n = 200, sample_prop = 0.65, seed = 2543, noc = 3, method = "kmeans") # bootstrap Louvain # specific noc is achieved by optimising clustering resolution to get noc clusters lou_clusters_rob = fit_pch_bootstrap(data, n = 200, sample_prop = 0.9, seed = 2543, noc = 3, method = "louvain", method_options = list(resolution = 0.1)) # show both side-by-side plot_grid(plot_arc(arc_data = arc_rob, data = data, which_dimensions = 1:2) + ylim(-18, 17) + ggtitle("Detected archetypes (PCHA)"), plot_arc(arc_data = clusters_rob, data = data, which_dimensions = 1:2, data_lab = as.character(apply(clusters$S, 2, which.max))) + ylim(-18, 17) + ggtitle("K-means clusters"), plot_arc(arc_data = lou_clusters_rob, data = data, which_dimensions = 1:2, data_lab = as.character(apply(lou_clusters$S, 2, which.max))) + ylim(-18, 17) + ggtitle("Louvain clusters"), align = "vh")
# trying different number of archetypes arc_ks = k_fit_pch(data, ks = 2:5, bootstrap = T, bootstrap_N = 200, maxiter = 500, bootstrap_type = "m", clust_options = list(cores = 3), seed = 2543, replace = FALSE, volume_ratio = "none", # set to "none" if too slow order_type = "align", sample_prop = 0.65, reference = T) # trying different number of clusters cluster_ks = k_fit_pch(data, ks = 2:5, bootstrap = T, bootstrap_N = 200, maxiter = 500, bootstrap_type = "m", clust_options = list(cores = 3), seed = 2543, replace = FALSE, volume_ratio = "none", # set to "none" if too slow order_type = "align", sample_prop = 0.65, reference = T, method = "kmeans") lou_cluster_ks = k_fit_pch(data, ks = 2:5, bootstrap = T, bootstrap_N = 200, maxiter = 500, bootstrap_type = "m", clust_options = list(cores = 3), seed = 2543, replace = FALSE, volume_ratio = "none", # set to "none" if too slow sample_prop = 0.95, method = "louvain", method_options = list(resolution = 0.1, noc_optim_iter = 500)) # try resolutions for more iterations # Show variance explained by k-vertex model on top of k-1 model (each k separately) plot_grid(plot_arc_var(arc_ks, type = "res_varexpl", point_size = 2, line_size = 1.5) + theme_bw() + ggtitle("Archetypes (PCHA)"), plot_arc_var(cluster_ks, type = "res_varexpl", point_size = 2, line_size = 1.5) + theme_bw() + ggtitle("K-means clusters"), plot_arc_var(lou_cluster_ks, type = "res_varexpl", point_size = 2, line_size = 1.5) + theme_bw() + ggtitle("Louvain clusters"), align = "vh") # Show variance in position of vertices obtained using bootstraping # - use this to find largest k that has low variance plot_grid(plot_arc_var(arc_ks, type = "total_var", point_size = 2, line_size = 1.5) + theme_bw() + ylab("Mean variance in position of vertices") + ggtitle("Archetypes (PCHA)"), plot_arc_var(cluster_ks, type = "total_var", point_size = 2, line_size = 1.5) + theme_bw() + ylab("Mean variance in position of vertices") + ggtitle("K-means clusters"), plot_arc_var(lou_cluster_ks, type = "total_var", point_size = 2, line_size = 1.5) + theme_bw() + ggtitle("Louvain clusters"), align = "vh")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.