knitr::opts_chunk$set(echo = TRUE)
In this document, we evaluate the performance of the various types of initialization. - Hclust - Random - SNF - SVD
To perform the evaluation, we simulate 10 datasets, then we compute the ARI and the percentage of the variance explained.
library(Matrix) library(tidyverse) library(PintMF) library(CrIMMix) library(future) library(mclust)
Four un balanced groups are simulated.
nclust <- 4 nByclust= c(5, 10, 25, 10)
clust_function <- function(R, nclust){ clust <- R$W %>% dist %>% hclust(method="ward.D2") %>% cutree(nclust) }
set.seed(55) perf <- do.call(rbind, lapply(1:10, function (ii){ print(ii) c_1 <- simulateY(nclust = nclust, n_byClust = nByclust, J=100, prop=0.05, noise=0.2) c_2 <- simulateY(nclust = nclust, n_byClust = nByclust, J=50, flavor="binary", params=list(c(p=0.6)), prop=0.1, noise=0.3) params_beta <- list(c(mean1=-1, mean2=1, sd1=0.5, sd2=0.5)) c_3 <- simulateY(nclust = nclust, n_byClust = nByclust, J=500,flavor="beta", params=params_beta, prop=0.2, noise=0.2) data <- list(c_1$data, c_2$data, c_3$data) true.clust <- c_1$true.clusters data_names <- c("gaussian", "binary", "beta-like") names(data) <- data_names data_t <- data data_t[[3]] <- log2(data[[3]]/(1-data[[3]])) R_snf <- SolveInt(Y=data_t, p=4, max.it=5, verbose=FALSE, init_flavor="snf", flavor_mod="glmnet") R_svd <- SolveInt(Y=data_t, p=4, max.it=5, verbose=TRUE, init_flavor="svd", flavor_mod="glmnet") R_random <- SolveInt(Y=data_t, p=4, max.it=5, verbose=FALSE, init_flavor="random", flavor_mod="glmnet") R_hclust <- SolveInt(Y=data_t, p=4, max.it=5, verbose=FALSE, init_flavor="hclust", flavor_mod="glmnet") df_ari <- data.frame(sim=ii,ARI=c(adjustedRandIndex(clust_function(R_snf, nclust), true.clust), adjustedRandIndex(clust_function(R_svd, nclust), true.clust), adjustedRandIndex(clust_function(R_random, nclust), true.clust), adjustedRandIndex(clust_function(R_hclust, nclust), true.clust)), method=c("SNF", "SVD", "Random", "hclust")) df_pve <- data.frame( PVE=c(R_snf$pve, R_svd$pve, R_random$pve, R_hclust$pve), method= rep(c("SNF","SVD", "Random", "Hclust"), times=c(length(R_snf$pve),length(R_svd$pve), length(R_random$pve), length(R_hclust$pve) ))) return(list(ari=df_ari, pve=df_pve)) }))
ARI_dat <- do.call(rbind, perf[1:10]) g <- ARI_dat %>% ggplot(aes(x=method, fill=method, y=ARI))+geom_violin()+theme_bw()+theme(legend.position = "none", axis.text.x = element_text(size=15), axis.title.y = element_text(size=15), axis.text.y = element_text(size=10))+xlab("")+geom_point(alpha=0.5) ggsave(filename = '../../Figs/eval_init.pdf', plot = g, width=7, height=5) g
pve_dat <- do.call(rbind, lapply(perf[11:20], function(g){ g <- g %>% group_by(method) %>% mutate(it = row_number()) } )) g_pve <- pve_dat%>% ggplot(aes(x=it, y=PVE,fill=method)) + geom_smooth(se = TRUE, aes(colour=method))+theme_bw()+ theme(legend.position="right", axis.text.x = element_text(size=15),axis.title.y = element_text(size=15))+scale_x_continuous( breaks = 1:10)+xlab('iteration') g_pve ggsave(filename = '../../Figs/pve_init.pdf', plot = g_pve, width=7, height=5)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.