Nothing
## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options for this document: `collapse` and the output
# prefix string used for printed results.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## ----setup--------------------------------------------------------------------
library(scellpam)
## -----------------------------------------------------------------------------
# Debugging output starts switched off. Switch it on for the parallelpam
# part only:
ScellpamSetDebug(FALSE, debparpam = TRUE)
# A further parameter, debjmat, enables messages about binary matrix
# creation/manipulation. It is FALSE by default; to enable it as well, use
# ScellpamSetDebug(FALSE, debparpam = TRUE, debjmat = TRUE)
## -----------------------------------------------------------------------------
# Build a matrix of uniform random values with 5000 rows (named V1..V5000)
# and 500 columns (named d1..d500).
nvec <- 5000
ndim <- 500
P <- matrix(runif(nvec * ndim), nrow = nvec, ncol = ndim)
rownames(P) <- paste0("V", seq_len(nvec))
colnames(P) <- paste0("d", seq_len(ndim))
# Store the matrix on disk as a binary file in jmatrix format
# (see the jmatrixsc vignette for details on the format).
JWriteBin(
  P, "datatest.bin",
  dtype = "float", dmtype = "full",
  comment = "Synthetic problem data to test PAM"
)
## -----------------------------------------------------------------------------
# Show the information stored about the file just written.
JMatInfo("datatest.bin")
## -----------------------------------------------------------------------------
# Read datatest.bin and write the resulting dissimilarity matrix (distance
# type "L2", stored as "float") to datatestL2.bin.
# NOTE: the comment string below deliberately spans two source lines, so the
# text passed to the function contains an embedded newline.
CalcAndWriteDissimilarityMatrix(
  "datatest.bin", "datatestL2.bin",
  distype = "L2", restype = "float",
  comment = "L2 distance for vectors in
jmatrix file datatest.bin",
  nthreads = -1
)
## -----------------------------------------------------------------------------
# Show the information stored about the dissimilarity file.
JMatInfo("datatestL2.bin")
## -----------------------------------------------------------------------------
# PAM with k = 5 on the L2 dissimilarity matrix, using the BUILD
# initialization method and max_iter = 1000.
L <- ApplyPAM(
  "datatestL2.bin",
  k = 5, init_method = "BUILD", max_iter = 1000, nthreads = -1
)
## -----------------------------------------------------------------------------
# The same problem with max_iter = 0, once with BUILD and twice with LAB.
# The two LAB calls use identical arguments; their results are kept
# separately as Llab1 and Llab2.
Lbuild <- ApplyPAM(
  "datatestL2.bin",
  k = 5, init_method = "BUILD", max_iter = 0, nthreads = -1
)
Llab1 <- ApplyPAM(
  "datatestL2.bin",
  k = 5, init_method = "LAB", max_iter = 0, nthreads = -1
)
Llab2 <- ApplyPAM(
  "datatestL2.bin",
  k = 5, init_method = "LAB", max_iter = 0, nthreads = -1
)
## -----------------------------------------------------------------------------
# Start a new run from the medoids stored in Llab2 (init_method = "PREV"),
# leaving max_iter at its default.
Llab2Final <- ApplyPAM(
  "datatestL2.bin",
  k = 5, init_method = "PREV", initial_med = Llab2$med, nthreads = -1
)
## -----------------------------------------------------------------------------
# Indexes of the points chosen as medoids.
L$med
#
# Class (cluster number) to which point 147 has been assigned.
L$clasif[147]
#
# Index (row in the dissimilarity matrix) of the medoid of that class,
# i.e. the medoid closest to point 147.
L$med[L$clasif[147]]
## -----------------------------------------------------------------------------
# Smallest and largest class labels present in the classification vector.
min(L$clasif)
max(L$clasif)
## -----------------------------------------------------------------------------
# Silhouette values for the classification stored in Llab2, computed against
# the L2 dissimilarity file.
S <- CalculateSilhouette(Llab2$clasif, "datatestL2.bin", nthreads = -1)
## -----------------------------------------------------------------------------
# Convert the numeric silhouette into the object that is plotted below.
Sclus <- NumSilToClusterSil(Llab2$clasif, S)
# cluster is attached before plotting -- presumably so that its plot method
# for silhouette objects is available (TODO confirm).
library(cluster)
plot(Sclus)
## -----------------------------------------------------------------------------
# Filter using the silhouette S and the clustering Llab2 with a threshold
# argument of 0.2. The call receives the data file and the dissimilarity
# file together with the names of their filtered counterparts
# (datatestFilt.bin and datatestL2Filt.bin).
Lfilt <- FilterBySilhouetteQuantile(
  S,
  Llab2,
  "datatest.bin",
  "datatestFilt.bin",
  "datatestL2.bin",
  "datatestL2Filt.bin",
  0.2
)
## -----------------------------------------------------------------------------
# Run PAM on the filtered dissimilarity matrix, starting from the medoids
# in Lfilt and asking for as many clusters as there are medoids.
Lfinal <- ApplyPAM(
  "datatestL2Filt.bin",
  k = length(Lfilt$med),
  init_method = "PREV",
  initial_med = Lfilt$med,
  nthreads = -1
)
## ----results='hide'-----------------------------------------------------------
# Load the result of GetSubdiag() for the L2 dissimilarity file into d; the
# commented-out comparison further down passes d to pam(..., diss = TRUE).
d <- GetSubdiag("datatestL2.bin")
## ----eval=FALSE---------------------------------------------------------------
# library(cluster)
# clusterpam = pam(d,diss=TRUE,k=5)
# print(sort(clusterpam$id.med))
# print(sort(L$med))
## ----eval=FALSE---------------------------------------------------------------
# # Be patient, this may take some time...
# Dm = GetJManyRows("datatestL2.bin",seq(1:nvec))
## ----eval=FALSE---------------------------------------------------------------
# library(ClusterR)
# ClusterRpam = Cluster_Medoids(Dm,clusters=5)
# print(sort(ClusterRpam$medoid_indices))
# print(sort(L$med))
## ----eval=FALSE---------------------------------------------------------------
# TDparallelpam = GetTD(L,"datatestL2.bin")
#
# # This is to adapt cluster package output format to ours, since this is what our GetTD function expects...
# Lcl = list()
# Lcl$med = clusterpam$id.med
# Lcl$clasif = clusterpam$clustering
# TDcluster = GetTD(Lcl,"datatestL2.bin")
#
# # The same with ClusterR package:
# LclR = list()
# LclR$med = ClusterRpam$medoid_indices
# LclR$clasif = ClusterRpam$clusters
# TDClusterR = GetTD(LclR,"datatestL2.bin")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.