Nothing
#' K-means algorithm for the clustering of variables
#'
#' K-means algorithm for the clustering of variables. Directional or local groups may be defined.
#' Each group of variables is associated with a latent component.
#' Moreover external information collected on the observations or on the variables may be introduced.
#'
#' The initalization can be made at random, repetitively, or can be defined by the user.
#'
#' The parameter "strategy" makes it possible to choose a strategy for setting aside variables
#' that do not fit into the pattern of any cluster.
#'
#' @param X The matrix of the variables to be clustered
#' @param Xu The external variables associated with the columns of X
#' @param Xr The external variables associated with the rows of X
#' @param method The criterion to use in the cluster analysis.\cr
#' 1 or "directional" : the squared covariance is used as a measure of proximity (directional groups). \cr
#' 2 or "local" : the covariance is used as a measure of proximity (local groups)
#' @param sX TRUE/FALSE : standardization or not of the columns X (TRUE by default)\cr
#' (predefined -> cX = TRUE : column-centering of X)
#' @param sXr TRUE/FALSE : standardization or not of the columns Xr (FALSE by default)\cr
#' (predefined -> cXr = TRUE : column-centering of Xr)
#' @param sXu TRUE/FALSE : standardization or not of the columns Xu (FALSE by default)\cr
#' (predefined -> cXu= FALSE : no centering, Xu considered as a weight matrix)
#' @param clust : a number i.e. the size of the partition, K,
#' or a vector of INTEGERS i.e. the group membership of each variable in the initial partition (integer between 1 and K)
#' @param iter.max maximal number of iteration for the consolidation (20 by default)
#' @param nstart nb of random initialisations in the case where init is a number (100 by default)
#' @param strategy "none" (by default), or "kplusone" (an additional cluster for the noise variables),
#' or "sparselv" (zero loadings for the noise variables)
#' @param rho a threshold of correlation between 0 and 1 (0.3 by default)
#'
#' @return \item{tabres}{
#' The value of the clustering criterion at convergence.\cr
#' The percentage of the explained initial criterion value.\cr
#' The number of iterations in the partitioning algorithm.}
#' \item{clusters}{ the group's membership}
#' \item{comp}{ The latent components of the clusters}
#' \item{loading}{ if there are external variables Xr or Xu : The loadings of the external variables}
#' @seealso CLV, LCLV
#'
#' @references Vigneau E., Qannari E.M. (2003). Clustering of variables around latents components. Comm. Stat, 32(4), 1131-1150.
#' @references Vigneau E., Chen M., Qannari E.M. (2015). ClustVarLV: An R Package for the clustering of Variables around Latent Variables. The R Journal, 7(2), 134-148
#' @references Vigneau E., Chen M. (2016). Dimensionality reduction by clustering of variables while setting aside atypical variables. Electronic Journal of Applied Statistical Analysis, 9(1), 134-153
#' @examples data(apples_sh)
#' #local groups with external variables Xr
#' resclvkmYX <- CLV_kmeans(X = apples_sh$pref, Xr = apples_sh$senso,method = "local",
#' sX = FALSE, sXr = TRUE, clust = 2, nstart = 20)
#' @export
#'
CLV_kmeans <- function(X,Xu=NULL,Xr=NULL,method,sX=TRUE,sXr=FALSE,sXu=FALSE,
clust, iter.max=20, nstart=100,strategy="none",rho=0.3)
{
if (method=="directional") method=1
if (method=="local") method=2
if(method!=1 & method!=2) stop("method should be 1/directional or 2/local")
if (missing(clust))
stop("'clust' must be an integer (the number of clusters) or a vector of vector a vector of intergers (the initial partition)")
cX=TRUE
cXr=TRUE
cXu=FALSE
# if colnames(X) is null, colnames(x) is created
if (is.null(colnames(X))) colnames(X)=paste("V.",1:ncol(X),sep="")
# verification if some variables have constant values (standard deviation=0)
who<-which(apply(X,2,sd)==0)
if ((length(who)>0)&(sX==TRUE)) {
listwho<-c(": ")
for (r in 1:length(who)) {listwho=paste(listwho,colnames(X)[who[r]],",")}
stop("The variables",listwho," have constant values (standard deviation=0). Please remove these variables from the X matrix.")
}
if (length(who)>0) {
listwho<-c(": ")
for (r in 1:length(who)) {listwho=paste(listwho,colnames(X)[who[r]],",")}
warning("The variables",listwho," have constant values (standard deviation=0). Please remove these variables from the X matrix.")
}
X<- scale(X, center=cX, scale=sX)
p <- ncol(X)
n <- nrow(X)
# verification if there are NA values
valmq=FALSE
if (sum(is.na(X))>0) {
valmq=TRUE
tauxNA=sum(is.na(X))/(n*p)
}
if (is.null(Xr)) {
EXTr<-0
} else {
EXTr<-1
Xr<- scale(Xr, center=cXr, scale=sXr)
ntilde <- dim(Xr)[1]
q<-dim(Xr)[2]
if (n != ntilde) stop("X and Xr must have the same number of observations")
}
if (is.null(Xu)) {
EXTu<-0
} else {
EXTu<-1
Xu<- scale(Xu, center=cXu, scale=sXu)
ptilde <- dim(Xu)[1]
m<-dim(Xu)[2]
if (p != ptilde) {stop("X and Xu must be defined for the same number of
variables") }
if (EXTr==1) {stop("this procedure doesn't allow Xr and Xu to be defined
simultaneously. Use LCLV instead")}
}
if (valmq & ((EXTr==1)|(EXTu==1))) stop("The matrix X contains missing values. Use a X matrix without missing value for CLV with external data")
crit<-crit_init(method,X,EXTr,Xr,EXTu,Xu)
sbegin <- sum(crit)
if (length(clust) == 1) {
K <- clust
out<-mat_init(X,EXTr,Xr,EXTu,Xu,K)
comp<-out$comp
if (!valmq) comp <- as.matrix(X[,sort(sample.int(p, K))]) # K columns of X chosen at random
if (valmq) { # K groups of variables randomly defined, the K comp are defined accordingly
gp<-sample(1:K,p,replace=TRUE)
comp<-matrix(NA,nrow=n,ncol=K)
for (k in 1:K) {
ind <- which(gp == k)
if (length(ind) > 0) {
res<-consol_calcul(method,X,EXTr,Xr,EXTu,Xu,ind)
comp[,k]<-res$comp
}
}
}
if (EXTr==1) { a<-out$a }
if (EXTu==1) { u<-out$u }
groupes <- as.factor(consol_affect(method,X,Xr,Xu,EXTr,EXTu,comp,a,u))
if(sum(is.na(groupes))>0) warning("a variable has not been allocated to any cluster at initialisation step")
} else {
nstart = 1
if (!is.numeric(clust))
stop("clust must be a vector of integers")
groupes <- as.factor(clust)
K <- length(levels(groupes))
if (p < K)
stop("more cluster's centers than variables")
if (length(which(clust > K)) > 0)
stop("clusters must be numbered from 1 to K")
if (p != length(groupes))
stop("the length of clust must be equal to the number of variables")
out<-mat_init(X,EXTr,Xr,EXTu,Xu,K)
comp<-out$comp
if (EXTr==1) {a<-out$a}
if (EXTu==1) {u<-out$u}
}
if (length((intersect(strategy,c("kplusone","sparselv","none"))))==0)
stop("strategy must be either 'kplusone', 'sparselv', 'none'")
if (strategy=="kplusone" & (EXTu!=0 | EXTr!=0) )
stop(" 'k+1' strategy is not available with external variables, yet")
if (strategy=="sparselv" & (EXTu!=0 | EXTr!=0) )
stop(" 'Sparse LV' strategy is not available with external variables, yet")
#####################################################################
if (strategy=="none" | rho==0 )
listcc = clvk_none(X,n,p,sbegin,EXTr,EXTu,Xu,Xr,method,K,comp,groupes,a,u,iter.max,nstart)
if (strategy=="sparselv")
listcc = clvk_sparse(X,n,p,sbegin,EXTr,EXTu,Xu,Xr,method,K,comp,groupes,a,u,iter.max,nstart,rho)
if (strategy=="kplusone")
listcc = clvk_kp1(X,n,p,sbegin,EXTr,EXTu,Xu,Xr,method,K,comp,groupes,a,u,iter.max,nstart,rho)
#####################################################################
param<-list(X=X,method=method,n = n, p = p,K = K,nstart = nstart,EXTu=EXTu,EXTr=EXTr,
sX=sX,sXr=sXr,cXu=cXu,sXu=sXu,strategy=strategy,rho=rho)
listcc= c(listcc, list(param=param))
class(listcc) = "clv"
# if (strategy=="sparselv") class(listcc) = "sparselv"
# if (strategy=="kplusone") class(listcc) = "kplusone"
return(listcc)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.