R/RcppExports.R

Defines functions factkm redkm mrand dpcakm dispca doublekm CronbachAlpha disfa cluster

Documented in cluster CronbachAlpha disfa dispca doublekm dpcakm factkm mrand redkm

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' @name cluster
#' @title classification variable
#' @description
#' Recodes the binary and row-stochastic membership matrix U into the classification variable (similar to the "cluster" output returned by kmeans()).
#' 
#' @usage cluster(U)
#' 
#' @param U Binary and row-stochastic matrix.
#' 
#' @return \item{cl}{vector of length n indicating, for each element, the index of the cluster to which it has been assigned.}
#' 
#' @author Ionel Prunila, Maurizio Vichi
#' 
#' 
#' @examples
#' # Iris data 
#' # Loading the numeric variables of iris data
#' iris <- as.matrix(iris[,-5]) 
#' 
#' # standardizing the data
#' iris <- scale(iris)
#' 
#' # reduced k-means with 3 unit-clusters and 2 components for the variables
#' p1 <- redkm(iris, K = 3, Q = 2)
#' cl <- cluster(p1$U)
#' 
#' @export
NULL

cluster <- function(U) {
  # Thin R-level entry point: U is handed unchanged to the native symbol
  # `_drclust_cluster`, and whatever that routine returns is the result.
  .Call(
    `_drclust_cluster`,
    U
  )
}

#' @name disfa
#' @title Disjoint Factor Analysis
#' @description
#' Performs disjoint factor analysis, i.e., a Factor Analysis with a simple structure. In fact, each factor is defined by a disjoint subset of variables, resulting thus, in a simplified, easier to interpret loading matrix A and factors. Estimation is carried out via Maximum Likelihood.
#'  
#' 
#' @usage disfa(X, Q, Rndstart, verbose, maxiter, tol, constr, prep, print)
#' 
#' @param X Units x variables numeric data matrix.
#' @param Q Number of factors.
#' @param Rndstart Number of runs to be performed (default is 10).
#' @param verbose Outputs basic summary statistics for each run (1 = enabled; 0 = disabled, default option).
#' @param maxiter Maximum number of iterations allowed if convergence is not reached earlier (default is 100).
#' @param tol Tolerance threshold: maximum difference between the values of the objective function of two consecutive iterations such that convergence is assumed (default is 1e-6).
#' @param constr is a vector of length J = nr. of variables, pre-specifying to which cluster some of the variables must be assigned. Each component of the vector can assume integer values from 1 to Q (See example for more details), or 0 if no constraint on the variable is imposed (i.e., it will be assigned based on the plain algorithm).
#' @param prep Pre-processing of the data. 1 performs the z-score transform (default choice); 2 performs the min-max transform; 0 leaves the data un-pre-processed.
#' @param print Prints summary statistics of the performed method (1 = enabled; 0 = disabled, default option).
#' 
#' @return returns a list of estimates and some descriptive quantities of the final results.
#' @return \item{V}{Variables x factors membership matrix (binary and row-stochastic). Each row is a dummy variable indicating to which cluster each variable has been assigned.}
#' @return \item{A}{Variables x components loading matrix.}
#' @return \item{Psi}{Specific variance of each observed variable, not accounted for by the common factors (matrix).}
#' @return \item{discrepancy}{Value of the objective function, to be minimized. Difference between the observed and estimated covariance matrices (scalar).}
#' @return \item{RMSEA}{Adjusted Root Mean Squared Error (scalar).}
#' @return \item{AIC}{Akaike Information Criterion (scalar).}
#' @return \item{BIC}{Bayesian Information Criterion (scalar).}
#' @return \item{GFI}{Goodness of Fit Index (scalar).}
#'
#' @author Ionel Prunila, Maurizio Vichi
#' 
#' @references 
#' Vichi M. (2017) "Disjoint factor analysis with cross-loadings" <doi:10.1007/s11634-016-0263-9>
#' 
#' 
#' @examples
#' # Iris data 
#' # Loading the numeric variables of iris data
#' iris <- as.matrix(iris[,-5]) 
#' 
#' # No constraint on variables
#' out <- disfa(iris, Q = 2)
#' 
#' # Constraint: the first two variables must contribute to the same factor.
#' outc <- disfa(iris, Q = 2, constr = c(1,1,0,0))
#' 
#' @export
NULL

disfa <- function(X, Q,
                  Rndstart = 10L,
                  verbose = 0L,
                  maxiter = 100L,
                  tol = 1e-6,
                  constr = 00L,
                  prep = 1L,
                  print = 0L) {
  # Thin R-level entry point: all arguments are handed, in declaration order,
  # to the native symbol `_drclust_disfa`; its return value is the result.
  .Call(
    `_drclust_disfa`,
    X, Q, Rndstart, verbose, maxiter, tol, constr, prep, print
  )
}

#' @name CronbachAlpha
#' @title Cronbach Alpha
#' @description
#' Computes the Cronbach Alpha index on a units x variables data matrix. It measures the internal reliability, i.e., the propensity of J variables of a data matrix (n units x J variables) to be concordantly correlated with a single factor (composite indicator).
#' 
#' @usage CronbachAlpha(X)
#' 
#' @param X Units x variables numeric data matrix.
#' 
#' @return \item{as}{Cronbach's Alpha}
#' 
#' @author Ionel Prunila, Maurizio Vichi
#' 
#' @references Cronbach L. J. (1951) "Coefficient alpha and the internal structure of tests" <doi:10.1007/BF02310555>
#' 
#' @examples
#' # Iris data 
#' # Loading the numeric variables of iris data
#' iris <- as.matrix(iris[,-5]) 
#' 
#' # standardizing the data
#' iris <- scale(iris)
#' 
#' # compute Cronbach's Alpha
#' as <- CronbachAlpha(iris)
#' @export
NULL

CronbachAlpha <- function(X) {
  # Thin R-level entry point: X is handed unchanged to the native symbol
  # `_drclust_CronbachAlpha`, and that routine's return value is the result.
  .Call(
    `_drclust_CronbachAlpha`,
    X
  )
}

#' @name doublekm
#' @title Double k-means Clustering
#' @description
#' Performs simultaneous \emph{k}-means partitioning on units and variables (rows and columns of the data matrix). 
#' 
#' @usage doublekm(Xs, K, Q, Rndstart, verbose, maxiter, tol, prep, print)
#' 
#' @param Xs Units x variables numeric data matrix.
#' @param K Number of clusters for the units.
#' @param Q Number of clusters for the variables.
#' @param Rndstart Number of runs to be performed (default is 20).
#' @param verbose Outputs basic summary statistics for each run (1 = enabled; 0 = disabled, default option).
#' @param maxiter Maximum number of iterations allowed if convergence is not reached earlier (default is 100).
#' @param tol Tolerance threshold. It is the maximum difference between the values of the objective function of two consecutive iterations such that convergence is assumed (default is 1e-6).
#' @param prep Pre-processing of the data. 1 performs the z-score transform (default choice); 2 performs the min-max transform; 0 leaves the data un-pre-processed.
#' @param print Prints summary statistics of the results (1 = enabled; 0 = disabled, default option).
#' 
#' @return returns a list of estimates and some descriptive quantities of the final results.
#' @return \item{U}{Units x clusters membership matrix (binary and row-stochastic). Each row is a dummy variable indicating to which unit-cluster each unit has been assigned.}
#' @return \item{V}{Variables x clusters membership matrix (binary and row-stochastic). Each row is a dummy variable indicating to which variable-cluster each variable has been assigned.}
#' @return \item{centers}{K x Q matrix of centers containing the row means expressed in terms of column means.}
#' @return \item{totss}{The total sum of squares (scalar).}
#' @return \item{withinss}{Vector of within-row-cluster sum of squares, one component per cluster.}
#' @return \item{columnwise_withinss}{Vector of within-column-cluster sum of squares, one component per cluster.}
#' @return \item{betweenss}{Amount of deviance captured by the model (scalar).}
#' @return \item{K-size}{Number of units assigned to each row-cluster (vector).}
#' @return \item{Q-size}{Number of variables assigned to each column-cluster (vector).}
#' @return \item{pseudoF}{Calinski-Harabasz index of the resulting (row-) partition (scalar).}
#' @return \item{loop}{The index of the (best) run from which the results have been chosen.}
#' @return \item{it}{the number of iterations performed during the (best) run.}
#' 
#' @author Ionel Prunila, Maurizio Vichi
#' 
#' @references Vichi M. (2001) "Double k-means Clustering for Simultaneous Classification of Objects and Variables" <doi:10.1007/978-3-642-59471-7_6>
#' 
#' @examples
#' # Iris data 
#' # Loading the numeric variables of iris data
#' iris <- as.matrix(iris[,-5]) 
#' 
#' # double k-means with 3 unit-clusters and 2 variable-clusters
#' out <- doublekm(iris, K = 3, Q = 2)
#' 
#' @export
NULL

doublekm <- function(Xs, K, Q,
                     Rndstart = 20L,
                     verbose = 0L,
                     maxiter = 100L,
                     tol = 1e-6,
                     prep = 1L,
                     print = 0L) {
  # Thin R-level entry point: all arguments are handed, in declaration order,
  # to the native symbol `_drclust_doublekm`; its return value is the result.
  .Call(
    `_drclust_doublekm`,
    Xs, K, Q, Rndstart, verbose, maxiter, tol, prep, print
  )
}

#' @name dispca
#' @title Disjoint Principal Components Analysis
#' @description
#' Performs disjoint PCA, that is, a simplified version of PCA. Computes each one of the Q principal components from a different subset of the J variables (resulting thus, in a simplified, easier to interpret loading matrix A). 
#' 
#' 
#' @usage dispca(X, Q, Rndstart, verbose, maxiter, tol, prep, print, constr)
#' 
#' @param X Units x variables numeric data matrix.
#' @param Q Number of factors.
#' @param Rndstart Number of runs to be performed (default is 20).
#' @param verbose Outputs basic summary statistics for each run (1 = enabled; 0 = disabled, default option).
#' @param maxiter Maximum number of iterations allowed if convergence is not reached earlier (default is 100).
#' @param tol Tolerance threshold (maximum difference between the values of the objective function of two consecutive iterations such that convergence is assumed). Default is 1e-6.
#' @param prep Pre-processing of the data. 1 performs the z-score transform (default choice); 2 performs the min-max transform; 0 leaves the data un-pre-processed.
#' @param print Prints summary statistics of the results (1 = enabled; 0 = disabled, default option).
#' @param constr is a vector of length J = nr. of variables, pre-specifying to which cluster some of the variables must be assigned. Each component of the vector can assume integer values from 1 to Q (See example for more details), or 0 if no constraint on the variable is imposed (i.e., it will be assigned based on the plain algorithm).
#' 
#' @return returns a list of estimates and some descriptive quantities of the final results.
#' @return \item{V}{Variables x factors membership matrix (binary and row-stochastic). Each row is a dummy variable indicating to which cluster it has been assigned.}
#' @return \item{A}{Variables x components loading matrix.}
#' @return \item{betweenss}{Amount of deviance captured by the model (scalar).}
#' @return \item{totss}{total amount of deviance (scalar).} 
#' @return \item{size}{Number of variables assigned to each column-cluster (vector).}
#' @return \item{loop}{The index of the (best) run from which the results have been chosen.}
#' @return \item{it}{the number of iterations performed during the (best) run.}
#' 
#' @author Ionel Prunila, Maurizio Vichi
#' 
#' @references 
#' Vichi M., Saporta G. (2009) "Clustering and disjoint principal component analysis" <doi:10.1016/j.csda.2008.05.028>
#' 
#' @examples
#' # Iris data 
#' # Loading the numeric variables of iris data
#' iris <- as.matrix(iris[,-5]) 
#' 
#' # No constraint on variables
#' out <- dispca(iris, Q = 2)
#' 
#' # Constraint: the first two variables must contribute to the same factor.
#' outc <- dispca(iris, Q = 2, constr = c(1,1,0,0))
#' @export
NULL

dispca <- function(X, Q,
                   Rndstart = 20L,
                   verbose = 0L,
                   maxiter = 100L,
                   tol = 1e-6,
                   prep = 1L,
                   print = 0L,
                   constr = 00L) {
  # Thin R-level entry point: all arguments are handed, in declaration order,
  # to the native symbol `_drclust_dispca`; its return value is the result.
  .Call(
    `_drclust_dispca`,
    X, Q, Rndstart, verbose, maxiter, tol, prep, print, constr
  )
}

#' @name dpcakm
#' @title Clustering with Disjoint Principal Components Analysis
#' @description
#' Performs simultaneously k-means partitioning on units and disjoint PCA on the variables, computing each principal component from a different subset of variables. The result is a simplified, easier to interpret loading matrix A, 
#' the principal components and the clustering. The reduced subspace is identified by the centroids.
#' 
#' 
#' @usage dpcakm(X, K, Q, Rndstart, verbose, maxiter, tol, constr, print, prep)
#' 
#' @param X Units x variables numeric data matrix.
#' @param K Number of clusters for the units.
#' @param Q Number of principal components.
#' @param Rndstart Number of runs to be performed (default is 20).
#' @param verbose Outputs basic summary statistics for each run (1 = enabled; 0 = disabled, default option).
#' @param maxiter Maximum number of iterations allowed if convergence is not reached earlier (default is 100).
#' @param tol Tolerance threshold: maximum difference between the values of the objective function of two consecutive iterations such that convergence is assumed (default is 1e-6).
#' @param constr is a vector of length J = nr. of variables, pre-specifying to which cluster some of the variables must be assigned. Each component of the vector can assume integer values from 1 to Q = nr. of variable-cluster / principal components (See examples for more details), or 0 if no constraint on the variable is imposed (i.e., it will be assigned based on the plain algorithm).
#' @param print Prints summary statistics of the results (1 = enabled; 0 = disabled, default option).
#' @param prep Pre-processing of the data. 1 performs the z-score transform (default choice); 2 performs the min-max transform; 0 leaves the data un-pre-processed.
#'  
#' 
#' @return returns a list of estimates and some descriptive quantities of the final results.
#' @return \item{V}{Variables x factors membership matrix (binary and row-stochastic). Each row is a dummy variable indicating to which cluster each variable has been assigned.}
#' @return \item{U}{Units x clusters membership matrix (binary and row-stochastic). Each row is a dummy variable indicating to which cluster each unit has been assigned.}
#' @return \item{A}{Variables x components loading matrix.}
#' @return \item{centers}{K x Q matrix of centers containing the row means expressed in the reduced space of Q principal components.}
#' @return \item{totss}{The total sum of squares (scalar).}
#' @return \item{withinss}{Vector of within-cluster sum of squares, one component per cluster.}
#' @return \item{betweenss}{Amount of deviance captured by the model (scalar).}
#' @return \item{K-size}{Number of units assigned to each row-cluster (vector).}
#' @return \item{Q-size}{Number of variables assigned to each column-cluster (vector).}
#' @return \item{pseudoF}{Calinski-Harabasz index of the resulting partition (scalar).}
#' @return \item{loop}{The index of the (best) run from which the results have been chosen.}
#' @return \item{it}{the number of iterations performed during the (best) run.}
#' 
#' @author Ionel Prunila, Maurizio Vichi
#' 
#' @references 
#' Vichi M., Saporta G. (2009) "Clustering and disjoint principal component analysis" <doi:10.1016/j.csda.2008.05.028>
#' 
#' @examples
#' # Iris data 
#' # Loading the numeric variables of iris data
#' iris <- as.matrix(iris[,-5]) 
#' 
#' # No constraint on variables
#' out <- dpcakm(iris, K = 3, Q = 2, Rndstart = 5)
#' 
#' # Constraint: the first two variables must contribute to the same factor.
#' outc <- dpcakm(iris, K = 3, Q = 2, Rndstart = 5,constr = c(1,1,0,0))
#' @export
NULL

dpcakm <- function(X, K, Q,
                   Rndstart = 20L,
                   verbose = 0L,
                   maxiter = 100L,
                   tol = 1e-6,
                   constr = 00L,
                   print = 0L,
                   prep = 1L) {
  # Thin R-level entry point: all arguments are handed, in declaration order,
  # to the native symbol `_drclust_dpcakm`; its return value is the result.
  .Call(
    `_drclust_dpcakm`,
    X, K, Q, Rndstart, verbose, maxiter, tol, constr, print, prep
  )
}

#' @name mrand
#' @title Adjusted Rand Index
#' @description
#' Performs the Adjusted Rand Index on a confusion matrix (row-by-column product of two partition-matrices). ARI is a measure of the similarity between two data clusterings.
#' 
#' @usage mrand(N)
#' 
#' @param N Confusion matrix.
#' 
#' @return \item{mri}{Adjusted Rand Index of a confusion matrix (scalar).}
#' 
#' @author Ionel Prunila, Maurizio Vichi
#' 
#' @references Rand W. M. (1971) "Objective criteria for the evaluation of clustering methods" <doi:10.2307/2284239>
#' 
#' @examples
#' # Iris data 
#' # Loading the numeric variables of iris data
#' iris <- as.matrix(iris[,-5]) 
#' 
#' # standardizing the data
#' iris <- scale(iris)
#' 
#' # reduced k-means and double k-means, each with 3 unit-clusters and Q = 2 for the variables
#' p1 <- redkm(iris, K = 3, Q = 2, Rndstart = 10)
#' p2 <- doublekm(iris, K=3, Q=2, Rndstart = 10)
#' mri <- mrand(t(p1$U)%*%p2$U)
#' @export
NULL

mrand <- function(N) {
  # Thin R-level entry point: N is handed unchanged to the native symbol
  # `_drclust_mrand`, and that routine's return value is the result.
  .Call(
    `_drclust_mrand`,
    N
  )
}

#' @name redkm
#' @title k-means on a reduced subspace
#' @description
#' Performs simultaneously k-means partitioning on units and principal component analysis on the variables. 
#' 
#' @usage redkm(X, K, Q, Rndstart, verbose, maxiter, tol, rot, prep, print)
#' 
#' @param X Units x variables numeric data matrix.
#' @param K Number of clusters for the units.
#' @param Q Number of principal components w.r.t. variables.
#' @param Rndstart Number of runs to be performed (default is 20).
#' @param verbose Outputs basic summary statistics for each run (1 = enabled; 0 = disabled, default option).
#' @param maxiter Maximum number of iterations allowed if convergence is not reached earlier (default is 100).
#' @param tol Tolerance threshold: maximum difference between the values of the objective function of two consecutive iterations such that convergence is assumed (default is 1e-6).
#' @param rot performs varimax rotation of axes obtained via PCA. (=1 enabled; =0 disabled, default option)
#' @param prep Pre-processing of the data. 1 performs the z-score transform (default choice); 2 performs the min-max transform; 0 leaves the data un-pre-processed.
#' @param print Prints summary statistics of the performed method (1 = enabled; 0 = disabled, default option).
#' 
#' 
#' 
#' @return returns a list of estimates and some descriptive quantities of the final results.
#' @return \item{U}{Units x clusters membership matrix (binary and row-stochastic). Each row is a dummy variable indicating to which cluster each unit has been assigned.}
#' @return \item{A}{Variables x components loading matrix (orthonormal).}
#' @return \item{centers}{K x Q matrix of centers containing the row means expressed in the reduced space of Q principal components.}
#' @return \item{totss}{The total sum of squares (scalar).}
#' @return \item{withinss}{Vector of within-cluster sum of squares, one component per cluster.}
#' @return \item{betweenss}{Amount of deviance captured by the model (scalar).}
#' @return \item{size}{Number of units assigned to each cluster (vector).}
#' @return \item{pseudoF}{Calinski-Harabasz index of the resulting partition (scalar).}
#' @return \item{loop}{The index of the (best) run from which the results have been chosen.}
#' @return \item{it}{the number of iterations performed during the (best) run.}
#' 
#' @author Ionel Prunila, Maurizio Vichi
#' 
#' @references 
#' de Soete G., Carroll J. (1994) "K-means clustering in a low-dimensional Euclidean space" <doi:10.1007/978-3-642-51175-2_24>
#' 
#' Kaiser H.F. (1958) "The varimax criterion for analytic rotation in factor analysis" <doi:10.1007/BF02289233>
#' 
#' @examples
#' # Iris data 
#' # Loading the numeric variables of iris data
#' iris <- as.matrix(iris[,-5]) 
#' 
#' # reduced k-means with 3 unit-clusters and 2 components for the variables
#' out <- redkm(iris, K = 3, Q = 2, Rndstart = 15, verbose = 0, maxiter = 100, tol = 1e-7, rot = 1)
#' 
#' @export
NULL

#' @name factkm
#' @title Factorial k-means
#' @description
#' Performs simultaneously k-means partitioning on units and principal component analysis on the variables. 
#' Identifies the best partition in a Least-Squares sense in the best reduced space of the data. Both the data 
#' and the centroids are used to identify the best Least-Squares reduced subspace, where their distances are also measured.
#' 
#' 
#' @usage factkm(X, K, Q, Rndstart, verbose, maxiter, tol, rot, prep, print)
#' 
#' @param X Units x variables numeric data matrix.
#' @param K Number of clusters for the units.
#' @param Q Number of principal components w.r.t. variables.
#' @param Rndstart Number of runs to be performed (default is 20).
#' @param verbose Outputs basic summary statistics for each run (1 = enabled; 0 = disabled, default option).
#' @param maxiter Maximum number of iterations allowed if convergence is not reached earlier (default is 100).
#' @param tol Tolerance threshold (maximum difference in the values of the objective function of two consecutive iterations such that convergence is assumed. Default is 1e-6).
#' @param rot performs varimax rotation of axes obtained via PCA. (=1 enabled; =0 disabled, default option)
#' @param prep Pre-processing of the data. 1 performs the z-score transform (default choice); 2 performs the min-max transform; 0 leaves the data un-pre-processed.
#' @param print Prints summary statistics of the results (1 = enabled; 0 = disabled, default option).
#' 
#' 
#' @return returns a list of estimates and some descriptive quantities of the final results.
#' @return \item{U}{Units x clusters membership matrix (binary and row-stochastic). Each row is a dummy variable indicating to which cluster each unit has been assigned.}
#' @return \item{A}{Variables x components loading matrix (orthonormal).}
#' @return \item{centers}{K x Q matrix of centers containing the row means expressed in the reduced space of Q principal components.}
#' @return \item{totss}{The total sum of squares.}
#' @return \item{withinss}{Vector of within-cluster sum of squares, one component per cluster.}
#' @return \item{betweenss}{amount of deviance captured by the model.}
#' @return \item{size}{Number of units assigned to each cluster.}
#' @return \item{pseudoF}{Calinski-Harabasz index of the resulting partition.}
#' @return \item{loop}{The index of the (best) run from which the results have been chosen.}
#' @return \item{it}{the number of iterations performed during the (best) run.}
#' 
#' @author Ionel Prunila, Maurizio Vichi
#' 
#' @references 
#' Vichi M., Kiers H.A.L. (2001) "Factorial k-means analysis for two-way data" <doi:10.1016/S0167-9473(00)00064-5>
#' 
#' Kaiser H.F. (1958) "The varimax criterion for analytic rotation in factor analysis" <doi:10.1007/BF02289233>
#' 
#' @examples
#' # Iris data 
#' # Loading the numeric variables of iris data
#' iris <- as.matrix(iris[,-5]) 
#' 
#' # factorial k-means with 3 unit-clusters and 2 components for the variables
#' out <- factkm(iris, K = 3, Q = 2, Rndstart = 15, verbose = 0, maxiter = 100, tol = 1e-7, rot = 1)
#' 
#' @export
NULL

redkm <- function(X, K, Q,
                  Rndstart = 20L,
                  verbose = 0L,
                  maxiter = 100L,
                  tol = 1e-6,
                  rot = 0L,
                  prep = 1L,
                  print = 0L) {
  # Thin R-level entry point: all arguments are handed, in declaration order,
  # to the native symbol `_drclust_redkm`; its return value is the result.
  .Call(
    `_drclust_redkm`,
    X, K, Q, Rndstart, verbose, maxiter, tol, rot, prep, print
  )
}

factkm <- function(X, K, Q,
                   Rndstart = 20L,
                   verbose = 0L,
                   maxiter = 100L,
                   tol = 1e-6,
                   rot = 0L,
                   prep = 1L,
                   print = 0L) {
  # Thin R-level entry point: all arguments are handed, in declaration order,
  # to the native symbol `_drclust_factkm`; its return value is the result.
  .Call(
    `_drclust_factkm`,
    X, K, Q, Rndstart, verbose, maxiter, tol, rot, prep, print
  )
}

Try the drclust package in your browser

Any scripts or data that you put into this service are public.

drclust documentation built on May 29, 2024, 3:51 a.m.