Nothing
#' K-Median Cluster Component Analysis
#'
#' K-Median Cluster Component Analysis, a distribution-free soft-clustering method
#' for preference rankings.
#'
#'
#' @param X A n by m data matrix containing preference rankings, in which there are n judges and m objects to be judged. Each row is a ranking of the objects which are represented by the columns.
#' @param k The number of cluster components
#' @param control a list of options that control details of the \code{cca} algorithm governed by the function
#' \code{ccacontrol}. The options govern maximum number of iterations of \code{cca} (itercca=1 is the default), the algorithm chosen to
#' compute the median ranking (default, "quick"), and other options related
#' to the consrank algorithm, which is called by \code{cca}
#' @param \dots arguments passed bypassing \code{ccacontrol}
#'
#' @details The user can use any algorithm implemented in the \code{consrank} function from the \pkg{ConsRank} package. All algorithms allow the user to set the option 'full=TRUE'
#' if the median ranking(s) must be searched in the restricted space of permutations instead of in the unconstrained universe of rankings of n items including all possible ties.
#' There are two classification uncertainty measures: Us and Uprods. "Us" is the geometric
#' mean of the membership probabilities of each individual, normalized in such a way that
#' in the case of maximum uncertainty Us=1. "Ucca" is the average of all the "Us".
#' "Uprods" is the product of the membership probabilities of each individual, normalized in such a way that
#' in the case of maximum uncertainty Uprods=1. "Uprodscca" is the average of all the "Uprods".
#'
#' @return An object of the class "cca". It contains:
#' \tabular{lll}{
#' pk\tab \tab the membership probability matrix\cr
#' clc\tab \tab cluster centers\cr
#' oclc \tab \tab cluster centers in terms of orderings\cr
#' idc\tab \tab crisp partition: id of the cluster component associated with the highest membership probability\cr
#' Hcca \tab \tab Global homogeneity measure (tau_X rank correlation coefficient) \cr
#' hk \tab \tab Homogeneity within cluster\cr
#' props \tab \tab estimated proportion of cases within cluster\cr
#' Us \tab \tab Uncertainty measure per-individual (see details)\cr
#' Ucca \tab \tab Global uncertainty measure\cr
#' Uprods \tab \tab Uncertainty measure per-individual (see details)\cr
#' Uprodscca \tab \tab Global uncertainty measure\cr
#' consrankout \tab \tab complete output of rank aggregation algorithm, containing eventually multiple median rankings
#'}
#'
#' @examples
#' \donttest{
#' data(Irish)
#' set.seed(135) #for reproducibility
#' # CCA with four components
#' ccares <- cca(Irish$rankings, 4, itercca=10)
#' summary(ccares)
#' }
#'
#'
#' @author Antonio D'Ambrosio \email{antdambr@unina.it}
#'
#' @references D'Ambrosio, A. and Heiser, W.J. (2019). A Distribution-free Soft Clustering Method for Preference Rankings. Behaviormetrika , vol. 46(2), pp. 333–351, DOI: 10.1007/s41237-018-0069-5
#' @references Heiser W.J., and D'Ambrosio A. (2013). Clustering and Prediction of Rankings within a Kemeny Distance Framework.
#' In Berthold, L., Van den Poel, D, Ultsch, A. (eds). Algorithms from and for Nature and Life.pp-19-31. Springer international. DOI: 10.1007/978-3-319-00035-0_2.
#' @references Ben-Israel, A., and Iyigun, C. (2008). Probabilistic d-clustering. Journal of Classification, 25(1), pp.5-26. DOI: 10.1007/s00357-008-9002-z
#'
#' @keywords Adjusted Concordance Index
#' @keywords Normalized Degree of Concordance
#' @keywords Soft clustering
#' @keywords Preference rankings
#'
#' @import ConsRank
#' @importFrom pracma repmat
#' @importFrom stats runif
#' @importFrom utils combn
#'
#' @seealso \code{ccacontrol}
#' @seealso \code{ranktree}
#'
#'
#' @export
cca <- function(X,k,control=ccacontrol(...), ...){
out <- list(pk=NULL,clc=NULL,oclc=NULL,idc=NULL,Hcca=NULL,
hk=NULL, props=NULL, Us=NULL, Ucca=NULL,
Uprods=NULL,Uprodscca=NULL, consrankout=NULL,
replications=NULL, settings=control)
if (control$itercca==1){
res <- ccacore(X,k,control)
out$pk <- res$pk
out$clc <- res$clc
out$oclc <- rank2order(res$clc,items=colnames(X))
out$idc <- res$idc
out$Hcca <- res$Hcca
out$hk <- res$hk
out$props <- res$props
out$Us <- res$Us
out$Ucca <- res$Ucca
out$Uprods <- res$Us2
out$Uprodscca <- res$Ucca2
out$consrankout <- res$Clusters
out$replications <- 1
}else{
iterout <- out
for (j in 1:control$itercca){
res <- ccacore(X,k,control)
iterout$pk[[j]] <- res$pk
iterout$clc[[j]] <- res$clc
iterout$oclc[[j]] <- NA
iterout$idc[[j]] <- res$idc
iterout$Hcca[[j]] <- res$Hcca
iterout$hk[[j]] <- res$hk
iterout$props[[j]] <- res$props
iterout$Us[[j]] <- res$Us
iterout$Ucca[[j]] <- res$Ucca
iterout$Uprods[[j]] <- res$Us2
iterout$Uprodscca[[j]] <- res$Ucca2
iterout$consrankout[[j]] <- res$Clusters
}#end for
Ucca <- unlist(iterout$Ucca)
idbest <- which.min(Ucca)
out$pk <- iterout$pk[[idbest]]
out$clc <- iterout$clc[[idbest]]
out$oclc <- rank2order(iterout$clc[[idbest]],items=colnames(X))
out$idc <- iterout$idc[[idbest]]
out$Hcca <- iterout$Hcca[[idbest]]
out$hk <- iterout$hk[[idbest]]
out$props <- iterout$props[[idbest]]
out$Us <- iterout$Us[[idbest]]
out$Ucca <- iterout$Ucca[[idbest]]
out$Uprods <- iterout$Uprods[[idbest]]
out$Uprodscca <- iterout$Uprodscca[[idbest]]
out$consrankout <- iterout$Clusters[[idbest]]
out$replications <- control$itercca
}#end if
class(out) <- c("cca","ConsRankClass")
print(out)
return(out)
}
#-------------------------------------------------------------------------
ccacore <- function(X,
k,
control) {
#K-Median cluster component analysis by D'Ambrosio & Heiser
r <- nrow(X)
co <- ncol(X)
R <- 10
L <- 0
Prold <- matrix(0, 1, r)
PXold <- matrix(0, k, r)
maxcuf <- (1 / k) ^ k
Cs <- t(replicate(k, order(runif(co))))
if (nrow(unique(Cs))!=k){ #cannot have same cluster center
repeat{
Cs <- t(replicate(k, order(runif(co))))
if((nrow(unique(Cs))==k)){break}
}
}
# #-----------------
# Cs <- t(apply(matrix(0, k, co), 1, function(x)
# sample(c))) #random cluster centers
#
# if (nrow(unique(Cs))!=k){
#
# repeat{
#
# Cs <- t(apply(matrix(0, k, co), 1, function(x)
# sample(co))) #random cluster centers
#
# if((nrow(unique(Cs))==k)){break}
#
# }
#
# }
# #-----------------
Accs1 <- toler <- 1e-10
TTTT <- 10
while (TTTT > toler) {
L <- L + 1
d <- kemenyd(X, Cs)
dd <- d
PK <- combprobs(dd, k) #equation 5, behaviormetrika paper
Uk <- PK ^ 2
#ddd <- d
#ddd[ddd == 0] <- 1
# forint <- sqrt(PK ^ 2 * ddd)
# intensity <- matrix(colSums(forint) / sum(forint), ncol = 1) #cluster intensity
intensity <- matrix(colMeans(PK), ncol=1)
Uk <- Uk / repmat(matrix(colSums(Uk), nrow = 1), dim(d)[1], 1) #normalize intensity weight
Clusters <- vector(mode = "list")
for (c in 1:k) {
Clusters[[c]] <- consrank(
X, #all data
wk = Uk[, c], #weigths
algorithm = control$algorithm,
ps = control$ps,
proc=control$proc,
full=control$full,
itermax=control$consrankitermax,
np=control$np,
gl=control$gl,
ff=control$ff,
cr=control$cr
)
}
# centers <- list.rbind(sapply(Clusters, rbind)[1, ])
# itaux <- list.rbind(sapply(Clusters, rbind)[2, ]) #h_k, eq. 6 behaviormetrika
centers <- matrix(0,k,co)
itaux <- matrix(0,k,ncol=1)
for (h in 1:k){
centers[h,] <- Clusters[[h]]$Consensus[1,]
itaux[h,1] <- Clusters[[h]]$Tau[1]
}
Globtau <- crossprod(itaux, intensity) #H_cca, eq. 7 behaviormetrica
# Accsiter <- colMeans(d * PK)[1]
Accsiter <- colMeans(d * (PK^2))[1] #loss function, eq. 2
TTTT <- abs(Accsiter - Accs1)
Accs1 <- Accsiter
# print(Accs1)
# flush.console()
indexC <- apply(PK, 1, which.max)
Cs <- centers #messo oggi, 19/4/2021
} #end while
unc1 <- (apply(PK, 1, prod)) / maxcuf #first uncertainty measure:
# jdf <- apply(dd, 1, prod) / PrK$den
#
# unc2 <- k*jdf / (apply(dd, 1, prod)^(1/k))
#
# unc2[which(is.na(unc2))] <- 0 #eq. 8 long format
unc2 <- k*( (apply(PK, 1, prod)) )^(1/k) #eq. 8 behaviormetrika
#geometric mean for each individual, normalized to be equal to one
#in case of maximum uncertailnty
return(
list(
pk = PK, #membership
clc = centers, #cluster centers
idc = indexC, #id of observations in the clusters
Hcca = Globtau, #Global homogeneity
hk = itaux, #Homogeneity within clusters
props = intensity, #estimated proportions within cluster
Us = unc2, #normalized classifiability (CUF) for each id
Ucca = mean(unc2), #Global normalized classifiability (CUF)
Us2 = unc1, #normalized classifiability (only prod of probs)
Ucca2 = mean(unc1), #Global normalized classifiability
loss = Accsiter, #loss function
Iters = L, #iterations
Clusters = Clusters #info about the k output of consrank
)
)
}
#--------------------------------------
combprobs <- function(D,k){
if (k==2){
denom <- rowSums(D)
Numeratore <- cbind(D[,2],D[,1])
} else {
combinazioni <- t(combn(k,(k-1)))
productories <- matrix(0,dim(D)[1],dim(combinazioni)[1])
for (j in 1:k){productories[,j] <- apply(D[,combinazioni[j,]],1,prod)}
denom<-rowSums(productories)
indk<-apply(combinazioni,1,function(x) setdiff(seq(1:k),x))
Numeratore<-matrix(0,dim(D)[1],k)
for (j in 1:k){
Numeratore[,indk[j]]<-productories[,j]
}
#Numeratore<-productories[,indk]
}
PK<-Numeratore/repmat(matrix(denom,ncol=1),1,k)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.