Nothing
#' #' Association Matrix
#' #'
#' #' This function creates an association matrix for a clustering/partition.
#' #' The (i,j) element of the matrix is 1 if item i and j are in the same
#' #' cluster/subset and 0 otherwise.
#' #'
#' #' @param cl A vector containing cluster labels for a clustering/partition.
#' #' @return A matrix of 0s and 1s indicating whether items i and j are in
#' #' the same cluster/subset.
#' #' @examples
#' #'
#' #' cl <- rep(1:3,times=c(2,4,3))
#' #' association.matrix(cl)
#' #'
#' #' @export association.matrix
#' association.matrix <- function(cl){
#' n <- length(cl)
#' cl1 <- rep(cl,n)
#' cl2 <- rep(cl,each=n)
#' matrix(cl1==cl2, ncol=n)*1
#' }
#'
#'
#' #' Variance Ratio
#' #'
#' #' This function calculates the variance of the expected pairwise allocation
#' #' matrix (EPAM) within clusters/subsets over the total variance of the expected
#' #' pairwise allocation matrix.
#' #'
#' #' The \code{\link{variance.ratio}} function takes as input an object of class
#' #' \code{salso.confidence} and calculates the variance ratio for the estimated
#' #' partition from the corresponding expected pairwise allocation matrix (EPAM).
#' #'
#' #' The variance ratio is the weighted average of the within cluster variances of
#' #' the EPAM, weighted by the number of pairwise EPAM values per cluster, over
#' #' the total variance of the EPAM.
#' #'
#' #' @param x,y If \code{y} is not specified then \code{x} must be an object of
#' #' class \code{salso.confidence}. Otherwise, \code{x} is a vector of cluster
#' #' labels and \code{y} is an expected pairwise allocation matrix.
#' #' @return A vector of variance ratios.
#' #' @examples
#' #' x <- rep(c(1,2,3), times=c(2,3,5))
#' #' y <- diag(10)
#' #' y[upper.tri(y)] <- runif(45)
#' #' variance.ratio(x,y)
#' #'
#' #' @family Default Mass Selection
#' #' @export
#' variance.ratio <- function(x,y) {
#' if (inherits(x,"salso.confidence")) {
#' cl.am <- association.matrix(x$estimate)
#' y <- x$psm
#' x <- x$estimate
#' } else {
#' cl.am <- association.matrix(x)
#' }
#' cl.vars <- sapply(unique(x), function(i) {
#' if (sum(x == i) < 3) return(c(0,1))
#' cl.am[x != i, x != i] <- 0
#' z <- y[upper.tri(y) & cl.am]
#' c(sum((mean(z) - z)^2)/(length(z)-1),length(z))
#' })
#'
#' cl.var <- sum(apply(cl.vars,2,prod)/sum(cl.vars[2,]))
#' z <- y[upper.tri(y)]
#' tot.var <- sum((mean(z) - z)^2)/(length(z) - 1)
#'
#' cl.var/tot.var
#' }
#'
#' #' Partition Confidence
#' #'
#' #' This function calculates the partition confidence of a partition estimate
#' #' from the corresponding expected pairwise allocation matrix (EPAM).
#' #'
#' #' The \code{\link{partition.confidence}} takes as input an object of class
#' #' \code{salso.confidence} and then calculates the partition confidence from the
#' #' expected pairwise allocation matrix.
#' #'
#' #' The partition confidence is the average values of the EPAM for items that are
#' #' clustered together. Items which are in their own subset do not contribute to
#' #' partition confidence.
#' #'
#' #' @inheritParams variance.ratio
#' #' @return A vector of partition confidences.
#' #'
#' #' @examples
#' #' x <- rep(c(1,2,3), times=c(2,3,5))
#' #' y <- diag(10)
#' #' y[upper.tri(y)] <- runif(45)
#' #' partition.confidence(x,y)
#' #'
#' #' @family Default Mass Selection
#' #' @export
#' partition.confidence <- function(x,y){
#' if (inherits(x,"salso.confidence")) {
#' if (length(unique(x$estimate)) == length(x$estimate)) return(0)
#' cl.am <- association.matrix(x$estimate)
#' y <- x$psm
#' } else {
#' cl.am <- association.matrix(x)
#' }
#' mean(y[upper.tri(y) & cl.am])
#' }
#'
#'
#' #' Mass Selection Algorithm
#' #'
#' #' This function selects the optimal mass value for Cluster Analysis via Random
#' #' Partition distributions using the Ewens-Pitman attraction distribution.
#' #'
#' #' The \code{\link{mass.algorithm}} function is used internally in the
#' #' \code{\link{default.mass}} function.
#' #'
#' #' The default value for \code{w} is \code{c(1,1,1)}.
#' #'
#' #' The general algorithm is as follows: \enumerate{ \item Rank the partition
#' #' confidence (\code{pc}) and variance ratio (\code{vr}). Select the
#' #' \code{mass_i} value which minimizes the weigthed sum of \eqn{w_1 pc_i + w_2
#' #' vr_i + w_3 n_i}. }
#' #'
#' #' The two stage algorithm proceeds as follows: \enumerate{ \item Rank the
#' #' partition confidence (\code{pc}) and variance ratio (\code{vr}). For each
#' #' number of clusters \code{n} select the index which minimizes the weigthed sum
#' #' of \eqn{w_1 pc_i + w_2 vr_i}. \item Rerank the \code{pc} and \code{vr} of the
#' #' selected indices and select the \code{mass_i} value which minimizes the
#' #' weigthed sum of \eqn{w_1 pc_i + w_2 vr_i + w_3 n_i} from among the selected
#' #' indices. }
#' #'
#' #'
#' #' @param mass a vector of mass values
#' #' @param pc a vector of partition confidences for the partition estimates at
#' #' the corresponding mass values
#' #' @param vr a vector of variance ratios for the partition estimates at the
#' #' corresponding mass values
#' #' @param n a vector of the number of subsets in the partition estimates at the
#' #' correpsonding mass values
#' #' @param w a vector of length 3 specifying the weights of \code{pc}, \code{vr},
#' #' and \code{n}
#' #' @param two.stage logical; if \code{TRUE}, the two stage algorithm is
#' #' implemented
#' #'
#' #' @return A matrix containing the `best' \code{mass} value and corresponding
#' #' values for \code{pc}, \code{vr}, and \code{n}. The matrix also contains the
#' #' mass values for the partitions estimate with more one more and one less
#' #' subset that the selected mass value.
#' #'
#' #' @family Default Mass Selection
#' #' @importFrom graphics abline lines plot
#' #' @export
#' #'
#' mass.algorithm <- function(mass,pc,vr,n,w=c(1,1,1),two.stage=TRUE) {
#' if (length(w) != 3) stop("Weights must be assigned to: PC VR N")
#'
#' compute.rankings <- function(subset=1:length(pc)) {
#' rankings <- rank(1-pc[subset])*w[1] + rank(vr[subset])*w[2] + n[subset]*w[3]
#' if (length(rankings) > 3 && FALSE) {
#' best <- which.min(sapply(1:(length(rankings)-2), function(i) sum(rankings[i:(i+2)]))) + 1
#' } else {
#' best <- which.min(rankings)
#' }
#'
#' c(n=n[subset][best], mass = mass[subset][best], pc=pc[subset][best], vr=vr[subset][best])
#' }
#'
#' if (two.stage) {
#' all <- sapply(unique(n[n>1]), function(i) compute.rankings(n == i))
#' rankings <- rank(1-all["pc",])*w[1] + rank(all["vr",])*w[2] + all["n",]*w[3]
#' index <- which.min(rankings)
#' if (index >= ncol(all)) {
#' out <- t(all[,c(best=index, index-1)])
#' } else {
#' out <- t(all[,c(best=index, index-1, index+1)])
#' }
#' } else {
#' best <- compute.rankings(n>1)
#' index <- which(unique(n[n>1]) == best["n"])
#' others <- sapply(unique(n[n>1])[index + c(-1,1)], function(i) compute.rankings(n == i))
#'
#' out <- rbind(best,t(others))
#' }
#'
#' plot(mass,pc,type="l",ylim=c(0,1),col="dodgerblue", ylab="")
#' lines(vr[n>1]~mass[n>1],col="forestgreen")
#' abline(v=out[1,"mass"])
#' for (i in 2:nrow(out)) {
#' abline(v=out[i,"mass"], lty=2)
#' }
#'
#' out
#' }
#'
#' #' Default Mass Selection
#' #'
#' #' This function selects an optimal mass value for Cluster Analysis via Random
#' #' Partition Distribtuions, using the Ewens-Pitman Attraction distribution.
#' #'
#' #' The function draws \code{n.draws} partitions at each specified mass value. If
#' #' a vector of mass values is not given, then the default of
#' #' \code{seq(0.1,10,0.2)} is used for loss
#' #' \code{"VI.lb"} and \code{seq(0.1,5,0.05)} used for
#' #' the other loss functions.
#' #'
#' #' If a list of expected pairwise allocation matrices (EPAM) is provided,
#' #' additional draws at matching mass values are added to the corresponding
#' #' matrix. Additionally, no new draws are needed for estimation, if a list of
#' #' EPAMs is provided.
#' #'
#' #' A partition/clustering estimate from each EPAM is obtained using the SALSO
#' #' method in \code{\link[salso]{salso}}. The estimate given minimizes the
#' #' specified \code{loss} function with respect to the EPAM.
#' #'
#' #' The function then uses the \code{\link{mass.algorithm}} to select the optimal
#' #' mass value for clustering estimation.
#' #'
#' #'
#' #' @param mass optional, a vector of mass values.
#' #' @param list.epam optional, a list of expected pairwise allocation matrices.
#' #' Each matrix in the list needs the attributes "\code{mass}" and
#' #' "\code{n.draws}".
#' #' @param dis a dissimilarity structure of class \code{dist}.
#' #' @param new.draws logical; if \code{TRUE} then new draws are obtained at each
#' #' mass value.
#' #' @param w a vector of length 3 of the weights to be used in the
#' #' \code{\link{mass.algorithm}}.
#' #' @param discount parameter of the Ewens-Pitman Attraction distribution.
#' #' @param temp temperature parameter of the Ewens-Pitman Attraction
#' #' distribution.
#' #' @param loss One of \code{"binder"} or \code{"VI.lb"} to indicate
#' #' the optimization should seek to minimize the expectation of the
#' #' Binder loss (Binder 1978) or the lower bound of the expectation of the variation of
#' #' information loss (Wade & Ghahramani 2017), respectively.
#' #' @param n.draws number of draws of partitions to be obtained at each mass
#' #' value.
#' #' @param two.stage logical; if \code{TRUE}, the two stage algorithm is
#' #' implemented in \code{\link{mass.algorithm}}.
#' #' @param parallel logical; if \code{TRUE} computations will take advantage
#' #' multiple CPU cores.
#' #' @param x An object from the \code{\link{default.mass}} function.
#' #' @param ... currently ignored
#' #'
#' #' @return An object of class \code{shallot.default.mass}. This object is a list
#' #' containing a matrix of `best' possible mass values to maximize partition
#' #' confidence and minimize the variance ratio, the clustering estimate, the
#' #' expected pairwise allocation matrix, parameters used for optimization and
#' #' the EPA distribution, and the list of expected pairwise allocation matrices
#' #' for each mass value.
#' #'
#' #' @family Default Mass Selection
#' #' @importFrom salso salso
#' #' @export
#' #'
#' default.mass <- function(mass, list.epam, dis, new.draws=TRUE, w=c(1,1,1), discount=0, temp=10, loss="binder", n.draws=100L, two.stage=TRUE, parallel=TRUE) {
#' if (missing(mass)) {
#' if (loss == "VI.lb" ) {
#' mass <- seq(0.1,10,0.2)
#' } else {
#' mass <- seq(0.1,5,by=0.05)
#' }
#' }
#'
#' single.epam <- function(mass,discount,dis,n.draws,temp=10,parallel=parallel) {
#' n <- nrow(as.matrix(dis))
#' epa <- ewens.pitman.attraction(mass(mass), discount(discount), attraction(permutation(n.items=n, fixed=FALSE), decay.exponential(temperature(temp), dis)))
#' draws <- sample.partitions(epa, n.draws, parallel = parallel)
#' as.matrix(pairwise.probabilities(draws))
#' }
#'
#' if(missing(list.epam) && new.draws == FALSE) stop("must provide list.epam or obtain new draws")
#' if (missing(list.epam) || new.draws == TRUE) {
#' out <- lapply(mass, single.epam, n.draws=n.draws, discount=discount, dis=dis, temp=temp, parallel=parallel)
#' sapply(1:length(out), function(i) {
#' attr(out[[i]],"mass") <<- mass[i]
#' attr(out[[i]],"n.draws") <<- n.draws
#' attr(out[[i]],"discount") <<- discount
#' attr(out[[i]],"temperature") <<- temp
#' })
#' if (!missing(list.epam)) {
#' mass0 <- sapply(list.epam,attr,"mass")
#' combine <- which(mass0 %in% mass)
#' add <- which(!(mass %in% mass0))
#' if (length(combine) > 0) {
#' sapply(combine, function(i) {
#' n.draws0 <- attr(list.epam[[i]],"n.draws")
#' which.out <- which(mass0[i] == mass)
#' list.epam[[i]] <<- (list.epam[[i]]*n.draws0 + out[[which.out]]*n.draws)/(n.draws + n.draws0)
#' attr(list.epam[[i]],"n.draws") <<- n.draws0 + n.draws
#' })
#' }
#' if (length(add) > 0) {
#' list.epam <- c(list.epam, out[add])
#' }
#' list.epam <- list.epam[order(sapply(list.epam,attr,"mass"))]
#' } else {
#' list.epam <- out
#' }
#' }
#'
#' out.cl <- lapply(list.epam, function(x) {
#' attr(x, "mass") <- NULL
#' attr(x, "discount") <- NULL
#' attr(x, "n.draws") <- NULL
#' attr(x, "temperature") <- NULL
#' salso(x, loss=loss, parallel=parallel)$estimate
#' })
#'
#' mass <- sapply(list.epam, attr, "mass")
#' weight <- sapply(list.epam, attr, "n.draws")
#' pc <- mapply(partition.confidence, out.cl, list.epam)
#' vr <- mapply(variance.ratio, out.cl, list.epam)
#' ncl <- sapply(out.cl,function(X) length(unique(X)))
#'
#' best.mass <- mass.algorithm(mass,pc,vr,ncl,w,two.stage)
#' best.index <- which(mass == best.mass[1,"mass"])
#'
#' output <- list(criteria=best.mass,
#' clustering = out.cl[[best.index]],
#' expectedPairwiseAllocationMatrix = list.epam[[best.index]],
#' params = list(discount = discount, temperature = temp, loss = loss),
#' algorithm = data.frame(mass=mass, pc=pc, vr=vr, nClusters=ncl, nDraws=weight),
#' list.epam = list.epam)
#' class(output) <- "shallot.default.mass"
#' output
#' }
#'
#' #' @rdname default.mass
#' #' @export
#' #'
#' print.shallot.default.mass <- function(x, ...) {
#' cat("Clustering estimated using loss: ",x$params$loss,", with discount ",
#' x$params$discount," and temperature ", x$params$temperature,"\n\n",sep="")
#' cat("The selected mass value is", x$criteria[1,"mass"],"\n")
#' cat("number of clusters: ",x$criteria[1,"n"],"\npartition confidence: ",x$criteria[1,"pc"],"\nvariance ratio: ", x$criteria[1,"vr"], "\n", sep="")
#'
#' cat("\nestimated clustering:\n", x$clustering)
#' cat("\n")
#' }
#'
#'
#'
#'
#'
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.