Nothing
#' Append a Cluster Membership Variable to a Dataframe
#'
#' @name assignCluster
#'
#' @details
#' Correctly creates a cluster membership variable that can be attached to a dataframe when only a subset of the observations in that dataframe were used to create the clustering solution.
#' NAs are assigned to the observations of the original dataframe not used in creating the clustering solution.
#'
#' This code originally by Dan Putler, used with permission.
#'
#' @author Dan Putler
#'
#' @param clusterData The data matrix used in the clustering solution. The data matrix may have have only a subset of the observations contained in the original dataframe.
#' @param origData The original dataframe from which the data used in the clustering solution were taken.
#' @param clusterVec An integer variable containing the cluster membership assignments for the observations used in creating the clustering solution.
#' This vector can be created using \code{cutree} for clustering solutions generated by \code{hclust} or the \code{cluster} component of a list object created by \code{kmeans} or \code{KMeans}.
#'
#' @return A factor (with integer labels) that indicate the cluster assignment for each observation, with an NA value given to observations not used in the clustering solution.
#'
#' @seealso \code{\link[stats]{hclust}}, \code{\link[stats]{cutree}}, \code{\link[stats]{kmeans}}, \code{\link{KMeans}}.
#'
#' @examples
#' ## Load USArrests data set
#' data(USArrests)
#'
#' ## Create three cluster
#' USArrkm3 <- KMeans(USArrests[USArrests$UrbanPop<66, ], centers=3)
#'
#' ## Create a variable with cluster assignment
#' assignCluster(USArrests[USArrests$UrbanPop<66, ], USArrests, USArrkm3$cluster)
#'
#' @export
assignCluster <- function(clusterData, origData, clusterVec){
rowsDX <- row.names(clusterData)
rowsX <- row.names(origData)
clustAssign <- rep(NA, length(rowsX))
validData <- rowsX %in% rowsDX
clustAssign[validData] <- clusterVec
return(as.factor(clustAssign))
}
#' K-Means Clustering Using Multiple Random Seeds
#'
#' @name KMeans
#'
#' @keywords misc
#'
#' @details
#' Finds a number of k-means clusting solutions using R's \code{kmeans} function, and selects as the final solution the one that has the minimum total within-cluster sum of squared distances.
#' @param x A numeric matrix of data, or an object that can be coerced to such a matrix (such as a numeric vector or a dataframe with all numeric columns).
#' @param centers The number of clusters in the solution.
#' @param iter.max The maximum number of iterations allowed.
#' @param num.seeds The number of different starting random seeds to use. Each random seed results in a different k-means solution.
#'
#' @return
#' A list with components:
#' \describe{
#' \item{cluster}{A vector of integers indicating the cluster to which each point is allocated.}
#' \item{centers}{A matrix of cluster centres (centroids).}
#' \item{withinss}{The within-cluster sum of squares for each cluster.}
#' \item{tot.withinss}{The within-cluster sum of squares summed across clusters.}
#' \item{betweenss}{The between-cluster sum of squared distances.}
#' \item{size}{The number of points in each cluster.}
#' }
#'
#' @author Dan Putler
#'
#' @seealso \code{\link[stats]{kmeans}}
#'
#' @examples
#' data(USArrests)
#' KMeans(USArrests, centers=3, iter.max=5, num.seeds=5)
#'
#' @export
KMeans <- function (x, centers, iter.max=10, num.seeds=10) {
## fixed 15 Mar 05 by J. Fox
if(mode(x)=="numeric") x<-data.frame(new.x=x)
KM <- kmeans(x=x, centers=centers, iter.max=iter.max)
for(i in 2:num.seeds) {
newKM <- kmeans(x=x, centers=centers, iter.max=iter.max)
if(sum(newKM$withinss) < sum(KM$withinss)) {
KM <- newKM
}
}
KM$tot.withinss <- sum(KM$withinss)
xmean <- apply(x, 2, mean)
centers <- rbind(KM$centers, xmean)
bss1 <- as.matrix(dist(centers)^2)
KM$betweenss <- sum(as.vector(bss1[nrow(bss1),])*c(KM$size,0))
return(KM)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.