Nothing
###############################
## Functions for predkmeans ##
###############################
##
## This file contains:
##
## predkmeans()
## print.predkmeans()
## summary.predkmeans()
## print.summary.predkmeans()
## relevel.predkmeans()
## getExpMahal()
## getH()
## getMu()
## getSigma2()
##
##' @name predkmeans
##
##' @title Predictive K-means Clustering
##
##' @description Uses a Mixture-of-experts algorithm to find
##' cluster centers that are influenced by prediction covariates.
##
## Inputs:
##' @param X An \code{n} by \code{p} matrix or data frame of data to be clustered.
##' @param R Covariates used for clustering. Required unless doing k-means
##' clustering (i.e. \code{sigma2=0} and \code{sigma2fixed=TRUE}).
##' @param K Number of clusters
##' @param mu starting values for cluster centers. If NULL (default),
##' then value is chosen according to \code{muStart}.
##' @param muStart Character string indicating how initial value
##' of mu should be selected. Only used if mu=NULL. Possible
##' values are \code{"random"} or \code{"kmeans"} (default).
##' @param sigma2 starting value of sigma2. If set to \code{0} and
##' \code{sigma2fixed=TRUE}, the standard k-means is done
##' instead of predictive k-means.
##' @param sigma2fixed Logical indicating whether sigma2
##' should be held fixed. If FALSE, then
##' sigma2 is estimated using Maximum Likelihood.
##' @param maxitEM Maximum number of EM iterations for
##' finding the Mixture of Experts solution. If doing regular
##' k-means, this is passed as \code{iter.max}.
##' @param tol convergence criterion
##' @param maxitMlogit Maximum number of iterations in the
##' mlogit optimization (nested within EM algorithm)
##' @param muRestart Gives max number of attempts at picking
##' starting values. Only used when muStart='random'.
##' If selected starting values for mu are constant
##' within each cluster, then the starting values
##' are re-selected up to muRestart times.
##' @param convEM controls the measure of convergence for the
##' EM algorithm. Should be one of "mu", "gamma", or
##' "both". Defaults to "both." The EM algorithm
##' stops when the Frobenius norm of the change in mu,
##' the change in gamma, or the change in mu and the change in gamma
##' is less than 'tol'.
##' @param verbose numeric vector indicating how much output to produce
##' @param nStarts number of times to perform EM algorithm
##' @param returnAll A list containing all \code{nStarts} solutions is
##' included in the output.
##' @param ... Additional arguments passed to \code{\link{mlogit}}
##
##' @details
##' A thorough description of this method is provided in Keller et al. (2017).
##' The algorithm for sovling the mixture of Experts model is
##' based upon the approach presented by Jordan and Jacobs (1994).
##'
##' If \code{sigma2} is 0 and \code{sigm2fixed} is TRUE, then standard k-means clustering (using \code{\link{kmeans}}) is done instead.
##'
##
##' @return An object of class \code{predkmeans}, containing the following elements:
##' \item{res.best}{A list containing the results from the best-fitting solution to the Mixture of Experts problem:
##' \describe{
##' \item{mu}{Maximum-likelihood estimate of intercepts from normal mixture model. These are the cluster centers.}
##' \item{gamma}{Maximum-likelihood estimates of the mixture coefficeints.}
##' \item{sigma2}{If \code{sigma2fixed=FALSE}, the maximum likelihood estimate of \code{sigma2}}
##' \item{conv}{Indicator of covergence.}
##' \item{objective}{Value of the log-likelihood.}
##' \item{iter}{Number of iterations.}
##' \item{mfit}{A subset of output from \code{\link{mlogit}}.}
##' }}
##' \item{center}{Matrix of cluster centers}
##' \item{cluster}{Vector of cluster labels assigned to observations}
##' \item{K}{Number of clusters}
##' \item{sigma2}{Final value of sigma^2.}
##' \item{wSS}{Mean within-cluster sum-of-squares}
##' \item{sigma2fixed}{Logical indicator of whether sigma2 was held fixed}
##list(mu=mu, gamma=gamma, sigma2=sigma2, conv=converged, objective= obj, iter=iter, mfit=mfit[c("beta", "fitted01", "fitted", "res.best", "status")])
##
##
##
##' @seealso \code{\link{predictML.predkmeans}, \link{predkmeansCVest}}
##' @references Keller, J.P., Drton, M., Larson, T., Kaufman, J.D., Sandler, D.P., and Szpiro, A.A. (2017). Covariate-adaptive clustering of exposures for air pollution epidemiology cohorts. \emph{Annals of Applied Statistics}, 11(1):93--113.
##' @references Jordan M. and Jacobs R. (1994). Hierarchical mixtures of
##' experts and the EM algorithm. \emph{Neural computation 6}(2),
##' 181-214.
##
##' @importFrom stats kmeans model.matrix
##' @author Joshua Keller
##' @export
##
##' @examples
##' n <- 200
##' r1 <- rnorm(n)
##' r2 <- rnorm(n)
##' u1 <- rbinom(n, size=1,prob=0)
##' cluster <- ifelse(r1<0, ifelse(u1, "A", "B"), ifelse(r2<0, "C", "D"))
##' mu1 <- c(A=2, B=2, C=-2, D=-2)
##' mu2 <- c(A=1, B=-1, C=-1, D=-1)
##' x1 <- rnorm(n, mu1[cluster], 4)
##' x2 <- rnorm(n, mu2[cluster], 4)
##' R <- model.matrix(~r1 + r2)
##' X <- cbind(x1, x2)
##' pkm <- predkmeans(X=cbind(x1, x2), R=R, K=4)
##' summary(pkm)
predkmeans <- function(X, R, K, mu=NULL, muStart=c("kmeans","random"), sigma2=0, sigma2fixed=FALSE,maxitEM=100, tol=1e-5, convEM=c("both", "mu", "gamma"), nStarts=1, maxitMlogit=500,verbose=0, muRestart=1000, returnAll=FALSE, ...) {
if(is.null(K)){
stop("K not provided. Please provide.")
}
# Check that arguments are correct
muStart <- match.arg(muStart)
convEM <- match.arg(convEM)
n <- nrow(X)
if (n<K){
stop("Cannot select more clusters than observations")
}
# If sigma2 is 0 and fixed, then return kmeans result
if (sigma2==0 && sigma2fixed){
res.best <- kmeans(x=X, centers=K, nstart=nStarts, iter.max=maxitEM)
centers <- res.best$centers
tr.assign <- res.best$cluster
wSS <- res.best$tot.withinss
out <- list(res.best= res.best, centers=centers, cluster=tr.assign, K=K, sigma2=0, wSS=wSS, sigma2fixed= sigma2fixed)
class(out) <- "predkmeans"
return(out)
}
# Check dimensions, wait until now, in case doing k-means
# and R not needed
if (n!=nrow(R)){
stop("Number of outcome observations does not match number of covariate observations.")
}
R <- as.matrix(R)
d <- ncol(X) # Dimension of outcome
if (is.null(colnames(X))) colnames(X) <- paste0("X", 1:d)
p <- ncol(R)
mu_orig <- mu # Store original value of mu
results <- vector("list", nStarts)
for (i in 1:nStarts){
if (verbose>0) cat("Fit", i, "of", nStarts, "\n")
# Initialize mu
mu <- mu_orig
if (is.null(mu)){
if (muStart=="random"){
all_same <- TRUE
all_same_iter <- 1
while(all_same){
all_same_iter <- all_same_iter + 1
if (all_same_iter > muRestart){
stop(paste0("Picked ", muRestart, " random sets of starting centers and all were equal. Check whether data is identical. If necessary, change seed or increase 'all_same_iter_limit'."))
}
mu <- X[sample(1:nrow(X), size=K), ]
all_same <- all(apply(mu, 2, function(x) diff(range(x))<.Machine$double.eps ^ 0.5))
}
} else if (muStart=="kmeans"){
mu <- stats::kmeans(X, centers=K)$centers
}
rownames(mu) <- 1:K
} # if (is.mull(mu))
# Compute initial values for h (Step E-1)
h0 <- assignCluster(X, centers=mu)
h0 <- stats::model.matrix(~0 + factor(h0, levels=1:K))
#colnames(h0) <- paste0("C", 1:K)
colnames(h0) <- 1:K
# Compute mu, gamma, and sigma2 (Step M-1)
mu <- getMu(X, h0)
if(!sigma2fixed){
sigma2 <- getSigma2(X, mu, h0)
}
mfit0 <- mlogit(h0, R, add.intercept=F, verbose=verbose>3, suppressFittedWarning=TRUE, iterlim=maxitMlogit)
gamma <- mfit0$beta # cbind(C1=0, mfit0$beta)
converged <- 0
iter <- 0
convCheck <- .Machine$integer.max
if (verbose>2) cat("Mixture of Experts code running with\n", K, " experts each finding a mean in ", d, " variables.\nThe gating networks each have ", p, " parameters\n", sep="")
# EM algorithm
while (!converged){
iter <- iter + 1
if (iter> maxitEM) {
if (verbose>1) cat("Stopping: Reached maximum iterations\n")
converged <- 9
break
}
# E-step
h <- getH(X, R, gamma, mu, sigma2=sigma2)
# M-step, Part 1a: mu
mu_new <- getMu(X, h)
# M-step, Part 1b: sigma2
if(!sigma2fixed){
sigma2 <- getSigma2(X, mu_new, h)
}
# M-step, Part 2: gamma
mfit <- mlogit(Y=h, X=R, beta=as.vector(gamma[,-1]), add.intercept=F, iterlim= maxitMlogit, verbose=verbose>3, checkY=FALSE, ...)
# Quesiton: Should I stop freeze the parameters for a certain cluster, if
# I have achieved all 0/1's?
if (!mfit$res.best$conv) {
if (verbose>1) message("EM-algorithm: mlogit optimization error in updating gamma.")
converged <- 8
break
} else {
gamma_new <- mfit$beta # cbind(0, mfit$beta)
}
# Check for convergence
convCheck <- switch(convEM, mu=norm(mu_new - mu, type="F") <tol,
gamma= norm(gamma_new - gamma, type="F") <tol,
both=norm(mu_new - mu, type="F") <tol && norm(gamma_new - gamma, type="F") < tol)
# Force at least three iterations?
if(convCheck && iter>2) {
converged <- 1
convCheck <- norm(mu_new - mu, type="F")
}
gamma <- gamma_new
mu <- mu_new
} ## while (!converged)
# Compute the objective function value
#Uproxy <- exp(R %*% gamma)/rowSums(exp(R %*% gamma))
Uproxy <- R %*% gamma
Uproxy <- apply(Uproxy, 2, function(x) 1/rowSums(exp(Uproxy-x)))
# Objective is the log likelihood
obj <- 0
for (kk in 1:nrow(X)){
q <- sweep(mu, 2, X[kk,])
obj <- obj + log(sum(sigma2^(-d/2)*exp(-1/(2*sigma2)*rowSums(q*q)) * Uproxy[kk,]))
}
results[[i]] <- list(mu=mu, gamma=gamma, sigma2=sigma2, conv=converged, objective= obj, iter=iter, h=h, mfit=mfit[c("beta", "fitted01", "fitted", "res.best", "status")])
if (verbose>1) {
cat("EM Algorithm completed\n")
cat("Convergence code:", converged, "\n")
cat("Number of EM iterations:", iter, "\n")
cat("Norm of last mu update:", convCheck, "\n")
cat("Value of objective:", obj, "\n")
cat("Sigma^2:", sigma2, "\n\n")
}
}
best.fit <- which.max(lapply(results, function(x) x$objective))
# if(!results[[best.fit]]$mfit$status$conv){
# warning("Best Fit has mlogit optimization error.")
# }
res.best <- results[[best.fit]]
centers <- res.best $mu
tr.assign <- assignCluster(X, centers= centers)
wSS <- sum((X - centers[tr.assign,])^2)
out <- list(res.best= res.best, centers=centers, cluster=tr.assign, K=K, sigma2= res.best$sigma2, wSS=wSS, sigma2fixed= sigma2fixed)
if (returnAll){
out <- c(out, res.all=list(results), best.fit=best.fit)
}
class(out) <- "predkmeans"
out
}## predkmeans()
###############################
## S3Methods for predkmeans ##
###############################
##' @export
print.predkmeans <- function(x, ...){
if(!inherits(x,"predkmeans")){
stop("x must be of (or inherit) class 'predkmeans'.")
}
cat("Predictive k-means object with\n" )
cat(" ", x$K, "Clusters\n")
cat(" ", ncol(x$centers), "Variables\n")
# cat("Convergence status: ", x$res.best$conv)
invisible(x)
}##print.predkmeans()
##' @export
summary.predkmeans <- function(object, ...){
class(object) <- "summary.predkmeans"
object
}##summary.predkmeans()
##' @export
print.summary.predkmeans <- function(x, ...){
cat("Predictive k-means object with\n" )
cat(" ", x$K, "Clusters\n")
cat(" ", ncol(x$centers), "Variables\n")
cat("Convergence status: ", ifelse(inherits(x$res.best, "kmeans"), 1, x$res.best$conv), "\n")
cat("Sigma^2 = ", round(x$sigma2, 2), " (Fixed = ", x$sigma2fixed, ")\n", sep="")
cat("Within-cluster Sum-of-Squares (wSS) = ", round(x$wSS, 2), "\n")
cat("Cluster centers are:\n")
print(x$centers)
}
##' @name relevel.predkmeans
##' @title Re-order cluster labels
##' @description Function for re-ordering the order of clusters in a predkmeans object.
##
##' @param x object of class \code{predkmeans}
##' @param ref New reference group ("Cluster 1"). Only used if \code{order} is NULL.
##' @param order New order of clusters.
##' @param ... Ignored additional arguments.
##
##' @details The elements of the \code{order} argument should refer
##' to the current position of clusters, with the position
##' giving the new order. So \code{c(3, 1, 2)} moves 1 to 2, 2 to 3, and 3 to 1.
##
##' @author Joshua Keller
##' @export
##' @family methods for predkmeans objects
##' @importFrom stats relevel
##' @examples
##' n <- 200
##' r1 <- rnorm(n)
##' r2 <- rnorm(n)
##' u1 <- rbinom(n, size=1,prob=0)
##' cluster <- ifelse(r1<0, ifelse(u1, "A", "B"), ifelse(r2<0, "C", "D"))
##' mu1 <- c(A=2, B=2, C=-2, D=-2)
##' mu2 <- c(A=1, B=-1, C=-1, D=-1)
##' x1 <- rnorm(n, mu1[cluster], 4)
##' x2 <- rnorm(n, mu2[cluster], 4)
##' R <- model.matrix(~r1 + r2)
##' X <- cbind(x1, x2)
##' pkm <- predkmeans(X=cbind(x1, x2), R=R, K=4)
##' table(pkm$cluster)
##'
##' # Move cluster '4' to be first
##' pkm2 <- relevel(pkm, ref=4)
##' table(pkm2$cluster)
##' # Re-order based upon number of observations in each cluster
##' pkm3 <- relevel(pkm, order=order(table(pkm$cluster), decreasing=TRUE))
##' table(pkm3$cluster)
relevel.predkmeans <- function(x, ref=NULL, order=NULL, ...) {
if(!inherits(x, "predkmeans")){
stop("x must be of class predkmeans.")
}
if (is.null(order)){
if (is.null(ref)){
return(x)
}
if (length(ref)>1){
warning("Only first element of 'ref' used.")
ref <- ref[1]
}
ref <- suppressWarnings(as.integer(ref))
if (is.na(ref)){
stop("Cannot convert 'ref' to integer.")
}
order <- 1:x$K
order <- c(ref, order[-ref])
}
xcluster <- x$cluster
for (k in 1:x$K){
x$cluster[xcluster==order[k]] <- k
}
x$centers <- x$centers[order, ]
rownames(x$centers) <- 1:x$K
if (inherits(x$res.best,"kmeans")){
x$res.best$cluster <- x$cluster
x$res.best$centers <- x$centers
x$res.best$withinss <- x$res.best$withinss[order]
x$res.best$size <- x$res.best$size[order]
} else {
x$res.best$mu <- x$res.best$mu[order,]
rownames(x$res.best$mu) <- 1:x$K
x$res.best$gamma <- x$res.best$gamma[,order]
colnames(x$res.best$gamma) <- 1:x$K
x$res.best$h <- x$res.best$h[,order]
colnames(x$res.best$h) <- 1:x$K
x$res.best$mfit$beta <- x$res.best$mfit$beta[,order]
colnames(x$res.best$mfit$beta) <- 1:x$K
x$res.best$mfit$fitted <- x$res.best$mfit$fitted[,order]
colnames(x$res.best$mfit$fitted) <- 1:x$K
x$res.best$mfit$res.best$note <- "Cluster have been re-ordered. See re-ordering sequence in 'reorder' object."
x$res.best$mfit$res.best$reorder <- c(x$res.best$mfit$res.best$reorder, list(order))
}
return(x)
}## relevel.predkmeans
###############################################
# For each row Xi of X, this computes
# exp(-1/(2*sigma2) * (Xi - mu)^T(Xi - mu))
# This is computed for the matrix X at once, rather than row by row
getExpMahal <- function(X, mu, sigma2) {
q <- sweep(X, 2, mu)
exp(-1/(2*sigma2)*rowSums(q*q))
}
###############################################
# Computes h, the expected value of z (the latent variable
# indicating cluster membership) conditional on the outcomes
# X and the current parameter values of gamma and mu
#
# Input:
# X -- Matrix of outcome (e.g., AQS) data. This should have n rows of
# observations and d columns of observed variables (e.g., pollutants)
# R -- Matrix of covariate information (e.g., GIS variables). This should
# have n rows of observations and p number of covariate columns.
# This matrix is taken as-is, so if an intercept is desired it should already
# be included as a column of R.
# gamma -- Matrix of parameter estimates for the multinomial logistic
# regression of the geographic covariates. Should contain
# p rows of parameters for each of K columns (corresponding
# to the cluster centers
# mu -- Matrix of cluster centers. Should have d columns for each of
# K rows
#
getH <- function(X, R, gamma, mu, sigma2=sigma2){
Px <- apply(mu, 1, function(m) getExpMahal(X, m, sigma2=sigma2))
# Rg <- exp(R %*% gamma)/rowSums(exp(R %*% gamma))
Rg <- R %*% gamma
Rg <- apply(Rg, 2, function(x) 1/rowSums(exp(Rg-x)))
num <- Px*Rg
h <- num/rowSums(num)
h
}
##############################################
# Computes the weighted centers of clusters for the EM
# implementation of the Mixture of Experts problem.
getMu <- function(X, h){
mu <- apply(h, 2, function(hk) colSums(hk*X)/sum(hk))
t(mu)
}
getSigma2 <- function(X, mu, h){
K <- ncol(h)
p <- ncol(mu)
mahal <- apply(mu, 1, function(m) rowSums(sweep(X, 2, m)^2))
num <- sum(h*mahal)
denom <- p*sum(h)
num/denom
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.