#' @title Unsupervised random forest clustering with fpc.
#
#' @description Unsupervised random forest clustering. A random forest (RF) classifier is trained to
#' distinguish the true data, labeled as class "True.Data", from synthetic data labeled as class "Synthetic.Data".
#' The synthetic data is generated by taking a random sample from each dimension of the true data, with
#' or without replacement (see \code{\link{RFdist}}). The dissimilarity matrix from \code{\link{RFdist}} is
#' then passed to the algorithms in the "flexible point clustering"
#' \code{fpc} package for clustering and selection of optimal number of clusters through the bootstrap
#' cluster-wise stability method.
#
#' @name UnsupRF
#
#' @param data data.frame or matrix
#' @param RFdist RF distance matrix computed from \code{\link{RFdist}}.
#' @param B number of bootstraps
#' @param clustermethod clustering method, options are \code{pamkCBI}, or \code{claraCBI}, or \code{hclustCBI}.
#' We are not too sure about \code{hclustCBI}; see the \code{fpc} package. \code{pamkCBI} is
#' recommended for RF dissimilarity matrices, but we have found that standard
#' \code{hclust} in base R works well with Ward's minimum variance criterion.
#' @param classification type of prediction for finding optimal number of clusters
#' see \code{\link[fpc]{nselectboot}}.
#' @param krange integer vector; numbers of clusters to be tried
#' @param kopt user provided optimal number of clusters
#' @param run.boot (logical) run bootstrap cluster-wise stability ?
#' @param fun function used to determine the medoids, should be \code{mean}, \code{median},
#' or \code{sum}. See \code{\link{mediod}}
#' @param x object of class \code{\link{UnsupRF}}
#' @param \dots further arguments passed to or from other methods.
#' @return A list with elements:
#' \enumerate{
#' \item cluster.model: The cluster model
#' \item clusters: cluster memberships
#' \item kopt: optimal number of clusters
#' \item mediods: a mediod object
#' }
#' @import fpc
NULL
#' @rdname UnsupRF
#' @export
# S3 generic: hands the call over to the method registered for the class of
# `data`, forwarding every further argument untouched.
UnsupRF <- function(data, ...) {
  UseMethod("UnsupRF")
}
#
#' @rdname UnsupRF
#' @export
#' @examples
#' \dontrun{
#' set.seed(12345)
#' data(iris)
#' dat <- iris[, -5]
#' RF.dist <- RFdist(data=dat, ntree = 10, no.rep=20, syn.type = "permute",
#' importance=TRUE, oob.prox=TRUE, proxConver=FALSE)
#' #
#' Clus.res <- UnsupRF(data = dat, RFdist=RF.dist$RFdist,
#' B = 5, clustermethod=pamkCBI, classification="centroid",
#' krange= 2:4, kopt=2, run.boot = TRUE)
#' print(Clus.res)
#' clusters <- Clus.res$clusters
#' kopt <- Clus.res$kopt # optimal number of clusters
#' }
#
# Default method: clusters the observations from the dissimilarity matrix
# `RFdist` using `clustermethod`. When `run.boot` is TRUE, the number of
# clusters is first chosen with nselectboot() over `krange` and overrides the
# user-supplied `kopt`. `data` is accepted for S3 dispatch but is not
# referenced in this method. Extra arguments in `...` are forwarded to both
# nselectboot() and `clustermethod`.
UnsupRF.default <- function(data,
                            RFdist,
                            B = 10,
                            clustermethod = pamkCBI,
                            classification = "centroid",
                            krange = 2:5,
                            kopt = 2,
                            run.boot = FALSE,
                            fun = "sum",
                            ...) {
  if (run.boot) {
    # Replace kopt with the value selected by the bootstrap procedure.
    kopt <- nselectboot(
      RFdist,
      B = B,
      clustermethod = clustermethod,
      classification = classification,
      krange = krange,
      ...
    )$kopt
  }

  # Fit the final clustering with the chosen number of clusters.
  fit <- clustermethod(RFdist, k = kopt, ...)
  membership <- fit$partition

  # Assemble the classed result; mediod() summarises each cluster using `fun`.
  structure(
    list(
      cluster.model = fit,
      clusters = membership,
      kopt = kopt,
      mediods = mediod(x = RFdist, clusters = membership, fun = fun)
    ),
    class = "UnsupRF"
  )
}
#' @rdname UnsupRF
#' @method print UnsupRF
#' @export
# Print method for "UnsupRF" objects: shows the selected number of clusters,
# the cluster size table and the `mediods` element, then returns `x`
# invisibly (the usual contract for print methods, so the call can be piped
# or assigned without printing the object a second time).
print.UnsupRF <- function(x, ...) {
  if (!inherits(x, "UnsupRF")) {
    stop("Object must be of class \"UnsupRF\"", call. = FALSE)
  }
  # The fitted cluster model itself is currently not printed:
  # print("*** Cluster Model ***")
  # print(x$cluster.model)
  print("*** Optimal number of clusters ***")
  print(x$kopt)
  print("*** Distribution of clusters ***")
  print(table(x$clusters))
  print(x$mediods)
  invisible(x)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.