#' Get optimal number of components
#'
#' Compute the average silhouette coefficient for a given set of components on a mixOmics result.
#' Foreach given ncomp, the mixOmics method is performed with the sames arguments and the given `ncomp`.
#' Longitudinal clustering is performed and average silhouette coefficient is computed.
#'
#' @param object A mixOmics object of the class `pca`, `spca`, `mixo_pls`, `mixo_spls`, `block.pls`, `block.spls`
#'
#' @param max.ncomp integer, maximum number of component to include.
#' If no argument is given, `max.ncomp=object$ncomp`
#'
#' @param X a numeric matrix/data.frame or a list of data.frame for \code{block.pls}
#'
#' @param Y (only for \code{pls}, optional for \code{block.spls}) a numeric matrix, with the same nrow as \code{X}
#'
#' @param indY (optional and only for \code{block.pls}, if Y is not provided), an integer which indicates the position of the matrix response in the list X
#'
#' @param ... Other arguments to be passed to methods (pca, pls, block.pls)
#'
#' @return
#' \code{getNcomp} returns a list with class "ncomp.tune.silhouette" containing the following components:
#'
#' \item{ncomp}{a vector containing the tested ncomp}
#' \item{silhouette}{a vector containing the average silhouette coefficient by ncomp}
#' \item{dmatrix}{the distance matrix used to compute silhouette coefficient}
#'
#' @seealso
#' \code{\link{getCluster}}, \code{\link{silhouette}}, \code{\link[mixOmics]{pca}}, \code{\link[mixOmics]{pls}}, \code{\link[mixOmics]{block.pls}}
#'
#' @examples
#' # random input data
#' demo <- suppressWarnings(get_demo_cluster())
#'
#' # pca
#' pca.res <- mixOmics::pca(X=demo$X, ncomp = 5)
#' res.ncomp <- getNcomp(pca.res, max.ncomp = 4, X = demo$X)
#' plot(res.ncomp)
#'
#' # pls
#' pls.res <- mixOmics::pls(X=demo$X, Y=demo$Y)
#' res.ncomp <- getNcomp(pls.res, max.ncomp = 4, X = demo$X, Y=demo$Y)
#' plot(res.ncomp)
#'
#' # block.pls
#' block.pls.res <- suppressWarnings(mixOmics::block.pls(X=list(X=demo$X, Z=demo$Z), Y=demo$Y))
#' res.ncomp <- suppressWarnings(getNcomp(block.pls.res, max.ncomp = 4,
#' X=list(X=demo$X, Z=demo$Z), Y=demo$Y))
#' plot(res.ncomp)
#'
#' @export
#' @import mixOmics
getNcomp <- function(object, max.ncomp = NULL, X, Y = NULL, indY = NULL, ...){
#-- checking input parameters ---------------------------------------------#
#--------------------------------------------------------------------------#
#-- object
allowed_object = c("pca", "mixo_pls", "block.pls")
if(!any(class(object) %in% allowed_object)){
stop("invalid object, should be one of c(pca, mixo_pls, block.pls)")
}
#-- max.ncomp
if(is_almostInteger(max.ncomp)){
if (max.ncomp < 1)
stop("'max.ncomp' should be greater than 1")
if(is(object, "block.pls")){
if (max.ncomp > min(ncol(object$X[[1]]), nrow(object$X[[1]])))
stop("use smaller 'max.ncomp'")
} else {
if (max.ncomp > min(ncol(object$X), nrow(object$X)))
stop("use smaller 'max.ncomp'")
}
} else {
max.ncomp <- unique(object$ncomp)
}
#-- run #pca / pls / block.pls
ncomp.opt.res <- ncomp.silhouette(object, X, Y, max.ncomp, indY, ...)
to_return <- list()
to_return[["ncomp"]] <- c(0,1:max.ncomp)
to_return[["silhouette"]] <- c(0,ncomp.opt.res$silhouette.res)
to_return[["dmatrix"]] <- ncomp.opt.res$dmatrix
to_return[["choice.ncomp"]] <- to_return[["ncomp"]][which.max(to_return[["silhouette"]])]
class(to_return) <- "ncomp.tune.silhouette"
return(invisible(to_return))
}
ncomp.silhouette <- function(object, X = X, max.ncomp = max.ncomp, ...){
UseMethod("ncomp.silhouette")
}
#' @import mixOmics
ncomp.silhouette.pca <- function(object, X, Y, max.ncomp, indY, ...){
#-- check X
X <- validate_matrix_X(X)
#-- dmatrix
dmatrix <- dmatrix.spearman.dissimilarity(X)
silhouette.res <- vector(length = max.ncomp)
#-- iterative ncomp silhouette coef.
for(comp in 1:max.ncomp){
#-- mixo pca
mixo.res <- mixOmics::pca(X = X, ncomp = comp, ...)
#-- cluster
cluster.res <- getCluster(X = mixo.res)
# same names, same cluster
stopifnot(all(cluster.res$molecule == colnames(dmatrix)))
#-- silhouette
sil <- silhouette(dmatrix, cluster.res$cluster)
#-- store
silhouette.res[comp] <- sil$average
}
return(list(silhouette.res = silhouette.res, dmatrix = dmatrix))
}
#' @import mixOmics
ncomp.silhouette.mixo_pls <- function(object, X, Y, max.ncomp, indY, ...){
#-- check X
X <- validate_matrix_X(X)
#-- check Y
Y <- validate_matrix_Y(Y)
#-- dmatrix
dmatrix <- dmatrix.spearman.dissimilarity(cbind(X,Y))
silhouette.res <- vector(length = max.ncomp)
#-- iterative ncomp silhouette coef.
for(comp in 1:max.ncomp){
#-- mixo pls
mixo.res <- mixOmics::pls(X = X, Y=Y, ncomp = comp, ...)
#-- cluster
cluster.res <- getCluster(X = mixo.res)
# same names, same cluster
stopifnot(all(cluster.res$molecule == colnames(dmatrix)))
#-- silhouette
sil <- silhouette(dmatrix, cluster.res$cluster)
#-- store
silhouette.res[comp] <- sil$average
}
return(list(silhouette.res = silhouette.res, dmatrix = dmatrix))
}
#' @import mixOmics
ncomp.silhouette.block.pls <- function(object, X, Y, max.ncomp, indY, ...){
#-- check X
X <- validate_list_matrix_X(X)
data <- do.call("cbind", X)
#-- Y
if(!is.null(Y)){
Y <- validate_matrix_Y(Y)
dmatrix <- dmatrix.spearman.dissimilarity(cbind(data,Y))
indY <- NULL
} else {
indY <- validate_indY(indY = indY, X=X)
dmatrix <- dmatrix.spearman.dissimilarity(data)
}
silhouette.res <- vector(length = max.ncomp)
#-- iterative ncomp silhouette coef.
for(comp in 1:max.ncomp){
#-- mixo block.pls
if(is.null(indY)){
mixo.res <- mixOmics::block.pls(X = X, ncomp = comp, Y = Y, ...)
}else{
mixo.res <- mixOmics::block.pls(X = X, ncomp = comp, indY = indY, ...)
}
#-- cluster
cluster.res <- getCluster(X = mixo.res)
# same names, same cluster
stopifnot(all(cluster.res$molecule == colnames(dmatrix)))
#-- silhouette
sil <- silhouette(dmatrix, cluster.res$cluster)
#-- store
silhouette.res[comp] <- sil$average
}
return(list(silhouette.res = silhouette.res, dmatrix = dmatrix))
}
#' @export
#' @import ggplot2
plot.ncomp.tune.silhouette <- function(x, title = NULL, ...){
stopifnot(is(x, "ncomp.tune.silhouette"))
# check title
if(!is.character(title)){title = NULL}
data <- as.data.frame(list(ncomp = x$ncomp, silhouette = x$silhouette))
ggplot_df <- ggplot2::ggplot(data, aes(x=ncomp, y = silhouette)) + geom_line() + geom_point() +
geom_vline(xintercept = x$choice.ncomp, lty=2, col = "grey") +
theme_bw() +
xlab("Number of Principal Components") +
ylab("Average Silhouette Coefficient")
# add title
if(is.character(title)){
ggplot_df <- ggplot_df + ggtitle(title)
}
print(ggplot_df)
return(invisible(ggplot_df))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.