#' @title Plots from a "spwkm" object
#' @export
#'
#' @description Produces several graphics to help interpreting a \code{spwkm} object.
#'
#' @param x An object of class \code{spwkm}.
#' @param what A character string indicating which element of \code{x} to be plotted. See section
#' "Details" below for further information.
#' @param Which A numerical vector indexing the groups or the variables to be displayed. See section
#' "Details" below for further information.
#' @param xtitle The title of the x-axis.
#' @param ytitle The title of the y-axis.
#' @param title The title of the graphic.
#' @param showlegend A boolean. If \code{showlegend=NULL} (default value), the legend is displayed.
#' @param legendtitle The title of the legend.
#' @param ... Further arguments to the \code{plot} function.
#'
#' @return \item{p}{an object of class \code{ggplot}.}
#'
#' @details
#' The \code{plot} function allows to represent the regularization paths for a grid of values of \code{lambda}, as well as several quality criteria associated to the
#' clustering.
#'
#' For both \code{groupsparsewkm} and \code{sparsewkm} functions, the following options are available:
#'
#' If \code{what=weights.features}, the regularization paths for the weights associated to the variables are displayed.
#'
#' If \code{what=sel.features}, the graph represents the number of selected variables for each value of the regularization parameter \code{lambda}. In the case of
#' sparse weighted k-means for mixed data, categorical variables are represented with dotted lines so that one easily identifies them.
#'
#' If \code{what=expl.var}, the explained variance (computed as the contribution of the between-class variance to the global variance) is displayed. This criterion is
#' computed for all variables in the data set, without taking into account the weights of the group or of the variables.
#'
#' If \code{what=w.expl.var}, the explained weighted variance is computed. The difference with the criterion above is that the weights of the variables are
#' taken into account in the computation. This leads to a criterion which, for large regularization parameters \code{lambda}, may be computed on one variable only, if
#' its weight becomes equal to 1 and all the others are discarded.
#'
#' If \code{what=pen.crit}, the graph displays the evolution of the penalized criterion, maximized by the algorithm. This criterion writes as
#' the between-class weighted sum-of-squares, penalized by a group L1-norm. For more details on the mathematical expressions, one may refer to Chavel et al. (2020).
#'
#' For the outcome of the \code{groupsparsewkm} function trained on numerical data only, two more options are available:
#'
#' If \code{what=weights.groups}, the regularization paths for the weights associated to the groups of variables are displayed.
#'
#' If \code{what=sel.groups}, the graph represents the number of selected groups for each value of the regularization parameter \code{lambda}.
#'
#' For the outcome of the \code{sparsewkm} function trained on mixed data, two more options are also available:
#'
#' If \code{what=weights.levels}, the regularization paths for the weights associated to the levels of the categorical variables are displayed.
#'
#' If \code{what=sel.levels}, the graph represents the number of selected levels associated to the categorical variables plus the number of selected
#' numerical variables, for each value of the regularization parameter \code{lambda}.
#'
#' If the number of groups in \code{groupsparsewkm} or if the number of features in \code{sparsewkm} are too large to have easily interpretable graphics, one may select
#' some groups or some variables using the argument \code{Which}. Note that when training \code{sparsewkm} on mixed data, the initial order of the variables is changed:
#' after the processing step, numerical variables are displayed first, and categorical second. The indexing provided in \code{Which} should take this into account (see the
#' Examples section).
#'
#' @references M., Chavent, J. Lacaille, A. Mourer, and M. Olteanu (2020).
#' Sparse k-means for mixed data via group-sparse clustering. To appear in ESANN proceedings.
#'
#' @seealso \code{\link{sparsewkm}}, \code{\link{groupsparsewkm}}
#'
#' @examples
#' # sparse weighted k-means on mixed data
#' \donttest{
#' data(HDdata)
#' out <- sparsewkm(X = HDdata[,-14], centers = 2)
#' plot(out, what = "weights.features")
#' plot(out, what = "weights.levels")
#' plot(out, what = "sel.features")
#' plot(out, what = "sel.levels")
#' plot(out, what = "expl.var")
#' plot(out, what = "w.expl.var")
#' plot(out, what = "pen.crit")
#' # plot the regularization paths for first three variables only
#' plot(out, what = "weights.features", Which=1:3)
#'
#' # group sparse weighted k-means on numerical data
#' data(iris)
#' index <- c(1, 2, 1, 2)
#' out <- groupsparsewkm(X = iris[,-5], centers = 3, index = index)
#' plot(out, what = "weights.groups")
#' plot(out, what = "weights.features")
#' plot(out, what = "sel.groups")
#' plot(out, what = "sel.features")
#' plot(out, what = "expl.var")
#' plot(out, what = "w.expl.var")
#' plot(out, what = "pen.crit")
#' # plot the regularization paths for the variables in the first group only
#' plot(out, what = "weights.features", Which=1)
#' }
#' @import ggplot2 Polychrome
plot.spwkm <- function(x, what="weights.features", Which=NULL, xtitle =NULL, ytitle = NULL,
title = NULL, showlegend = NULL,
legendtitle = NULL, ...)
{
if (!inherits(x, "spwkm"))
stop("Use only with \"spwkm\" objects")
if (x$type=="NumGroupSparse")
{
if (what=="weights.levels")
stop("The implementation does not allow to perform group-sparse k-means on mixed data yet!")
if (what=="sel.levels")
stop("The implementation does not allow to perform group-sparse k-means on mixed data yet!")
if (what=="weights.groups")
{
data.to.plot <- data.frame(weights=c(t(x$Wg)), lambda=rep(x$lambda, length(unique(x$index))),
group=rep(rownames(x$Wg), each=length(x$lambda)),
color=as.factor(rep(1:length(unique(x$index)),
each=length(x$lambda))))
color.count <- length(unique(x$index))
if (is.null(Which)==FALSE)
{data.to.plot <- data.to.plot[data.to.plot$color%in%Which,]} else
Which <- unique(x$index)
get.palette <- grDevices::colorRampPalette(glasbey.colors(n=32))
colors.groups <- get.palette(color.count)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Group weights"
if (is.null(title))
title = "Groups of features - regularization paths"
if(is.null(showlegend))
showlegend = T
if (is.null(legendtitle))
legendtitle = "Group of features"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$weights, group=as.factor(.data$group))) +
geom_line(aes(color=.data$color))+
geom_point(aes(color=.data$color))+
scale_colour_manual("", values=colors.groups[Which], labels=rownames(x$Wg)[Which]) +
xlab(xtitle)+ylab(ytitle)+
labs(color = legendtitle, linetype = legendtitle)+ggtitle(title)
if (showlegend==F)
p <- p + theme(legend.position="none")
return(p)
}
if (what=="weights.features")
{
data.to.plot <- data.frame(weights=c(t(x$W)), lambda=rep(x$lambda, dim(x$W)[1]),
group=rep(rownames(x$W), each=length(x$lambda)),
color=as.factor(rep(x$index,
each=length(x$lambda))))
color.count <- length(unique(x$index))
if (is.null(Which)==FALSE)
{data.to.plot <- data.to.plot[data.to.plot$color%in%Which,]} else
Which <- unique(x$index)
get.palette <- grDevices::colorRampPalette(glasbey.colors(n=32))
colors.groups <- get.palette(color.count)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Features weights"
if (is.null(title))
title = "Features - regularization paths"
if(is.null(showlegend))
showlegend = T
if (is.null(legendtitle))
legendtitle = "Features"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$weights, group=as.factor(.data$group),
color=.data$color)) +
geom_line()+ geom_point()+
scale_colour_manual("", values=colors.groups[unique(data.to.plot$color)],
labels= paste("Group ", unique(data.to.plot$color), sep=""))+
xlab(xtitle)+ ylab(ytitle)+ggtitle(title)
if (showlegend==F)
p <- p + theme(legend.position="none")
return(p)
}
if (what=="sel.groups")
{
data.to.plot <- data.frame(sel.groups=x$sel.groups, lambda=x$lambda)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Number of selected groups"
if (is.null(title))
title = "Selected groups path"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$sel.groups)) +
geom_line()+geom_point()+xlab(xtitle)+ylab(ytitle)+ggtitle(title)
return(p)
}
if (what=="sel.features")
{
data.to.plot <- data.frame(sel.features=x$sel.feat, lambda=x$lambda)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Number of selected features"
if (is.null(title))
title = "Selected features path"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$sel.features)) +
geom_line()+geom_point()+xlab(xtitle)+ylab(ytitle)+ggtitle(title)
return(p)
}
if (what=="expl.var")
{
data.to.plot <- data.frame(bss=apply(x$bss.per.feature,2,sum)/dim(x$W)[1], lambda=x$lambda)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Explained variance"
if (is.null(title))
title = "Explained variance path"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$bss)) +
geom_line()+geom_point()+xlab(xtitle)+ylab(ytitle)+ggtitle(title)
return(p)
}
if (what=="pen.crit")
{
penalized.criterion <- sapply(1:length(x$lambda), function(i){sum(x$W[,i]*x$bss.per.feature[,i])-
x$lambda[i]*sum(sqrt(summary(as.factor(x$index)))*x$Wg[,i])})
data.to.plot <- data.frame(penalized.criterion, lambda=x$lambda)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Penalized weigthed between-class variance"
if (is.null(title))
title = "Penalized criterion path"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$penalized.criterion)) +
geom_line()+geom_point()+xlab(xtitle)+ylab(ytitle)+ggtitle(title)
return(p)
}
if (what=="w.expl.var")
{
bss <- sapply(1:length(x$lambda), function(i){sum(x$W[,i]*x$bss.per.feature[,i])})/apply(x$W,2,sum)
data.to.plot <- data.frame(bss, lambda=x$lambda)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Explained weighted variance"
if (is.null(title))
title = "Explained weighted-variance path"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$bss)) +
geom_line()+geom_point()+xlab(xtitle)+ylab(ytitle)+ggtitle(title)
return(p)
}
}
if (x$type=="MixedSparse")
{
if (what=="weights.groups")
stop("The implementation does not allow to perform group-sparse k-means on mixed data yet!")
if (what=="sel.groups")
stop("The implementation does not allow to perform group-sparse k-means on mixed data yet!")
if (what=="weights.features")
{
data.to.plot <- data.frame(weights=c(t(x$W)), lambda=rep(x$lambda, length(unique(x$index))),
feature=rep(rownames(x$W), each=length(x$lambda)),
color=as.factor(rep(1:length(unique(x$index)),
each=length(x$lambda))),
line.type=as.factor(rep(as.numeric(summary(as.factor(x$index))>1)*2+1,
each=length(x$lambda))))
color.count <- length(unique(x$index))
line.groups <- as.numeric(summary(as.factor(x$index))>1)*2+1
if (is.null(Which)==FALSE)
{data.to.plot <- data.to.plot[data.to.plot$color%in%Which,]} else
Which <- unique(x$index)
get.palette <- grDevices::colorRampPalette(glasbey.colors(n=32))
colors.groups <- get.palette(color.count)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Features weights"
if (is.null(title))
title = "Features - regularization paths"
if(is.null(showlegend))
showlegend = T
if (is.null(legendtitle))
legendtitle = ""
p <- ggplot(data.to.plot, aes(x=.data$lambda, y=.data$weights,
linetype=interaction(.data$color, .data$line.type),
col=interaction(.data$line.type, .data$color))) +
geom_line() +
geom_point() +
scale_colour_manual("", values=colors.groups[Which], labels=rownames(x$W)[Which]) +
scale_linetype_manual("", values=line.groups[Which], labels=rownames(x$W)[Which])+
xlab(xtitle)+ylab(ytitle)+
labs(title=legendtitle)+ggtitle(title)
if (showlegend==F)
p <- p + theme(legend.position="none")
return(p)
}
if (what=="weights.levels")
{
data.to.plot <- data.frame(weights=c(t(x$Wm)), lambda=rep(x$lambda, dim(x$Wm)[1]),
group=rep(rownames(x$Wm), each=length(x$lambda)),
feature=as.factor(rep(x$index,
each=length(x$lambda))),
color=as.factor(rep(1:length(x$index),
each=length(x$lambda))))
if (is.null(Which)==FALSE)
{data.to.plot <- data.to.plot[data.to.plot$feature%in%Which,]} else
Which=unique(x$index)
data.to.plot <- data.to.plot[data.to.plot$feature%in%which(summary(as.factor(x$index))>1),]
color.count <- length(x$index)
get.palette <- grDevices::colorRampPalette(glasbey.colors(n=32))
colors.groups <- get.palette(color.count)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Levels weights"
if (is.null(title))
title = "Levels - regularization paths"
if(is.null(showlegend))
showlegend = T
if (is.null(legendtitle))
legendtitle = ""
p <- ggplot(data.to.plot, aes(x=.data$lambda, y=.data$weights, group=as.factor(.data$group),
col=.data$color)) +
geom_line(linetype="dotted") +
geom_point() +
scale_colour_manual("", values=colors.groups[which(summary(data.to.plot$color)>0)],
labels= unique(data.to.plot$group))+
xlab(xtitle)+ylab(ytitle)+
labs(title=legendtitle)+ggtitle(title)
if (showlegend==F)
p <- p + theme(legend.position="none")
return(p)
}
if (what=="sel.features")
{
data.to.plot <- data.frame(sel.groups=x$sel.init.feat, lambda=x$lambda)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Number of selected features"
if (is.null(title))
title = "Selected features path"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$sel.groups)) +
geom_line()+geom_point()+xlab(xtitle)+ylab(ytitle)+ggtitle(title)
return(p)
}
if (what=="sel.levels")
{
data.to.plot <- data.frame(sel.features=x$sel.trans.feat, lambda=x$lambda)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Number of selected levels"
if (is.null(title))
title = "Selected levels path"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$sel.features)) +
geom_line()+geom_point()+xlab(xtitle)+ylab(ytitle)+ggtitle(title)
return(p)
}
if (what=="expl.var")
{
data.to.plot <- data.frame(bss=apply(x$bss.per.feature,2,sum)/dim(x$Wm)[1], lambda=x$lambda)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Explained variance"
if (is.null(title))
title = "Explained variance path"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$bss)) +
geom_line()+geom_point()+xlab(xtitle)+ylab(ytitle)+ggtitle(title)
return(p)
}
if (what=="pen.crit")
{
penalized.criterion <- sapply(1:length(x$lambda), function(i){sum(x$Wm[,i]*x$bss.per.feature[,i])-
x$lambda[i]*sum(sqrt(summary(as.factor(x$index)))*x$W[,i])})
data.to.plot <- data.frame(penalized.criterion, lambda=x$lambda)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Penalized weigthed between-class variance"
if (is.null(title))
title = "Penalized criterion path"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$penalized.criterion)) +
geom_line()+geom_point()+xlab(xtitle)+ylab(ytitle)+ggtitle(title)
return(p)
}
if (what=="w.expl.var")
{
bss <- sapply(1:length(x$lambda), function(i){sum(x$Wm[,i]*x$bss.per.feature[,i])})/apply(x$Wm,2,sum)
data.to.plot <- data.frame(bss, lambda=x$lambda)
if(is.null(xtitle))
xtitle = "Lambda"
if(is.null(ytitle))
ytitle = "Explained weighted variance"
if (is.null(title))
title = "Explained weighted-variance path"
p<-ggplot(data.to.plot, aes(x=.data$lambda, y=.data$bss)) +
geom_line()+geom_point()+xlab(xtitle)+ylab(ytitle)+ggtitle(title)
return(p)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.