Nothing
#' Methods for `TSClusters`
#'
#' Methods associated with [TSClusters-class] and derived objects.
#'
#' @name tsclusters-methods
#' @rdname tsclusters-methods
#' @aliases TSClusters-methods
#' @include S4-TSClusters-classes.R
#' @importFrom methods setMethod
#'
NULL
# ==================================================================================================
# Custom initialize
# ==================================================================================================
#' @rdname tsclusters-methods
#' @aliases initialize,TSClusters
#' @importFrom methods callNextMethod
#' @importFrom methods initialize
#' @importFrom methods new
#' @importFrom rlang enexprs
#'
#' @param .Object A `TSClusters` prototype. You *shouldn't* use this, see Initialize section and the
#' examples.
#' @param ... For `initialize`, any valid slots. For `plot`, passed to [ggplot2::geom_line()] for
#' the plotting of the *cluster centroids*, or to [stats::plot.hclust()]; see Plotting section and
#' the examples. For `update`, any supported argument. Otherwise ignored.
#' @param override.family Logical. Attempt to substitute the default family with one that conforms
#' to the provided elements? See Initialize section.
#'
#' @section Initialize:
#'
#' The initialize method is used when calling [methods::new()]. The `family` slot can be
#' substituted with an appropriate one if certain elements are provided by the user. The
#' initialize methods of derived classes also inherit the family and can use it to calculate other
#' slots. In order to get a fully functional object, at least the following slots should be
#' provided:
#'
#' - `type`: "partitional", "hierarchical", "fuzzy" or "tadpole".
#' - `datalist`: The data in one of the supported formats.
#' - `centroids`: The time series centroids in one of the supported formats.
#' - `cluster`: The cluster indices for each series in the `datalist`.
#' - `control*`: A [tsclust-controls] object with the desired parameters.
#' - `distance*`: A string indicating the distance that should be used.
#' - `centroid*`: A string indicating the centroid to use (only necessary for partitional
#' clustering).
#'
#' *Necessary when overriding the default family for the calculation of other slots, CVIs or
#' prediction. Maybe not always needed, e.g. for plotting.
#'
#' @examples
#'
#' data(uciCT)
#'
#' # Assuming this was generated by some clustering procedure
#' centroids <- CharTraj[seq(1L, 100L, 5L)]
#' cluster <- unclass(CharTrajLabels)
#'
#' pc_obj <- new("PartitionalTSClusters",
#' type = "partitional", datalist = CharTraj,
#' centroids = centroids, cluster = cluster,
#' distance = "sbd", centroid = "dba",
#' control = partitional_control(),
#' args = tsclust_args(cent = list(window.size = 8L, norm = "L2")))
#'
#' fc_obj <- new("FuzzyTSClusters",
#' type = "fuzzy", datalist = CharTraj,
#' centroids = centroids, cluster = cluster,
#' distance = "sbd", centroid = "fcm",
#' control = fuzzy_control())
#'
#' show(fc_obj)
#'
setMethod("initialize", "TSClusters", function(.Object, ..., override.family = TRUE) {
tic <- proc.time()
parent_dots <- list(...)
dots <- rlang::enexprs(...)
dots$.Object <- quote(.Object)
# some minor checks
if (!is.null(parent_dots$datalist)) {
datalist <- tslist(parent_dots$datalist)
dots$datalist <- quote(datalist)
}
if (!is.null(parent_dots$centroids)) {
centroids <- tslist(parent_dots$centroids)
dots$centroids <- quote(centroids)
}
# avoid infinite recursion (see https://bugs.r-project.org/bugzilla/show_bug.cgi?id=16629)
if (is.null(parent_dots$call)) {
call <- match.call()
}
else {
call <- parent_dots$call
dots$call <- NULL
}
# apparently a non-NULL value is needed if proc_time class is virtual?
if (is.null(parent_dots$proctime)) {
dots$proctime <- quote(tic)
fill_proctime <- TRUE
}
else {
fill_proctime <- FALSE # nocov
}
# no shenanigans here, apparently do.call evaluates as parent and callNextMethod needs that
.Object <- do.call(methods::callNextMethod, dots)
.Object@call <- call
# some "defaults"
if (is.null(dots$preproc)) .Object@preproc <- "none"
if (is.null(dots$k)) .Object@k <- length(.Object@centroids)
if (is.null(dots$args))
.Object@args <- tsclust_args(preproc = .Object@dots,
dist = .Object@dots,
cent = .Object@dots)
else
.Object@args <- adjust_args(.Object@args, .Object@dots) # UTILS-utils.R
# more helpful for hierarchical/tadpole
if (override.family) {
if (length(.Object@type) == 0L)
warning("Could not override family, 'type' slot is missing.")
else if (length(.Object@distance) == 0L)
warning("Could not override family, 'distance' slot is missing.")
else {
if (.Object@preproc == "none")
preproc <- .Object@family@preproc
else
preproc <- get_from_callers(.Object@preproc, "function")
if (.Object@type == "partitional") {
if (length(.Object@centroid) > 0L) {
allcent <- all_cent2(.Object@centroid, .Object@control)
if (.Object@centroid == "shape" && .Object@preproc == "none") {
preproc <- zscore
.Object@preproc <- "zscore"
}
}
else {
allcent <- .Object@family@allcent
warning("Could not override allcent in family, 'centroid' slot is missing.")
}
}
else if (.Object@type == "hierarchical") {
centfun <- try(match.fun(.Object@centroid), silent = TRUE)
if (!inherits(centfun, "try-error")) {
allcent <- function(...) { list(centfun(...)) }
allcent_env <- new.env(parent = .GlobalEnv)
allcent_env$centfun <- centfun
environment(allcent) <- allcent_env
}
else if (length(.Object@datalist) > 0L && !is.null(.Object@distmat)) {
allcent <- function(...) {
datalist[which.min(apply(distmat, 1L, sum))] # for CVI's global_cent
}
datalist <- distmat <- NULL # avoid check NOTE
allcent_env <- new.env(parent = .GlobalEnv)
allcent_env$datalist <- .Object@datalist
allcent_env$distmat <- .Object@distmat
environment(allcent) <- allcent_env
}
else {
allcent <- .Object@family@allcent
warning("Could not override allcent in family, 'datalist' or 'distmat' slots are missing.")
}
}
else if (.Object@type == "fuzzy") {
if (length(.Object@centroid) > 0L && .Object@centroid %in% centroids_fuzzy)
allcent <- .Object@centroid
else {
allcent <- .Object@family@allcent
warning("Could not override allcent in family, 'centroid' slot is missing.")
}
}
else if (.Object@type == "tadpole") {
centfun <- try(match.fun(.Object@centroid), silent = TRUE)
if (!inherits(centfun, "try-error")) {
allcent <- function(...) { list(centfun(...)) }
allcent_env <- new.env(parent = .GlobalEnv)
allcent_env$centfun <- centfun
environment(allcent) <- allcent_env
}
else if (length(.Object@centroids) > 0L) {
centroids <- NULL # avoid check NOTE
allcent_env <- new.env(parent = .GlobalEnv)
allcent_env$centroids <- .Object@centroids
allcent <- function(...) { centroids[1L] } # for CVI's global_cent
environment(allcent) <- allcent_env
}
else {
allcent <- .Object@family@allcent
warning("Could not override allcent in family, 'centroids' slot is missing.")
}
}
else { # nocov start
allcent <- .Object@family@allcent
warning("Could not override allcent in family, 'type' slot is not recognized.")
} # nocov end
.Object@family <- methods::new("tsclustFamily",
dist = .Object@distance,
allcent = allcent,
preproc = preproc,
control = .Object@control,
fuzzy = isTRUE(.Object@type == "fuzzy"))
}
}
# just a filler
if (fill_proctime) .Object@proctime <- proc.time() - tic
# return
.Object
})
# for derived classes
#' @importFrom methods callNextMethod
#' @importFrom methods initialize
#'
setMethod("initialize", "PartitionalTSClusters", function(.Object, ...) {
.Object <- methods::callNextMethod()
# some "defaults"
if (length(.Object@iter) == 0L) .Object@iter <- 1L
if (length(.Object@converged) == 0L) .Object@converged <- TRUE
if (!nrow(.Object@cldist) &&
length(formals(.Object@family@dist)) && length(.Object@cluster) &&
length(.Object@datalist) && length(.Object@centroids))
{
# no cldist available, but dist and cluster can be used to calculate it
dm <- quoted_call(
.Object@family@dist,
.Object@datalist,
.Object@centroids,
dots = .Object@args$dist
)
.Object@cldist <- base::as.matrix(dm[cbind(1L:length(.Object@datalist), .Object@cluster)])
dimnames(.Object@cldist) <- NULL
}
if (!nrow(.Object@clusinfo) && length(.Object@cluster) && nrow(.Object@cldist)) {
# no clusinfo available, but cluster and cldist can be used to calculate it (see UTILS-utils.R)
.Object@clusinfo <- compute_clusinfo(.Object@k, .Object@cluster, .Object@cldist)
}
# return
.Object
})
#' @importFrom methods callNextMethod
#' @importFrom methods initialize
#'
setMethod("initialize", "HierarchicalTSClusters", function(.Object, ...) {
.Object <- methods::callNextMethod()
# replace distmat with NULL so that, if the distance function is called again, it won't subset it
if (length(formals(.Object@family@dist)) > 0L)
environment(.Object@family@dist)$control$distmat <- NULL
if (!nrow(.Object@cldist) && length(formals(.Object@family@dist)) && length(.Object@cluster)) {
# no cldist available, but dist and cluster can be used to calculate it
dm <- quoted_call(
.Object@family@dist,
.Object@datalist,
.Object@centroids,
dots = .Object@args$dist
)
.Object@cldist <- base::as.matrix(dm[cbind(1L:length(.Object@datalist),
.Object@cluster)])
dimnames(.Object@cldist) <- NULL
}
if (!nrow(.Object@clusinfo) && length(.Object@cluster) && nrow(.Object@cldist)) {
# no clusinfo available, but cluster and cldist can be used to calculate it (see UTILS-utils.R)
.Object@clusinfo <- compute_clusinfo(.Object@k, .Object@cluster, .Object@cldist)
}
# return
.Object
})
#' @importFrom methods callNextMethod
#' @importFrom methods initialize
#'
setMethod("initialize", "FuzzyTSClusters", function(.Object, ...) {
.Object <- methods::callNextMethod()
# some "defaults"
if (length(.Object@iter) == 0L) .Object@iter <- 1L
if (length(.Object@converged) == 0L) .Object@converged <- TRUE
if (!nrow(.Object@fcluster)) {
if (length(formals(.Object@family@dist))) {
# no fcluster available, but dist and cluster function can be used to calculate it
dm <- quoted_call(
.Object@family@dist,
.Object@datalist,
.Object@centroids,
dots = .Object@args$dist
)
.Object@fcluster <- .Object@family@cluster(dm, m = .Object@control$fuzziness)
colnames(.Object@fcluster) <- paste0("cluster_", 1L:.Object@k)
.Object@cluster <- max.col(.Object@fcluster, "first")
}
else {
.Object@fcluster <- matrix(NA_real_) # nocov
}
}
# return
.Object
})
# ==================================================================================================
# Show
# ==================================================================================================
#' @rdname tsclusters-methods
#' @aliases show,TSClusters
#' @exportMethod show
#' @importFrom utils head
#'
#' @param object,x An object that inherits from [TSClusters-class] as returned by [tsclust()].
#'
setMethod("show", "TSClusters", function(object) {
cat(object@type, "clustering with", object@k, "clusters\n")
cat("Using", object@distance, "distance\n")
cat("Using", object@centroid, "centroids\n")
if (inherits(object, "HierarchicalTSClusters"))
cat("Using method", object@method, "\n")
if (object@preproc != "none")
cat("Using", object@preproc, "preprocessing\n")
cat("\nTime required for analysis:\n")
print(object@proctime)
if (inherits(object, "FuzzyTSClusters")) {
cat("\nHead of fuzzy memberships:\n\n")
print(utils::head(object@fcluster))
}
else if (!is.null(attr(object, "clusinfo"))) {
cat("\nCluster sizes with average intra-cluster distance:\n\n")
print(object@clusinfo)
}
invisible(NULL)
})
# ==================================================================================================
# update from stats
# ==================================================================================================
#' @rdname tsclusters-methods
#' @method update TSClusters
#' @export
#' @importFrom stats update
#'
#' @param evaluate Logical. Defaults to `TRUE` and evaluates the updated call, which will result in
#' a new `TSClusters` object. Otherwise, it returns the unevaluated call.
#'
#' @details
#'
#' The `update` method takes the original function call, replaces any provided argument and
#' optionally evaluates the call again. Use `evaluate = FALSE` if you want to get the unevaluated
#' call. If no arguments are provided, the object is updated to a new version if necessary (this is
#' due to changes in the internal functions of the package, here for backward compatibility).
#'
update.TSClusters <- function(object, ..., evaluate = TRUE) {
args <- as.pairlist(list(...))
if (length(args) == 0L) {
if (evaluate) {
if (object@type != "tadpole") {
# update dist closure
object@family@dist <- ddist2(object@distance, object@control)
# update allcent closure
if (object@centroid %in% centroids_included)
object@family@allcent <- all_cent2(object@centroid, object@control)
# update distmat in allcent environment with internal class?
if (object@centroid %in% c("pam", "fcmdd"))
environment(object@family@allcent)$control$distmat <- object@control$distmat
}
return(object)
}
else
return(object@call) # nocov
}
new_call <- object@call
new_call[names(args)] <- args
if (evaluate)
eval.parent(new_call, n = 2L)
else
new_call # nocov
}
#' @rdname tsclusters-methods
#' @aliases update,TSClusters
#' @exportMethod update
#' @importFrom methods signature
#'
setMethod("update", methods::signature(object = "TSClusters"), update.TSClusters)
# ==================================================================================================
# predict from stats
# ==================================================================================================
#' @rdname tsclusters-methods
#' @method predict TSClusters
#' @export
#' @importFrom stats predict
#'
#' @param newdata New data to be assigned to a cluster. It can take any of the supported formats of
#' [tsclust()]. Note that for multivariate series, this means that it **must** be a list of
#' matrices, even if the list has only one matrix.
#'
#' @section Prediction:
#'
#' The `predict` generic can take the usual `newdata` argument. If `NULL`, the method simply
#' returns the obtained cluster indices. Otherwise, a nearest-neighbor classification based on the
#' centroids obtained from clustering is performed:
#'
#' 1. `newdata` is preprocessed with `object@family@preproc` using the parameters in
#' `object@args$preproc`.
#' 2. A cross-distance matrix between the processed series and `object@centroids` is computed with
#' `object@family@dist` using the parameters in `object@args$dist`.
#' 3. For non-fuzzy clustering, the series are assigned to their nearest centroid's cluster. For
#' fuzzy clustering, the fuzzy membership matrix for the series is calculated. In both cases,
#' the function in `object@family@cluster` is used.
#'
predict.TSClusters <- function(object, newdata = NULL, ...) {
if (is.null(newdata)) {
if (inherits(object, "FuzzyTSClusters"))
ret <- object@fcluster
else
ret <- object@cluster
}
else {
newdata <- tslist(newdata)
check_consistency(newdata, "vltslist")
nm <- names(newdata)
newdata <- quoted_call(
object@family@preproc,
newdata,
dots = subset_dots(object@args$preproc, object@family@preproc)
)
distmat <- quoted_call(
object@family@dist,
x = newdata,
centroids = object@centroids,
dots = object@args$dist
)
ret <- object@family@cluster(distmat = distmat, m = object@control$fuzziness)
if (inherits(object, "FuzzyTSClusters"))
dimnames(ret) <- list(nm, paste0("cluster_", 1L:ncol(ret)))
else
names(ret) <- nm
}
ret
}
#' @rdname tsclusters-methods
#' @aliases predict,TSClusters
#' @exportMethod predict
#' @importFrom methods signature
#'
setMethod("predict", methods::signature(object = "TSClusters"), predict.TSClusters)
# ==================================================================================================
# Plot
# ==================================================================================================
#' @rdname tsclusters-methods
#' @method plot TSClusters
#' @export
#' @importFrom dplyr anti_join
#' @importFrom dplyr bind_rows
#' @importFrom dplyr sample_n
#' @importFrom methods S3Part
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 facet_wrap
#' @importFrom ggplot2 geom_line
#' @importFrom ggplot2 geom_vline
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 guides
#' @importFrom ggplot2 labs
#' @importFrom ggplot2 theme_bw
#' @importFrom ggrepel geom_label_repel
#' @importFrom graphics plot
#' @importFrom reshape2 melt
#' @importFrom rlang .data
#'
#' @param y Ignored.
#' @param clus A numeric vector indicating which clusters to plot.
#' @param labs.arg A list with arguments to change the title and/or axis labels. See the examples
#' and [ggplot2::labs()] for more information.
#' @param series Optionally, the data in the same format as it was provided to [tsclust()].
#' @param time Optional values for the time axis. If series have different lengths, provide the time
#' values of the longest series.
#' @param plot Logical flag. You can set this to `FALSE` in case you want to save the ggplot object
#' without printing anything to screen
#' @param type What to plot. `NULL` means default. See details.
#' @param labels Whether to include labels in the plot (not for dendrogram plots). See details and
#' note that this is subject to **randomness**.
#'
#' @section Plotting:
#'
#' The plot method uses the `ggplot2` plotting system (see [ggplot2::ggplot()]).
#'
#' The default depends on whether a hierarchical method was used or not. In those cases, the
#' dendrogram is plotted by default; you can pass any extra parameters to [stats::plot.hclust()]
#' via the ellipsis (`...`).
#'
#' Otherwise, the function plots the time series of each cluster along with the obtained centroid.
#' The default values for cluster centroids are: `linetype = "dashed"`, `linewidth = 1.5`,
#' `colour = "black"`, `alpha = 0.5`. You can change this by means of the ellipsis (`...`).
#'
#' You can choose what to plot with the `type` parameter. Possible options are:
#'
#' - `"dendrogram"`: Only available for hierarchical clustering.
#' - `"series"`: Plot the time series divided into clusters without including centroids.
#' - `"centroids"`: Plot the obtained centroids only.
#' - `"sc"`: Plot both series and centroids
#'
#' In order to enable labels on the (non-dendrogram) plot, you have to select an option that plots
#' the series and at least provide an empty list in the `labels` argument. This list can contain
#' arguments for [ggrepel::geom_label_repel()] and will be passed along. The following are
#' set by the plot method if they are not provided:
#'
#' - `"mapping"`: set to [aes][ggplot2::aes](x = t, y = value, label = label)
#' - `"data"`: a data frame with as many rows as series in the `datalist` and 4 columns:
#' + `t`: x coordinate of the label for each series.
#' + `value`: y coordinate of the label for each series.
#' + `cl`: index of the cluster to which the series belongs (i.e. `x@cluster`).
#' + `label`: the label for the given series (i.e. `names(x@datalist)`).
#'
#' You can provide your own data frame if you want, but it must have those columns and, even if
#' you override `mapping`, the `cl` column must have that name. The method will attempt to spread
#' the labels across the plot, but note that this is **subject to randomness**, so be careful if
#' you need reproducibility of any commands used after plotting (see examples).
#'
#' If created, the function returns the `gg` object invisibly, in case you want to modify it to
#' your liking. You might want to look at [ggplot2::ggplot_build()] if that's the case.
#'
#' If you want to free the scale of the X axis, you can do the following:
#'
#' `plot(x, plot = FALSE)` `+` `facet_wrap(~cl, scales = "free")`
#'
#' For more complicated changes, you're better off looking at the source code at
#' \url{https://github.com/asardaes/dtwclust/blob/master/R/S4-TSClusters-methods.R} and creating your
#' own plotting function.
#'
#' @return
#'
#' The plot method returns a `gg` object (or `NULL` for dendrogram plot) invisibly.
#'
#' @examples
#'
#' \dontrun{
#' plot(pc_obj, type = "c", linetype = "solid",
#' labs.arg = list(title = "Clusters' centroids"))
#'
#' set.seed(15L)
#' plot(pc_obj, labels = list(nudge_x = -5, nudge_y = 0.2),
#' clus = c(1L,4L))
#' }
#'
plot.TSClusters <- function(x, y, ...,
clus = seq_len(x@k), labs.arg = NULL,
series = NULL, time = NULL,
plot = TRUE, type = NULL,
labels = NULL)
{
# set default type if none was provided
if (!is.null(type)) # nocov start
type <- match.arg(type, c("dendrogram", "series", "centroids", "sc"))
else if (x@type == "hierarchical")
type <- "dendrogram"
else
type <- "sc" # nocov end
# plot dendrogram?
if (inherits(x, "HierarchicalTSClusters") && type == "dendrogram") {
x <- methods::S3Part(x, strictS3 = TRUE, "hclust")
if (plot) graphics::plot(x, ...)
return(invisible(NULL))
}
else if (x@type != "hierarchical" && type == "dendrogram") {
stop("Dendrogram plot only applies to hierarchical clustering.")
}
# Obtain data, the priority is: provided data > included data list
if (!is.null(series)) {
data <- tslist(series)
}
else {
if (length(x@datalist) < 1L)
stop("Provided object has no data. Please provide the data manually.") # nocov
data <- x@datalist
}
# centroids consistency
check_consistency(centroids <- x@centroids, "vltslist")
# force same length for all multivariate series/centroids in the same cluster by
# adding NAs
if (mv <- is_multivariate(data)) {
data <- lapply(data, base::as.matrix)
centroids <- lapply(centroids, base::as.matrix)
clusters <- split(data, factor(x@cluster, levels = 1L:x@k), drop = FALSE)
for (id_clus in 1L:x@k) {
cluster <- clusters[[id_clus]]
if (length(cluster) < 1L) next # nocov (empty cluster)
nc <- NCOL(cluster[[1L]])
len <- sapply(cluster, NROW)
L <- max(len, NROW(centroids[[id_clus]]))
trail <- L - len
clusters[[id_clus]] <- Map(cluster, trail, f = function(mvs, trail) {
rbind(mvs, matrix(NA, trail, nc))
})
trail <- L - NROW(centroids[[id_clus]])
centroids[[id_clus]] <- rbind(centroids[[id_clus]], matrix(NA, trail, nc))
}
# split returns the result in order of the factor levels,
# but I want to keep the original order as returned from clustering
ido <- sort(sort(x@cluster, index.return = TRUE)$ix, index.return = TRUE)$ix
data <- unlist(clusters, recursive = FALSE)[ido]
}
# helper values (lengths() here, see issue #18 in GitHub)
L1 <- lengths(data)
L2 <- lengths(centroids)
# timestamp consistency
if (!is.null(time) && length(time) < max(L1, L2))
stop("Length mismatch between values and timestamps") # nocov
# Check if data was z-normalized
if (x@preproc == "zscore")
title_str <- "Clusters' members (z-normalized)"
else
title_str <- "Clusters' members"
# transform to data frames
dfm <- reshape2::melt(data)
dfcm <- reshape2::melt(centroids)
# time, cluster and colour indices
color_ids <- integer(x@k)
dfm_tcc <- mapply(x@cluster, L1, USE.NAMES = FALSE, SIMPLIFY = FALSE,
FUN = function(clus, len) {
t <- if (is.null(time)) seq_len(len) else time[1L:len]
cl <- rep(clus, len)
color <- rep(color_ids[clus], len)
color_ids[clus] <<- color_ids[clus] + 1L
data.frame(t = t, cl = cl, color = color)
})
dfcm_tc <- mapply(1L:x@k, L2, USE.NAMES = FALSE, SIMPLIFY = FALSE,
FUN = function(clus, len) {
t <- if (is.null(time)) seq_len(len) else time[1L:len]
cl <- rep(clus, len)
data.frame(t = t, cl = cl)
})
# bind
dfm <- data.frame(dfm, call_rbind(dfm_tcc))
dfcm <- data.frame(dfcm, call_rbind(dfcm_tc))
# make factor
dfm$cl <- factor(dfm$cl)
dfcm$cl <- factor(dfcm$cl)
dfm$color <- factor(dfm$color)
# create gg object
gg <- ggplot2::ggplot(data.frame(t = integer(),
variable = factor(),
value = numeric(),
cl = factor(),
color = factor()),
ggplot2::aes(x = .data$t,
y = .data$value,
group = .data$L1))
# add centroids first if appropriate, so that they are at the very back
if (type %in% c("sc", "centroids")) {
if (length(list(...)) == 0L)
gg <- gg + ggplot2::geom_line(data = dfcm[dfcm$cl %in% clus, ],
linetype = "dashed",
linewidth = 1.5,
colour = "black",
alpha = 0.5)
else
gg <- gg + ggplot2::geom_line(data = dfcm[dfcm$cl %in% clus, ], ...)
}
# add series next if appropriate
if (type %in% c("sc", "series"))
gg <- gg + ggplot2::geom_line(data = dfm[dfm$cl %in% clus, ],
ggplot2::aes(colour = .data$color))
# add vertical lines to separate variables of multivariate series
if (mv) {
ggdata <- data.frame(cl = rep(1L:x@k, each = (nc - 1L)),
vbreaks = as.numeric(1L:(nc - 1L) %o% sapply(centroids, NROW)))
gg <- gg + ggplot2::geom_vline(data = ggdata[ggdata$cl %in% clus, , drop = FALSE],
colour = "black", linetype = "longdash",
ggplot2::aes(xintercept = .data$vbreaks))
}
# add labels
if (type %in% c("sc", "series") && is.list(labels)) {
if (is.null(labels$mapping))
labels$mapping <- ggplot2::aes(x = .data$t, y = .data$value, label = .data$label)
if (is.null(labels$data) && !is.null(names(x@datalist))) {
label <- names(x@datalist)[x@cluster %in% clus]
label <- split(label, x@cluster[x@cluster %in% clus])
dfm <- dfm[dfm$cl %in% clus,]
labels$data <- dplyr::bind_rows(lapply(split(dfm, dfm$cl), function(df_cluster) {
# keep given order for split
df_cluster$L1 <- factor(df_cluster$L1, levels = unique(df_cluster$L1))
df_series <- split(df_cluster[c("t", "value", "cl")], df_cluster$L1)
ret <- vector("list", length(df_series))
for (i in seq_along(df_series)) {
this_df <- df_series[[i]]
for (not_this in df_series[-i]) {
if (nrow(this_df) == nrow(not_this) && isTRUE(all.equal(not_this, this_df, check.attributes = FALSE))) {
next
}
this_df <- dplyr::anti_join(this_df, not_this, by = c("t", "value"))
}
ret[[i]] <- dplyr::sample_n(this_df, 1L)
}
# return
dplyr::bind_rows(ret)
}))
labels$data$label <- unlist(label)
}
labels$data <- labels$data[labels$data$cl %in% clus,]
labels$inherit.aes <- FALSE
gg <- gg + do_call(ggrepel::geom_label_repel, labels)
}
# add facets, remove legend, apply kinda black-white theme
gg <- gg +
ggplot2::facet_wrap(~cl, scales = "free_y") +
ggplot2::guides(colour = "none") +
ggplot2::theme_bw()
# labs
if (!is.null(labs.arg)) # nocov start
gg <- gg + ggplot2::labs(labs.arg)
else
gg <- gg + ggplot2::labs(title = title_str)
# plot without warnings in case I added NAs for multivariate cases
if (plot) suppressWarnings(graphics::plot(gg))
invisible(gg) # nocov end
}
#' @rdname tsclusters-methods
#' @aliases plot,TSClusters,missing
#' @exportMethod plot
#' @importFrom methods signature
#'
setMethod("plot", methods::signature(x = "TSClusters", y = "missing"), plot.TSClusters)
# ==================================================================================================
# Cluster validity indices
# ==================================================================================================
#' @importFrom cluster silhouette
#' @importFrom methods as
#'
cvi_TSClusters <- function(a, b = NULL, type = "valid", ...) {
type <- match.arg(type, several.ok = TRUE,
c("RI", "ARI", "J", "FM", "VI",
"Sil", "SF", "CH", "DB", "DBstar", "D", "COP",
"valid", "internal", "external"))
dots <- list(...)
internal <- c("Sil", "SF", "CH", "DB", "DBstar", "D", "COP")
external <- c("RI", "ARI", "J", "FM", "VI")
if (any(type == "valid")) {
type <- if (is.null(b)) internal else c(internal, external)
}
else if (any(type == "internal")) {
type <- internal
}
else if (any(type == "external")) {
type <- external
}
which_internal <- type %in% internal
which_external <- type %in% external
if (any(which_external))
CVIs <- cvi(a@cluster, b = b, type = type[which_external], ...)
else
CVIs <- numeric()
type <- type[which_internal]
if (any(which_internal)) {
if (length(a@datalist) == 0L && any(type %in% c("SF", "CH"))) {
warning("Internal CVIs: the original data must be in object to calculate ",
"the following indices:",
"\n\tSF\tCH")
type <- setdiff(type, c("SF", "CH"))
}
# calculate distmat if needed
distmat <- NULL
if (any(type %in% c("Sil", "D", "COP"))) {
if (is.null(a@distmat)) {
if (length(a@datalist) == 0L) {
warning("Internal CVIs: distmat OR original data needed for indices:",
"\n\tSil\tD\tCOP")
type <- setdiff(type, c("Sil", "D", "COP"))
}
else {
distmat <- quoted_call(
a@family@dist,
x = a@datalist,
centroids = if (isTRUE(a@control$symmetric)) NULL else a@datalist,
dots = a@args$dist
)
}
}
else {
distmat <- a@distmat
}
}
if (!is.null(distmat)) {
distmat <- base::as.matrix(distmat)
if (!base::isSymmetric(distmat))
warning("Internal CVIs: series' cross-distance matrix is NOT symmetric, ",
"which can be problematic for:",
"\n\tSil\tD\tCOP")
}
# are no valid indices left?
if (length(type) == 0L) return(CVIs) # nocov
# calculate some values that both Davies-Bouldin indices use
if (any(type %in% c("DB", "DBstar"))) {
S <- a@clusinfo$av_dist
# distance between centroids
distcent <- quoted_call(
a@family@dist,
x = a@centroids,
centroids = if (isTRUE(a@control$symmetric)) NULL else a@centroids,
dots = a@args$dist
)
if (base::is.matrix(distcent)) {
if (!base::isSymmetric(unclass(distcent)))
warning("Internal CVIs: centroids' cross-distance matrix is NOT symmetric, ",
"which can be problematic for:",
"\n\tDB\tDB*")
}
else {
distcent <- methods::as(distcent, "Distmat")
}
}
# calculate global centroids if needed
if (any(type %in% c("SF", "CH"))) {
N <- length(a@datalist)
if (a@type == "partitional") {
global_cent <- quoted_call(
a@family@allcent,
x = a@datalist,
cl_id = rep(1L, N),
k = 1L,
cent = a@datalist[sample(N, 1L)],
cl_old = rep(0L, N),
dots = a@args$cent
)
}
else {
global_cent <- quoted_call(a@family@allcent,
a@datalist,
dots = subset_dots(a@args$cent, a@family@allcent))
}
dist_global_cent <- quoted_call(a@family@dist,
x = a@centroids,
centroids = global_cent,
dots = a@args$dist)
dim(dist_global_cent) <- NULL
}
CVIs <- c(CVIs, sapply(type, function(CVI) {
switch(EXPR = CVI,
# Silhouette
Sil = {
mean(cluster::silhouette(a@cluster, dmatrix = distmat)[,3L])
},
# Dunn
D = {
pairs <- call_pairs(a@k)
deltas <- mapply(pairs[ , 1L], pairs[ , 2L],
USE.NAMES = FALSE, SIMPLIFY = TRUE,
FUN = function(i, j) {
min(distmat[a@cluster == i,
a@cluster == j,
drop = TRUE])
})
Deltas <- sapply(1L:a@k, function(k) {
max(distmat[a@cluster == k, a@cluster == k, drop = TRUE])
})
min(deltas) / max(Deltas)
},
# Davies-Bouldin
DB = {
mean(sapply(1L:a@k, function(k) {
max((S[k] + S[-k]) / distcent[k, -k])
}))
},
# Modified DB -> DB*
DBstar = {
mean(sapply(1L:a@k, function(k) {
max(S[k] + S[-k]) / min(distcent[k, -k, drop = TRUE])
}))
},
# Calinski-Harabasz
CH = {
(N - a@k) /
(a@k - 1) *
sum(a@clusinfo$size * dist_global_cent) /
sum(a@cldist[, 1L])
},
# Score function
SF = {
bcd <- sum(a@clusinfo$size * dist_global_cent) / (N * a@k)
wcd <- sum(a@clusinfo$av_dist)
1 - 1 / exp(exp(bcd - wcd))
},
# COP
COP = {
1 / nrow(distmat) * sum(sapply(1L:a@k, function(k) {
sum(a@cldist[a@cluster == k, 1L]) / min(apply(distmat[a@cluster != k,
a@cluster == k,
drop = FALSE],
2L,
max))
}))
})
}))
}
# return
CVIs
}
#' @rdname cvi
#' @aliases cvi,PartitionalTSClusters
#' @exportMethod cvi
#' @importFrom methods signature
#' @include GENERICS-cvi.R
#'
setMethod("cvi", methods::signature(a = "PartitionalTSClusters"), cvi_TSClusters)
#' @rdname cvi
#' @aliases cvi,HierarchicalTSClusters
#' @exportMethod cvi
#' @importFrom methods signature
#'
setMethod("cvi", methods::signature(a = "HierarchicalTSClusters"), cvi_TSClusters)
#' @rdname cvi
#' @aliases cvi,FuzzyTSClusters
#' @exportMethod cvi
#' @importFrom methods as
#' @importFrom methods signature
#'
setMethod(
"cvi", methods::signature(a = "FuzzyTSClusters", b = "ANY"),
function(a, b = NULL, type = "valid", ...) {
type <- match.arg(type, several.ok = TRUE,
c("MPC", "K", "T", "SC", "PBMF",
"RI", "ARI", "VI", "NMIM",
"valid", "internal", "external"))
dots <- list(...)
internal <- c("MPC", "K", "T", "SC", "PBMF")
external <- c("RI", "ARI", "VI", "NMIM")
if (any(type == "valid")) {
type <- if (is.null(b)) internal else c(internal, external)
} else if (any(type == "internal")) {
type <- internal
} else if (any(type == "external")) {
type <- external
}
which_internal <- type %in% internal
which_external <- type %in% external
if (any(which_external))
CVIs <- cvi(a@fcluster, b = b, type = type[which_external], ...)
else
CVIs <- numeric()
type <- type[which_internal]
if (any(which_internal)) {
if (length(a@datalist) == 0L && any(type %in% c("K", "T" ,"SC", "PBMF"))) {
warning("Fuzzy CVIs: the original series must be in the object to calculate ",
"the following indices:\n",
"\tK\tT\tSC\tPBMF")
type <- setdiff(type, c("K", "T" ,"SC", "PBMF"))
}
# are no valid indices left?
if (length(type) == 0L) return(numeric(0L)) # nocov
# calculate global centroids if needed
if (any(type %in% c("K", "SC", "PBMF"))) {
N <- length(a@datalist)
global_cent <- quoted_call(
a@family@allcent,
x = a@datalist,
cl_id = cbind(rep(1L, N)),
k = 1L,
dots = a@args$cent
)
dist_global_cent <- quoted_call(
a@family@dist,
x = a@centroids,
centroids = global_cent,
dots = a@args$dist
)
dim(dist_global_cent) <- NULL
}
# distance between centroids
if (any(type %in% c("K", "T", "PBMF"))) {
distcent <- quoted_call(a@family@dist,
x = a@centroids,
centroids = if (a@control$symmetric) NULL else a@centroids,
dots = a@args$dist)
if (inherits(distcent, "dist")) distcent <- methods::as(distcent, "Distmat")
}
# distance between series and centroids
if (any(type %in% c("K", "T", "SC", "PBMF"))) {
dsc <- quoted_call(a@family@dist,
x = a@datalist,
centroids = a@centroids,
dots = a@args$dist)
}
CVIs <- c(CVIs, sapply(type, function(CVI) {
switch(EXPR = CVI,
# ---------------------------------------------------------------------------
"MPC" = {
PC <- sum(a@fcluster ^ 2) / nrow(a@fcluster)
1 - (a@k / (a@k - 1L)) * (1 - PC)
},
# ---------------------------------------------------------------------------
"K" = {
numerator <- sum((a@fcluster ^ 2) * (dsc ^ 2)) +
sum(dist_global_cent ^ 2) / a@k
denominator <- min(distcent[!diag(a@k)] ^ 2)
numerator / denominator
},
# ---------------------------------------------------------------------------
"T" = {
numerator <- sum((a@fcluster ^ 2) * (dsc ^ 2)) +
sum(distcent[!diag(a@k)] ^ 2) / (a@k * (a@k - 1L))
denominator <- min(distcent[!diag(a@k)] ^ 2) + 1 / a@k
numerator / denominator
},
# ---------------------------------------------------------------------------
"SC" = {
u <- a@fcluster
m <- a@control$fuzziness
SC1_numerator <- sum((dist_global_cent ^ 2) / a@k)
SC1_denominator <- sum(apply((u ^ m) * (dsc ^ 2), 2L, sum) /
apply(u, 2L, sum))
SC1 <- SC1_numerator / SC1_denominator
SC2_numerator <- sum(sapply(1L:(a@k - 1L), function(i) {
sum(sapply(1L:(a@k - i), function(r) {
j <- r + i
temp <- apply(u[, c(i,j)], 1L, min)
sum(temp ^ 2) / sum(temp)
}))
}))
SC2_denominator <- apply(u, 1L, max)
SC2_denominator <- sum(SC2_denominator ^ 2) / sum(SC2_denominator)
SC2 <- SC2_numerator / SC2_denominator
SC1 - SC2
},
# ---------------------------------------------------------------------------
"PBMF" = {
u <- a@fcluster
m <- a@control$fuzziness
dsgc <- quoted_call(a@family@dist,
x = a@datalist,
centroids = global_cent,
dots = a@args$dist)
factor1 <- 1 / a@k
factor2 <- sum(dsgc) / sum(dsc * (u ^ m))
factor3 <- max(distcent[!diag(a@k)])
(factor1 * factor2 * factor3) ^ 2
})
}))
}
# return
CVIs
}
)
# ==================================================================================================
# Functions to support package 'clue'
# ==================================================================================================
#' @method as.cl_membership TSClusters
#' @export
#' @importFrom clue as.cl_membership
#'
as.cl_membership.TSClusters <- function(x) {
clue::as.cl_membership(x@cluster)
}
#' @method cl_class_ids TSClusters
#' @export
#' @importFrom clue as.cl_class_ids
#' @importFrom clue cl_class_ids
#'
cl_class_ids.TSClusters <- function(x) {
clue::as.cl_class_ids(x@cluster)
}
#' @method cl_membership TSClusters
#' @export
#' @importFrom clue as.cl_membership
#' @importFrom clue cl_membership
#'
cl_membership.TSClusters <- function(x, k = n_of_classes(x)) {
clue::as.cl_membership(x)
}
#' @method is.cl_dendrogram TSClusters
#' @export
#' @importFrom clue is.cl_dendrogram
#'
is.cl_dendrogram.TSClusters <- function(x) {
x@type == "hierarchical"
}
#' @method is.cl_hard_partition TSClusters
#' @export
#' @importFrom clue is.cl_hard_partition
#'
is.cl_hard_partition.TSClusters <- function(x) {
x@type != "fuzzy"
}
#' @method is.cl_hierarchy TSClusters
#' @export
#' @importFrom clue is.cl_hierarchy
#'
is.cl_hierarchy.TSClusters <- function(x) {
x@type == "hierarchical"
}
#' @method is.cl_partition TSClusters
#' @export
#' @importFrom clue is.cl_partition
#'
is.cl_partition.TSClusters <- function(x) {
TRUE
}
#' @method n_of_classes TSClusters
#' @export
#' @importFrom clue n_of_classes
#'
n_of_classes.TSClusters <- function(x) {
x@k
}
#' @method n_of_objects TSClusters
#' @export
#' @importFrom clue n_of_objects
#'
n_of_objects.TSClusters <- function(x) {
length(x@cluster)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.