Nothing
#' Helper function for preprocessing/distance/centroid configurations
#'
#' Create preprocessing, distance and centroid configurations for [compare_clusterings_configs()].
#'
#' @export
#' @importFrom dplyr bind_rows
#'
#' @param type Which type of function is being targeted by this configuration.
#' @param ... Any number of named lists with functions and arguments that will be shared by all
#' clusterings. See details.
#' @param partitional A named list of lists with functions and arguments for partitional
#' clusterings.
#' @param hierarchical A named list of lists with functions and arguments for hierarchical
#' clusterings.
#' @param fuzzy A named list of lists with functions and arguments for fuzzy clusterings.
#' @param tadpole A named list of lists with functions and arguments for TADPole clusterings.
#' @param share.config A character vector specifying which clusterings should include the shared
#' lists (the ones specified in `...`). It must be any combination of (possibly abbreviated):
#' partitional, hierarchical, fuzzy, tadpole.
#'
#' @details
#'
#' The named lists are interpreted in the following way: the name of the list will be considered to
#' be a function name, and the elements of the list will be the possible parameters for the
#' function. Each function must have at least an empty list. The parameters may be vectors that
#' specify different values to be tested.
#'
#' For preprocessing, the special name `none` signifies no preprocessing.
#'
#' For centroids, the special name `default` leaves the centroid unspecified.
#'
#' Please see the examples in [compare_clusterings()] to see how this is used.
#'
#' @return
#'
#' A list for each clustering, each of which includes a data frame with the computed configurations.
#'
pdc_configs <- function(type = c("preproc", "distance", "centroid"), ...,
partitional = NULL, hierarchical = NULL, fuzzy = NULL, tadpole = NULL,
share.config = c("p", "h", "f", "t"))
{
type <- match.arg(type)
shared <- list(...)
specific <- list(partitional = partitional,
hierarchical = hierarchical,
fuzzy = fuzzy,
tadpole = tadpole)
specific <- specific[!sapply(specific, is.null)]
share_missing <- missing(share.config)
share.config <- match.arg(share.config, supported_clusterings, TRUE)
if (type == "distance") {
if (!is.null(specific$tadpole) || (!share_missing && "tadpole" %in% share.config))
warning("TADPole ignores distance configurations.")
specific$tadpole <- NULL
share.config <- setdiff(share.config, "tadpole")
}
# ==============================================================================================
# Shared configs
# ==============================================================================================
if (length(shared) > 0L && length(share.config) > 0L) {
# careful, singular and plural below
shared_cfg <- Map(shared, names(shared), f = function(shared_args, fun) {
cfg <- quoted_call(expand.grid, fun, stringsAsFactors = FALSE, dots = shared_args)
names(cfg)[1L] <- type
cfg
})
shared_cfg <- dplyr::bind_rows(shared_cfg)
shared_cfgs <- lapply(share.config, function(dummy) { shared_cfg })
names(shared_cfgs) <- share.config
shared_cfgs <- shared_cfgs[setdiff(share.config, names(specific))]
}
else {
shared_cfg <- NULL
shared_cfgs <- list()
}
# ==============================================================================================
# Specific configs
# ==============================================================================================
if (length(specific) > 0L) {
cfgs <- Map(specific, names(specific), f = function(config, clus_type) {
config_names <- names(config)
if (!is.list(config) || is.null(config_names))
stop("All parameters must be named lists.") # nocov
cfg <- Map(config, config_names, f = function(config_args, fun) {
cfg <- quoted_call(expand.grid, fun, stringsAsFactors = FALSE, dots = config_args)
names(cfg)[1L] <- type
cfg
})
cfg <- dplyr::bind_rows(cfg)
if (clus_type %in% share.config)
cfg <- dplyr::bind_rows(shared_cfg, cfg) # singular shared
cfg
})
cfgs <- c(cfgs, shared_cfgs) # plural shared
}
else {
cfgs <- shared_cfgs
}
# return
cfgs
}
#' Create clustering configurations.
#'
#' Create configurations for [compare_clusterings()]
#'
#' @export
#' @importFrom dplyr bind_cols
#'
#' @param k A numeric vector with one or more elements specifying the number of clusters to test.
#' @param types Clustering types. It must be any combination of (possibly abbreviated): partitional,
#' hierarchical, fuzzy, tadpole.
#' @param controls A named list of [tsclust-controls]. `NULL` means defaults. See details.
#' @param preprocs Preprocessing configurations. See details.
#' @param distances Distance configurations. See details.
#' @param centroids Centroid configurations. See details.
#' @param no.expand A character vector indicating parameters that should *not* be expanded between
#' [pdc_configs()] configurations. See examples.
#'
#' @details
#'
#' Preprocessing, distance and centroid configurations are specified with the helper function
#' [pdc_configs()], refer to the examples in [compare_clusterings()] to see how this is used.
#'
#' The controls list may be specified with the usual [tsclust-controls] functions. The names of the
#' list must correspond to "partitional", "hierarchical", "fuzzy" or "tadpole" clustering. Again,
#' please refer to the examples in [compare_clusterings()].
#'
#' @return
#'
#' A list for each clustering type, each of which includes a data frame with the computed and merged
#' configurations. Each data frame has an extra attribute `num.configs` specifying the number of
#' configurations.
#'
#' @examples
#'
#' # compare this with leaving no.expand empty
#' compare_clusterings_configs(
#' distances = pdc_configs("d", dtw_basic = list(window.size = 1L:2L, norm = c("L1", "L2"))),
#' centroids = pdc_configs("c", dba = list(window.size = 1L:2L, norm = c("L1", "L2"))),
#' no.expand = c("window.size", "norm")
#' )
#'
compare_clusterings_configs <- function(types = c("p", "h", "f"), k = 2L, controls = NULL,
preprocs = pdc_configs("preproc", none = list()),
distances = pdc_configs("distance", dtw_basic = list()),
centroids = pdc_configs("centroid", default = list()),
no.expand = character(0L))
{
# ==============================================================================================
# Start
# ==============================================================================================
types <- match.arg(types, supported_clusterings, TRUE)
# ----------------------------------------------------------------------------------------------
# Check controls specification
# ----------------------------------------------------------------------------------------------
if (is.null(controls)) {
controls <- lapply(types, function(type) { do.call(paste0(type, "_control"), list()) })
names(controls) <- types
}
else if (!is.list(controls) || is.null(names(controls))) {
stop("The 'controls' argument must be NULL or a named list")
}
else if (!all(types %in% names(controls))) {
stop("The names of the 'controls' argument do not correspond to the provided 'types'")
}
else {
controls <- controls[intersect(names(controls), types)]
}
# ----------------------------------------------------------------------------------------------
# Check preprocessings specification
# ----------------------------------------------------------------------------------------------
if (missing(preprocs))
force(preprocs)
else if (!is.list(preprocs) || (length(preprocs) > 0L && is.null(names(preprocs))))
stop("The 'preprocs' argument must be a list with named elements")
else if (!all(types %in% names(preprocs)))
stop("The names of the 'preprocs' argument do not correspond to the provided 'types'")
preprocs <- preprocs[intersect(names(preprocs), types)]
# ----------------------------------------------------------------------------------------------
# Check distance specification
# ----------------------------------------------------------------------------------------------
if (missing(distances))
force(distances)
else if (!is.list(distances) || (length(distances) > 0L && is.null(names(distances))))
stop("The 'distances' argument must be a list with named elements")
else if (!all(setdiff(types, "tadpole") %in% names(distances)))
stop("The names of the 'distances' argument do not correspond to the provided 'types'")
distances <- distances[intersect(names(distances), types)]
# ----------------------------------------------------------------------------------------------
# Check centroids specification
# ----------------------------------------------------------------------------------------------
if (missing(centroids))
force(centroids)
else if (!is.list(centroids) || (length(centroids) > 0L && is.null(names(centroids))))
stop("The 'centroids' argument must be a list with named elements")
else if (!all(types %in% names(centroids)))
stop("The names of the 'centroids' argument do not correspond to the provided 'types'")
centroids <- centroids[intersect(names(centroids), types)]
# ==============================================================================================
# Create configs
# ==============================================================================================
# return here
Map(types, controls[types], preprocs[types], distances[types], centroids[types],
f = function(type, control, preproc, distance, centroid) {
# --------------------------------------------------------------------------------------
# Control configs
# --------------------------------------------------------------------------------------
if (class(control) != control_classes[type]) stop("Invalid ", type, " control") # nocov
# if it's within a list, it's to prevent expansion
cfg <- switch(
type,
partitional = {
quoted_call(
expand.grid,
k = list(k),
pam.precompute = control$pam.precompute,
iter.max = control$iter.max,
nrep = control$nrep,
symmetric = control$symmetric,
version = control$version,
stringsAsFactors = FALSE
)
},
hierarchical = {
quoted_call(
expand.grid,
k = list(k),
method = list(control$method),
symmetric = control$symmetric,
stringsAsFactors = FALSE
)
},
fuzzy = {
quoted_call(
expand.grid,
k = list(k),
fuzziness = control$fuzziness,
iter.max = control$iter.max,
delta = control$delta,
symmetric = control$symmetric,
version = control$version,
stringsAsFactors = FALSE
)
},
tadpole = {
quoted_call(
expand.grid,
k = list(k),
dc = list(control$dc),
window.size = control$window.size,
lb = control$lb,
stringsAsFactors = FALSE
)
}
)
# --------------------------------------------------------------------------------------
# Merge configs
# --------------------------------------------------------------------------------------
need_adjustment <- character(0L)
# preproc
if (!is.null(preproc) && nrow(preproc)) {
nms <- names(preproc)
if (any(nms %in% no.expand)) need_adjustment <- c(need_adjustment, "preproc")
nms_args <- nms != "preproc" & !(nms %in% no.expand)
if (any(nms_args)) names(preproc)[nms_args] <- paste0(nms[nms_args], "_preproc")
cfg <- base::merge(cfg, preproc, all = TRUE)
}
# distance
if (type != "tadpole" && !is.null(distance) && nrow(distance)) {
nms <- names(distance)
if (any(nms %in% no.expand)) need_adjustment <- c(need_adjustment, "distance")
nms_args <- nms != "distance" & !(nms %in% no.expand)
if (any(nms_args)) names(distance)[nms_args] <- paste0(nms[nms_args], "_distance")
cfg <- base::merge(cfg, distance, all = TRUE)
}
# centroid
if (!is.null(centroid) && nrow(centroid)) {
nms <- names(centroid)
if (any(nms %in% no.expand)) need_adjustment <- c(need_adjustment, "centroid")
nms_args <- nms != "centroid" & !(nms %in% no.expand)
if (any(nms_args)) names(centroid)[nms_args] <- paste0(nms[nms_args], "_centroid")
cfg <- base::merge(cfg, centroid, all = TRUE)
}
# special case: tadpole
tadpole_controls <- names(formals(tadpole_control))
if (type == "tadpole" && any(no.expand %in% tadpole_controls)) {
need_adjustment <- c(need_adjustment, "tadpole")
tadpole <- cfg[, tadpole_controls, drop = FALSE]
}
# adjust no.expand columns
if (length(need_adjustment) > 0L) {
adjust_cols <- cfg[, no.expand, drop = FALSE]
cfg <- cfg[, -which(names(cfg) %in% no.expand), drop = FALSE]
adjusted_cols <- lapply(need_adjustment, function(suffix) {
pdc_cfg <- get_from_callers(suffix, mode = "list")
cols <- intersect(names(pdc_cfg), no.expand)
adjusted_cols <- adjust_cols[, cols, drop = FALSE]
if (suffix != "tadpole")
names(adjusted_cols) <- paste0(names(adjusted_cols), "_", suffix)
adjusted_cols
})
cfg <- dplyr::bind_cols(cfg, adjusted_cols)
}
# for info
attr(cfg, "num.configs") <- switch(
type,
partitional = length(k) * sum(cfg$nrep),
hierarchical = length(k) * length(cfg$method[[1L]]) * nrow(cfg),
fuzzy = length(k) * nrow(cfg),
tadpole = length(k) * length(cfg$dc[[1L]]) * nrow(cfg)
)
# return Map
cfg
})
}
#' Compare different clustering configurations
#'
#' Compare many different clustering algorithms with support for parallelization.
#'
#' @export
#' @importFrom dplyr bind_rows
#' @importFrom dplyr inner_join
#' @importFrom proxy pr_DB
#'
#' @param series A list of series, a numeric matrix or a data frame. Matrices and data frames are
#' coerced to a list row-wise (see [tslist()]).
#' @param types Clustering types. It must be any combination of (possibly abbreviated):
#' "partitional", "hierarchical", "fuzzy", "tadpole."
#' @param configs The list of data frames with the desired configurations to run. See
#' [pdc_configs()] and [compare_clusterings_configs()].
#' @param seed Seed for random reproducibility.
#' @param trace Logical indicating that more output should be printed to screen.
#' @param ... Further arguments for [tsclust()], `score.clus` or `pick.clus`.
#' @param score.clus A function that gets the list of results (and `...`) and scores each one. It
#' may also be a named list of functions, one for each type of clustering. See Scoring section.
#' @param pick.clus A function to pick the best result. See Picking section.
#' @param shuffle.configs Randomly shuffle the order of configs, which can be useful to balance load
#' when using parallel computation.
#' @param return.objects Logical indicating whether the objects returned by [tsclust()] should be
#' given in the result.
#' @param packages A character vector with the names of any packages needed for any functions used
#' (distance, centroid, preprocessing, etc.). The name "dtwclust" is added automatically. Relevant
#' for parallel computation.
#' @param .errorhandling This will be passed to [foreach::foreach()]. See Parallel section below.
#'
#' @details
#'
#' This function calls [tsclust()] with different configurations and evaluates the results with the
#' provided functions. Parallel support is included. See the examples.
#'
#' Parameters specified in `configs` whose values are `NA` will be ignored automatically.
#'
#' The scoring and picking functions are for convenience, if they are not specified, the `scores`
#' and `pick` elements of the result will be `NULL`.
#'
#' See [repeat_clustering()] for when `return.objects = FALSE`.
#'
#' @return
#'
#' A list with:
#'
#' - `results`: A list of data frames with the flattened configs and the corresponding scores
#' returned by `score.clus`.
#' - `scores`: The scores given by `score.clus`.
#' - `pick`: The object returned by `pick.clus`.
#' - `proc_time`: The measured execution time, using [base::proc.time()].
#' - `seeds`: A list of lists with the random seeds computed for each configuration.
#'
#' The cluster objects are also returned if `return.objects` `=` `TRUE`.
#'
#' @section Parallel computation:
#'
#' The configurations for each clustering type can be evaluated in parallel (multi-processing)
#' with the \pkg{foreach} package. A parallel backend can be registered, e.g., with
#' \pkg{doParallel}.
#'
#' If the `.errorhandling` parameter is changed to "pass" and a custom `score.clus` function is
#' used, said function should be able to deal with possible error objects.
#'
#' If it is changed to "remove", it might not be possible to attach the scores to the results data
#' frame, or it may be inconsistent. Additionally, if `return.objects` is `TRUE`, the names given
#' to the objects might also be inconsistent.
#'
#' Parallelization can incur a lot of deep copies of data when returning the cluster objects,
#' since each one will contain a copy of `datalist`. If you want to avoid this, consider
#' specifying `score.clus` and setting `return.objects` to `FALSE`, and then using
#' [repeat_clustering()].
#'
#' @section Scoring:
#'
#' The clustering results are organized in a *list of lists* in the following way (where only
#' applicable `types` exist; first-level list names in bold):
#'
#' - **partitional** - list with
#' + Clustering results from first partitional config
#' + etc.
#' - **hierarchical** - list with
#' + Clustering results from first hierarchical config
#' + etc.
#' - **fuzzy** - list with
#' + Clustering results from first fuzzy config
#' + etc.
#' - **tadpole** - list with
#' + Clustering results from first tadpole config
#' + etc.
#'
#' If `score.clus` is a function, it will be applied to the available partitional, hierarchical,
#' fuzzy and/or tadpole results via:
#'
#' ```
#' scores <- lapply(list_of_lists, score.clus, ...)
#' ```
#'
#' Otherwise, `score.clus` should be a list of functions with the same names as the list above, so
#' that `score.clus$partitional` is used to score `list_of_lists$partitional` and so on (via
#' [base::Map()]).
#'
#' Therefore, the scores returned shall always be a list of lists with first-level names as above.
#'
#' @section Picking:
#'
#' If `return.objects` is `TRUE`, the results' data frames and the list of [TSClusters-class]
#' objects are given to `pick.clus` as first and second arguments respectively, followed by `...`.
#' Otherwise, `pick.clus` will receive only the data frames and the contents of `...` (since the
#' objects will not be returned by the preceding step).
#'
#' @section Limitations:
#'
#' Note that the configurations returned by the helper functions assign special names to
#' preprocessing/distance/centroid arguments, and these names are used internally to recognize
#' them.
#'
#' If some of these arguments are more complex (e.g. matrices) and should *not* be expanded,
#' consider passing them directly via the ellipsis (`...`) instead of using [pdc_configs()]. This
#' assumes that said arguments can be passed to all functions without affecting their results.
#'
#' The distance matrices (if calculated) are not re-used across configurations. Given the way the
#' configurations are created, this shouldn't matter, because clusterings with arguments that can
#' use the same distance matrix are already grouped together by [compare_clusterings_configs()]
#' and [pdc_configs()].
#'
#' @author Alexis Sarda-Espinosa
#'
#' @seealso
#'
#' [compare_clusterings_configs()], [tsclust()]
#'
#' @example man-examples/comparison-examples.R
#'
compare_clusterings <- function(series = NULL, types = c("p", "h", "f", "t"),
configs = compare_clusterings_configs(types),
seed = NULL, trace = FALSE, ...,
score.clus = function(...) stop("No scoring"),
pick.clus = function(...) stop("No picking"),
shuffle.configs = FALSE, return.objects = FALSE,
packages = character(0L), .errorhandling = "stop")
{
# ==============================================================================================
# Start
# ==============================================================================================
tic <- proc.time()
handle_rngkind() # UTILS-rng.R
set.seed(seed)
score_missing <- missing(score.clus)
pick_missing <- missing(pick.clus)
# nocov start
if (is.null(series))
stop("No series provided.")
if (!return.objects && score_missing)
stop("Returning no objects and specifying no scoring function would return no useful results.")
types <- match.arg(types, supported_clusterings, TRUE)
.errorhandling <- match.arg(.errorhandling, c("stop", "remove", "pass"))
# coerce to list if necessary
if (is.data.frame(series) || !is.list(series))
series <- tslist(series, TRUE)
check_consistency(series, "vltslist")
if (!is.function(score.clus) && !(is.list(score.clus) && all(sapply(score.clus, is.function))))
stop("Invalid evaluation function(s)")
else if (is.list(score.clus)) {
if (!all(types %in% names(score.clus)))
stop("The names of the 'score.clus' argument do not correspond to the provided 'types'")
score.clus <- score.clus[types]
}
if (!is.function(pick.clus))
stop("Invalid pick function") # nocov end
# ----------------------------------------------------------------------------------------------
# Misc parameters
# ----------------------------------------------------------------------------------------------
packages <- unique(c("dtwclust", packages))
dots <- list(...)
configs <- configs[types]
if (any(sapply(configs, is.null)))
stop("The configuration for one of the chosen clustering types is missing.") # nocov
if (shuffle.configs) {
configs <- lapply(configs, function(config) {
config[sample(nrow(config)), , drop = FALSE]
})
}
# ----------------------------------------------------------------------------------------------
# Obtain random seeds
# ----------------------------------------------------------------------------------------------
num_seeds <- cumsum(sapply(configs, nrow))
seeds <- rng_seq(num_seeds[length(num_seeds)], seed = seed, simplify = FALSE) # UTILS-rng.R
seeds <- Map(c(1L, num_seeds[-length(num_seeds)] + 1L), num_seeds,
f = function(first, last) { seeds[first:last] })
setnames_inplace(seeds, names(configs)) # UTILS-utils.R
# ==============================================================================================
# Preprocessings
# ==============================================================================================
if (trace) message("=================================== Preprocessing ",
"series ===================================\n")
processed_series <- Map(configs, types, f = function(config, type) {
preproc_cols <- grepl("_?preproc$", names(config))
preproc_df <- unique(config[, preproc_cols, drop = FALSE])
preproc_args <- grepl("_preproc$", names(preproc_df))
if (trace) {
message("-------------- Applying ", type, " preprocessings: --------------")
print(preproc_df)
}
config$.preproc_id_ <- seq_len(nrow(config))
lapply(seq_len(nrow(preproc_df)), function(i) {
preproc_char <- preproc_df$preproc[i]
if (preproc_char != "none") {
# find all configs that have this preproc to assign them as attribute at the end
df <- dplyr::inner_join(config,
preproc_df[i, , drop = FALSE],
by = names(config)[preproc_cols])
preproc_fun <- get_from_callers(preproc_char, "function")
if (any(preproc_args)) {
this_config <- preproc_df[i, preproc_args, drop = FALSE]
names(this_config) <- sub("_preproc$", "", names(this_config))
preproc_args <- as.list(this_config)
preproc_args <- preproc_args[!sapply(preproc_args, is.na)]
}
else
preproc_args <- list()
ret <- quoted_call(preproc_fun, series, dots = preproc_args)
attr(ret, "config_ids") <- df$.preproc_id_
}
else {
ret <- series
}
ret
})
})
# UTILS-utils.R
setnames_inplace(processed_series, names(configs))
# ==============================================================================================
# Clusterings
# ==============================================================================================
if (trace) cat("\n")
objs_by_type <- Map(configs, names(configs), seeds, f = function(config, type, seeds) {
if (trace) message("=================================== Performing ",
type,
" clusterings ===================================\n")
series <- processed_series[[type]]
# ------------------------------------------------------------------------------------------
# distance entries to re-register in parallel workers
# ------------------------------------------------------------------------------------------
if (type != "tadpole") {
dist_names <- unique(config$distance)
dist_entries <- lapply(dist_names, function(dist) { proxy::pr_DB$get_entry(dist) })
setnames_inplace(dist_entries, dist_names)
}
# ------------------------------------------------------------------------------------------
# export any necessary preprocessing and centroid functions
# ------------------------------------------------------------------------------------------
custom_preprocs <- setdiff(unique(config$preproc), "none")
custom_centroids <- setdiff(unique(config$centroid), c("default", centroids_included))
for (custom_preproc in custom_preprocs)
assign(custom_preproc, get_from_callers(custom_preproc, "function"))
for (custom_centroid in custom_centroids)
assign(custom_centroid, get_from_callers(custom_centroid, "function"))
export <- c("trace", "score.clus", "return.objects",
"dots",
"centroids_included",
"check_consistency", "do_call", "quoted_call", "enlist", "subset_dots", "get_from_callers",
"setnames_inplace",
custom_preprocs, custom_centroids)
# ------------------------------------------------------------------------------------------
# perform clusterings
# ------------------------------------------------------------------------------------------
force(seeds)
i <- nrow(config)
objs <- foreach::foreach(
i = seq_len(i),
.combine = c,
.multicombine = TRUE,
.packages = packages,
.export = export,
.errorhandling = .errorhandling
) %op% {
cfg <- config[i, , drop = FALSE]
seed <- seeds[[i]]
if (trace) {
message("-------------- Using configuration: --------------")
print(cfg)
}
# ----------------------------------------------------------------------------------
# obtain args from configuration
# ----------------------------------------------------------------------------------
args <- lapply(c("preproc", "distance", "centroid"), function(func) {
col_ids <- grepl(paste0("_", func, "$"), names(cfg))
if (cfg[[func]] != "none" && any(col_ids)) {
this_args <- as.list(cfg[, col_ids, drop = FALSE])
names(this_args) <- sub(paste0("_", func, "$"),
"",
names(this_args))
ans <- this_args[!sapply(this_args, is.na)]
if (any(sapply(ans, is.list))) {
unlist(ans, recursive = FALSE)
}
else {
ans
}
}
else {
list()
}
})
setnames_inplace(args, c("preproc", "dist", "cent"))
args <- do_call("tsclust_args", args)
# ----------------------------------------------------------------------------------
# controls for this configuration
# ----------------------------------------------------------------------------------
control_fun_name <- paste0(type, "_control")
control_fun <- match.fun(control_fun_name)
control_args <- subset_dots(as.list(cfg), control_fun)
control_args <- lapply(control_args, unlist, recursive = FALSE)
control <- do_call(control_fun_name, control_args)
# ----------------------------------------------------------------------------------
# get processed series
# ----------------------------------------------------------------------------------
preproc_char <- cfg$preproc
config_ids <- lapply(series, attr, which = "config_ids")
if (preproc_char == "none")
this_series <- which(sapply(config_ids, is.null))
else
this_series <- which(sapply(config_ids, function(cfg_id) { i %in% cfg_id }))
if (length(this_series) > 1L) # nocov start
stop("Could not find unique processed series for ", type,
" clustering and config row=", i)
else # nocov end
this_series <- series[[this_series]]
# ----------------------------------------------------------------------------------
# distance entry to re-register in parallel worker
# ----------------------------------------------------------------------------------
if (type != "tadpole") {
distance <- cfg$distance
dist_entry <- dist_entries[[distance]]
if (!check_consistency(dist_entry$names[1L], "dist"))
do_call(proxy::pr_DB$set_entry, dist_entry) # nocov
}
else distance <- NULL # dummy
# ----------------------------------------------------------------------------------
# centroid for this configuration
# ----------------------------------------------------------------------------------
centroid_char <- cfg$centroid
# ----------------------------------------------------------------------------------
# call tsclust
# ----------------------------------------------------------------------------------
this_args <- enlist(series = this_series,
type = type,
k = unlist(cfg$k),
distance = distance,
seed = seed,
trace = trace,
args = args,
control = control,
error.check = FALSE,
dots = dots)
if (type == "tadpole")
this_args$distance <- NULL
if (centroid_char == "default") {
# do not specify centroid
tsc <- do_call("tsclust", this_args)
}
else if (type %in% c("partitional", "fuzzy") && centroid_char %in% centroids_included) {
# with included centroid
tsc <- quoted_call(tsclust, centroid = centroid_char, dots = this_args)
}
else {
# with centroid function
tsc <- quoted_call(tsclust,
centroid = get_from_callers(centroid_char, "function"),
dots = this_args)
}
if (inherits(tsc, "TSClusters"))
tsc <- list(tsc)
ret <- lapply(tsc, function(tsc) {
tsc@preproc <- preproc_char
if (preproc_char != "none")
tsc@family@preproc <- get_from_callers(preproc_char, "function")
if (centroid_char != "default")
tsc@centroid <- centroid_char
tsc
})
# ----------------------------------------------------------------------------------
# evaluate
# ----------------------------------------------------------------------------------
if (!return.objects) {
if (!is.function(score.clus)) score.clus <- score.clus[[type]]
ret <- list(quoted_call(score.clus, ret, dots = dots))
}
# return config result from foreach()
ret
}
class(objs) <- NULL
if (.errorhandling == "pass") {
failed_cfgs <- sapply(objs, function(obj) { !inherits(obj, "TSClusters") })
if (any(failed_cfgs)) {
warning("At least one of the ", type, " configurations resulted in an error.")
# a simple error is a list with 2 elements: message and call, so I need to re-pack
# each pair of elements in a single element of the objs list
which_failed <- which(failed_cfgs)[seq(from = 1L, by = 2L, length.out = sum(failed_cfgs) / 2)]
for (failed_cfg_id in which_failed) {
objs[[failed_cfg_id]] <- structure(objs[failed_cfg_id:(failed_cfg_id + 1L)],
class = c("simpleError", "error", "condition"))
}
names(objs)[which_failed] <- paste0("failure_", seq_along(which_failed))
objs[which_failed + 1L] <- NULL
}
}
objs
})
# ==============================================================================================
# Evaluations
# ==============================================================================================
if (return.objects) {
if (is.function(score.clus))
scores <- try(lapply(objs_by_type, score.clus, ...), silent = TRUE)
else
scores <- try(mapply(objs_by_type, score.clus[names(objs_by_type)],
SIMPLIFY = FALSE,
MoreArgs = dots,
FUN = function(objs, score_fun, ...) { score_fun(objs, ...) }),
silent = TRUE)
if (inherits(scores, "try-error")) {
if (!score_missing) warning("The score.clus function(s) did not execute successfully:\n",
attr(scores, "condition")$message)
scores <- NULL
}
}
else {
scores <- lapply(objs_by_type, function(objs) {
failed_cfgs <- sapply(objs, function(obj) { inherits(obj, "error") })
if (any(failed_cfgs))
passed_objs <- objs[!failed_cfgs]
else
passed_objs <- objs
if (length(passed_objs) == 0L) return(objs)
if (any(sapply(passed_objs, function(score) { is.null(dim(score)) })))
unlist(passed_objs, recursive = FALSE)
else
dplyr::bind_rows(lapply(passed_objs, base::as.data.frame))
})
}
# ==============================================================================================
# Data frame with results
# ==============================================================================================
# create initial IDs
i_cfg <- 1L
config_ids <- lapply(sapply(configs, nrow), function(nr) {
ids <- seq(from = i_cfg, by = 1L, length.out = nr)
i_cfg <<- i_cfg + nr
ids
})
# change to config names and assign to seeds (before flattening)
config_ids <- Map(config_ids, seeds, f = function(ids, seed) {
nms <- paste0("config", ids)
try(setnames_inplace(seed, nms), silent = TRUE)
nms
})
# flatten
configs_out <- Map(configs, config_ids, types, f = function(config, ids, type) {
config <- data.frame(config_id = ids, config, stringsAsFactors = FALSE)
k <- unlist(config$k[1L])
dfs <- switch(
type,
partitional = {
lapply(seq_len(nrow(config)), function(i) {
this_config <- config[i, , drop = FALSE]
rep <- 1L:this_config$nrep
this_config <- this_config[setdiff(names(this_config), c("k", "nrep"))]
df <- expand.grid(rep = rep, k = k)
make_unique_ids(df, this_config) # see EOF
})
},
hierarchical = {
lapply(seq_len(nrow(config)), function(i) {
this_config <- config[i, , drop = FALSE]
method <- unlist(this_config$method)
this_config <- this_config[setdiff(names(this_config), c("k", "method"))]
df <- expand.grid(method = method, k = k, stringsAsFactors = FALSE)[c("k", "method")]
make_unique_ids(df, this_config) # see EOF
})
},
fuzzy = {
lapply(seq_len(nrow(config)), function(i) {
this_config <- config[i, , drop = FALSE]
this_config <- this_config[setdiff(names(this_config), c("k"))]
df <- expand.grid(k = k)
make_unique_ids(df, this_config) # see EOF
})
},
tadpole = {
lapply(seq_len(nrow(config)), function(i) {
this_config <- config[i, , drop = FALSE]
dc <- unlist(this_config$dc)
this_config <- this_config[setdiff(names(this_config), c("k", "dc"))]
df <- expand.grid(k = k, dc = dc)
make_unique_ids(df, this_config) # see EOF
})
}
)
# return Map
dplyr::bind_rows(dfs)
})
# ----------------------------------------------------------------------------------------------
# Add scores and pick
# ----------------------------------------------------------------------------------------------
# in case ordering is required below
if (shuffle.configs)
configs_cols <- lapply(configs_out, function(config) {
setdiff(colnames(config), c("config_id", "rep", "k", "method", "dc", "window.size", "lb"))
})
if (!is.null(scores)) {
results <- try(Map(configs_out, scores,
f = function(config, score) {
cbind(config, base::as.data.frame(score))
}),
silent = TRUE)
if (inherits(results, "try-error")) {
warning("The scores could not be appended to the results data frame:\n",
attr(results, "condition")$message)
results <- configs_out
pick <- NULL
}
else {
if (return.objects) {
pick <- try(pick.clus(results, objs_by_type, ...), silent = TRUE)
if (inherits(pick, "try-error")) {
if (!pick_missing) warning("The pick.clus function did not execute successfully:\n",
attr(pick, "condition")$message)
pick <- NULL
}
}
else {
pick <- try(pick.clus(results, ...), silent = TRUE)
if (inherits(pick, "try-error")) {
if (!pick_missing) warning("The pick.clus function did not execute successfully:\n",
attr(pick, "condition")$message)
pick <- NULL
}
}
}
}
else {
results <- configs_out
pick <- NULL
}
# ==============================================================================================
# List with all results
# ==============================================================================================
results <- list(results = results,
scores = scores,
pick = pick,
proc_time = proc.time() - tic,
seeds = seeds)
if (return.objects) {
setnames_res <- try(Map(objs_by_type, results$results,
f = function(objs, res) { setnames_inplace(objs, res$config_id) }),
silent = TRUE)
if (inherits(setnames_res, "try-error"))
warning("Could not assign names to returned objects:\n",
attr(setnames_res, "condition")$message)
results <- c(results, objects = objs_by_type)
}
if (shuffle.configs)
results$results <- Map(results$results, configs_cols[names(results$results)],
f = function(result, cols) {
order_args <- as.list(result[cols])
names(order_args) <- NULL
base_order <- base::order
result[do_call("base_order", order_args), , drop = FALSE]
})
# return results
results
}
# ==================================================================================================
# compare_clusterings helpers
# ==================================================================================================
make_unique_ids <- function(df, this_config) {
rownames(this_config) <- NULL
this_config <- cbind(this_config[, 1L, drop = FALSE], df, this_config[, -1L, drop = FALSE])
nr <- nrow(this_config)
if (nr > 1L) this_config$config_id <- paste0(this_config$config_id, "_", 1L:nr)
this_config
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.