# Nothing
#######################################################################
# stream - Infrastructure for Data Stream Mining
# Copyright (C) 2018 Matthias Carnein
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#' evoStream - Evolutionary Stream Clustering
#'
#' Micro Clusterer with reclustering.
#' Stream clustering algorithm based on evolutionary optimization.
#'
#' The online component uses a simplified version of [DBSTREAM] to
#' generate micro-clusters. The micro-clusters are then incrementally
#' reclustered using an evolutionary algorithm. Evolutionary algorithms create
#' slight variations by combining and randomly modifying existing solutions. By
#' iteratively selecting better solutions, an evolutionary pressure is created
#' which improves the clustering over time. Since the evolutionary algorithm is
#' incremental, it is possible to apply it between observations, e.g. in the
#' idle time of the stream. Whenever there is idle time, we can call the
#' [recluster()] function of the reference class to improve the
#' macro-clusters (see example). The evolutionary algorithm can also be applied
#' as a traditional reclustering step, or a combination of both. In addition,
#' this implementation also allows evaluating a fixed number of generations
#' after each observation.
#'
#' @family DSC_Micro
#' @family DSC_TwoStage
#'
#' @param formula `NULL` to use all features in the stream or a model [formula] of the form `~ X1 + X2`
#' to specify the features used for clustering. Only `.`, `+` and `-` are currently
#' supported in the formula.
#' @param r radius threshold for micro-cluster assignment
#' @param lambda decay rate
#' @param tgap time-interval between outlier detection and clean-up
#' @param k number of macro-clusters
#' @param crossoverRate cross-over rate for the evolutionary algorithm
#' @param mutationRate mutation rate for the evolutionary algorithm
#' @param populationSize number of solutions that the evolutionary algorithm
#' maintains
#' @param initializeAfter number of micro-clusters required for the
#' initialization of the evolutionary algorithm.
#' @param incrementalGenerations number of EA generations performed after each
#' observation
#' @param reclusterGenerations number of EA generations performed during
#' reclustering
#' @author Matthias Carnein \email{Matthias.Carnein@@uni-muenster.de}
#' @references
#' Carnein M. and Trautmann H. (2018), "evoStream - Evolutionary
#' Stream Clustering Utilizing Idle Times", Big Data Research.
#' @examples
#' stream <- DSD_Gaussians(k = 3, d = 2) %>% DSD_Memory(n = 500)
#'
#' ## init evoStream
#' evoStream <- DSC_evoStream(r = 0.05, k = 3,
#' incrementalGenerations = 1, reclusterGenerations = 500)
#'
#' ## insert observations
#' update(evoStream, stream, n = 500)
#'
#' ## micro clusters
#' get_centers(evoStream, type = "micro")
#'
#' ## micro weights
#' get_weights(evoStream, type = "micro")
#'
#' ## macro clusters
#' get_centers(evoStream, type = "macro")
#'
#' ## macro weights
#' get_weights(evoStream, type = "macro")
#'
#' ## plot result
#' reset_stream(stream)
#' plot(evoStream, stream)
#'
#' ## if we have time, then we can evaluate additional generations.
#' ## This can be called at any time, also between observations.
#' ## by default, 1 generation is evaluated after each observation and
#' ## 1000 generations during reclustering but we set it here to 500
#' evoStream$RObj$recluster(500)
#'
#' ## plot improved result
#' reset_stream(stream)
#' plot(evoStream, stream)
#'
#' ## get assignment of micro to macro clusters
#' microToMacro(evoStream)
#' @export
DSC_evoStream <- function(formula = NULL,
                          r,
                          lambda = 0.001,
                          tgap = 100,
                          k = 2,
                          crossoverRate = .8,
                          mutationRate = .001,
                          populationSize = 100,
                          initializeAfter = 2 * k,
                          incrementalGenerations = 1,
                          reclusterGenerations = 1000) {
  # Create the reference-class backend; every tuning parameter is handed
  # over by name so the mapping onto `initialize()` is explicit.
  backend <- evoStream_R$new(
    r = r,
    lambda = lambda,
    tgap = tgap,
    k = k,
    crossoverRate = crossoverRate,
    mutationRate = mutationRate,
    populationSize = populationSize,
    initializeAfter = initializeAfter,
    incrementalGenerations = incrementalGenerations,
    reclusterGenerations = reclusterGenerations
  )

  # Assemble the clusterer as a plain list and tag it with its S3 classes.
  dsc <- list(
    description = "evoStream - Evolutionary Stream Clustering",
    formula = formula,
    RObj = backend
  )
  class(dsc) <- c("DSC_evoStream", "DSC_Micro", "DSC_R", "DSC")
  dsc
}
# Reference class that holds the state of one evoStream clusterer.
evoStream_R <- setRefClass(
  "evoStream_R",
  fields = list(
    C = "ANY",        # handle to the exposed backend object (set in initialize)
    colnames = "ANY"  # column names applied to returned centers; NULL initially
  )
)
evoStream_R$methods(
  # Constructor: creates the exposed backend object, configures it with the
  # algorithm parameters, and clears the stored column names.
  initialize = function(r,
                        lambda,
                        tgap,
                        k,
                        crossoverRate,
                        mutationRate,
                        populationSize,
                        initializeAfter,
                        incrementalGenerations,
                        reclusterGenerations) {
    # No column names are recorded at construction time.
    colnames <<- NULL

    # The exposed constructor accepts only limited parameters, so the object
    # is created bare and configured through setFields() afterwards.
    backend <- new(EvoStream)
    backend$setFields(
      r,
      lambda,
      tgap,
      k,
      crossoverRate,
      mutationRate,
      populationSize,
      initializeAfter,
      incrementalGenerations,
      reclusterGenerations
    )
    C <<- backend

    .self
  }
)
evoStream_R$methods(
  # Feed new observations to the backend clusterer.
  # `newdata` is coerced with as.matrix() before being handed over.
  # Always returns NULL.
  cluster = function(newdata) {
    points <- as.matrix(newdata)
    C$cluster(points)
    NULL
  }
)
evoStream_R$methods(
  # Return the micro-cluster centers reported by the backend as a data frame.
  # The columns are labelled with the stored `colnames` field when it is set.
  get_microclusters = function() {
    centers <- as.data.frame(.self$C$get_microclusters())
    # `colnames` starts out as NULL; assigning NULL would strip the names
    # from the data frame, so relabel only when names have been recorded.
    if (!is.null(colnames)) {
      colnames(centers) <- colnames
    }
    centers
  }
)
evoStream_R$methods(
  # Return the micro-cluster weights exactly as reported by the backend.
  get_microweights = function() {
    C$get_microweights()
  }
)
evoStream_R$methods(
  # Return the macro-cluster centers reported by the backend as a data frame.
  # The columns are labelled with the stored `colnames` field when it is set.
  get_macroclusters = function() {
    centers <- as.data.frame(.self$C$get_macroclusters())
    # `colnames` starts out as NULL; assigning NULL would strip the names
    # from the data frame, so relabel only when names have been recorded.
    if (!is.null(colnames)) {
      colnames(centers) <- colnames
    }
    centers
  }
)
evoStream_R$methods(
  # Return the macro-cluster weights exactly as reported by the backend.
  get_macroweights = function() {
    C$get_macroweights()
  }
)
evoStream_R$methods(
  # Map micro-clusters to their macro-cluster.
  # With `micro = NULL` the full assignment vector is returned; otherwise
  # only the entries selected by `micro`.
  microToMacro = function(micro = NULL) {
    # The backend result is shifted by one
    # (presumably 0-based -> 1-based indices; confirm against the backend).
    assignment <- .self$C$microToMacro() + 1
    if (is.null(micro)) {
      return(assignment)
    }
    assignment[micro]
  }
)
evoStream_R$methods(
  # Ask the backend to run `generations` additional reclustering generations
  # (default 1). The backend call's result is passed through unchanged.
  recluster = function(generations = 1) {
    C$recluster(generations)
  }
)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.