# R/RcppExports.R

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Symbolic Aggregate Approximation
#'
#' Converts a numeric time series to a series of discrete values (here: integers,
#' but could also be letters or other symbols), depending on where they would be
#' placed in the series of intervals called \code{limits}. Inspired by the pure R
#' implementation in \code{TSclust::convert.to.SAX.symbol()}, but faster because
#' it is implemented in C++.
#'
#' @section References:
#'
#' Lin, J., Keogh, E., Lonardi, S. & Chiu, B. (2003). A symbolic representation
#' of time series, with implications for streaming algorithms. In \emph{Proceedings
#' of the 8th ACM SIGMOD workshop on research issues in data mining and knowledge
#' discovery} (pp. 2-11). ACM.
#'
#' @param x Numeric vector/time series.
#' @param limits The interval boundaries which determine the mapping to symbols.
#' A time series value in the i-th interval will be mapped to the value i. Your
#' limits should be a monotonically increasing vector, starting from negative
#' infinity and ending with positive infinity (length = number of symbols + 1).
#' @return The series converted to integer values.
#' @family SAX functions
#' @export
SAX_fast <- function(x, limits) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_SAX_fast`, x, limits)
}

#' Piecewise Aggregate Approximation
#'
#' Divides a time series into \code{windowCount} frames of equal length and
#' represents each interval by its mean. If the time series length is not
#' divisible by the number of windows, one element might belong to two windows,
#' but only with a certain fraction to each. Inspired by the pure R implementation
#' in \code{TSclust::PAA()}, but faster because it is implemented in C++.
#'
#' @section References:
#'
#' Keogh, E. J. & Pazzani, M. J. (2000). Scaling up dynamic time warping for
#' datamining applications. In \emph{Proceedings of the sixth ACM SIGKDD
#' international conference on knowledge discovery and data mining} (pp. 285-289).
#' ACM.
#'
#' Keogh, E., Chakrabarti, K., Pazzani, M. & Mehrotra, S. (2001). Dimensionality
#' reduction for fast similarity search in large time series databases.
#' \emph{Knowledge and Information Systems}, 3(3), 263-286.
#'
#' @param x Numeric vector/time series.
#' @param windowCount The number of windows for the shortened time series.
#' @return A numeric vector of length \code{windowCount}.
#' @family piecewise aggregation functions
#' @export
PAA_fast <- function(x, windowCount) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_PAA_fast`, x, windowCount)
}

#' Piecewise Maximum Aggregate Approximation
#'
#' Divides a time series into \code{windowCount} frames of equal length and
#' represents each interval by its maximum. If the time series length is not
#' divisible by the number of windows, elements are still assigned uniquely to
#' one window each.
#'
#' The function is similar to \code{\link{PAA_fast}}, but simply uses a different
#' aggregate.
#'
#' @param x Numeric vector/time series.
#' @param windowCount The number of windows for the shortened time series.
#' @return A numeric vector of length \code{windowCount}.
#' @family piecewise aggregation functions
#' @export
PMaxAA_fast <- function(x, windowCount) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_PMaxAA_fast`, x, windowCount)
}

#' Piecewise Median Aggregate Approximation
#'
#' Divides a time series into \code{windowCount} frames of equal length and
#' represents each interval by its median. If the time series length is not
#' divisible by the number of windows, elements are still assigned uniquely to
#' one window each.
#'
#' The function is similar to \code{\link{PAA_fast}}, but simply uses a different
#' aggregate.
#'
#' @param x Numeric vector/time series.
#' @param windowCount The number of windows for the shortened time series.
#' @return A numeric vector of length \code{windowCount}.
#' @family piecewise aggregation functions
#' @export
PMedAA_fast <- function(x, windowCount) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_PMedAA_fast`, x, windowCount)
}

#' Piecewise Minimum Aggregate Approximation
#'
#' Divides a time series into \code{windowCount} frames of equal length and
#' represents each interval by its minimum. If the time series length is not
#' divisible by the number of windows, elements are still assigned uniquely to
#' one window each.
#'
#' The function is similar to \code{\link{PAA_fast}}, but simply uses a different
#' aggregate.
#'
#' @param x Numeric vector/time series.
#' @param windowCount The number of windows for the shortened time series.
#' @return A numeric vector of length \code{windowCount}.
#' @family piecewise aggregation functions
#' @export
PMinAA_fast <- function(x, windowCount) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_PMinAA_fast`, x, windowCount)
}

#' Piecewise Standard Deviation Aggregate Approximation
#'
#' Divides a time series into \code{windowCount} frames of equal length and
#' represents each interval by its standard deviation. If the time series length
#' is not divisible by the number of windows, one element might belong to two
#' windows, but only with a certain fraction to each.
#'
#' The function is similar to \code{\link{PAA_fast}}, but simply uses a different
#' aggregate.
#'
#' @param x Numeric vector/time series.
#' @param windowCount The number of windows for the shortened time series.
#' @param sample Compute the sample standard deviation instead of the population
#' standard deviation (divide by n-1 instead of n)? Default: \code{FALSE}.
#' @return A numeric vector of length \code{windowCount}.
#' @family piecewise aggregation functions
#' @export
PSDAA_fast <- function(x, windowCount, sample = FALSE) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_PSDAA_fast`, x, windowCount, sample)
}

#' Mean with Weighted First And Last Element
#'
#' Calculates the mean of a sub-vector, weighting start and end if the
#' corresponding indices are not whole numbers.
#'
#' @param x Numeric vector.
#' @param startIdx Index of first element of sub-vector.
#' @param endIdx Index of last element of sub-vector.
#' @return The weighted mean as double.
subVectorMean_fast <- function(x, startIdx, endIdx) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_subVectorMean_fast`, x, startIdx, endIdx)
}

#' Piecewise Skewness Aggregate Approximation
#'
#' Divides a time series into \code{windowCount} frames of equal length and
#' represents each interval by its skewness. If the time series length is not
#' divisible by the number of windows, one element might belong to two windows,
#' but only with a certain fraction to each.
#'
#' The function is similar to \code{\link{PAA_fast}}, but simply uses a different
#' aggregate.
#'
#' @param x Numeric vector/time series.
#' @param windowCount The number of windows for the shortened time series.
#' @param nanReplace All NaN elements will be replaced with this value (default:
#' \code{NA_real_}). NaNs can occur if a segment is constant.
#' @return A numeric vector of length \code{windowCount}.
#' @family piecewise aggregation functions
#' @export
PSkewAA_fast <- function(x, windowCount, nanReplace = NA_real_) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_PSkewAA_fast`, x, windowCount, nanReplace)
}

#' Piecewise Kurtosis Aggregate Approximation
#'
#' Divides a time series into \code{windowCount} frames of equal length and
#' represents each interval by its kurtosis. If the time series length is not
#' divisible by the number of windows, one element might belong to two windows,
#' but only with a certain fraction to each.
#'
#' The function is similar to \code{\link{PAA_fast}}, but simply uses a different
#' aggregate.
#'
#' @param x Numeric vector/time series.
#' @param windowCount The number of windows for the shortened time series.
#' @param nanReplace All NaN elements will be replaced with this value (default:
#' \code{NA_real_}). NaNs can occur if a segment is constant.
#' @param excess Compute excess kurtosis (kurtosis - 3, which is zero for a
#' normal distribution)? Default: \code{FALSE}.
#' @return A numeric vector of length \code{windowCount}.
#' @family piecewise aggregation functions
#' @export
PKurtAA_fast <- function(x, windowCount, nanReplace = NA_real_, excess = FALSE) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_PKurtAA_fast`, x, windowCount, nanReplace, excess)
}

#' Average Univariate Time Series
#'
#' Multiplies a list of univariate time series with a weight vector and
#' sums up the result to get a weighted average.
#'
#' @param tsList A list of numeric vectors, all having the same length.
#' @param weights A vector of weights, having the same length as the list.
#' @return The weighted average of the series from the list.
#' @export
averageTimeSeries_fast <- function(tsList, weights) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_averageTimeSeries_fast`, tsList, weights)
}

#' Average Multivariate Time Series
#'
#' Multiplies a list of multivariate time series with a weight vector and
#' sums up the result to get a weighted average.
#'
#' @param tsList A list of numeric matrices (multivariate time series), all
#' having the same length.
#' @param weights A vector of weights, having the same length as the list.
#' @return The weighted average of the series from the list.
#' @export
averageTimeSeriesMult_fast <- function(tsList, weights) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_averageTimeSeriesMult_fast`, tsList, weights)
}

#' L2 Complexity Correction Factor for a Time Series Distance
#'
#' Calculates the complexity correction factor for the distance between two time
#' series, using the L2 norm of each time series' diff vector as complexity
#' estimate. Can be combined with any distance as a scaling factor (distances
#' between vectors of different complexity become more prominent). Does not obey
#' the triangle inequality if it is combined with the Euclidean distance, but a
#' relaxed version (see reference).
#'
#' This factor is currently integrated as a parameter into the L2 distance and
#' dynamic time warping distance of this package.
#'
#' @section References:
#'
#' Batista, G. E., Keogh, E. J., Tataw, O. M. & De Souza, V. M. (2014). CID: An
#' efficient complexity-invariant distance for time series. \emph{Data Mining and
#' Knowledge Discovery, 28}(3), 634-669.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @return The complexity correction factor as double. Is infinity if one series
#' is constant and the other one not.
#' @seealso \code{\link{l2Dist_fast}}, \code{\link{DTWDist_fast}}
#' @export
l2CompCorFactor_fast <- function(x, y) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_l2CompCorFactor_fast`, x, y)
}

#' Multi-variate L2 Complexity Correction Factor
#'
#' Calculates the complexity correction factor for the distance between two
#' multi-variate time series, using the L2 norm of each time series' attributes'
#' diff vectors as complexity estimate. Can be combined with any distance as a
#' scaling factor (distances between vectors of different complexity become more
#' prominent). Does not obey the triangle inequality if it is combined with the
#' Euclidean distance, but a relaxed version (see reference).
#'
#' This factor is currently integrated as a parameter into the L2 distance and
#' dynamic time warping distance of this package.
#'
#' @section References:
#'
#' Batista, G. E., Keogh, E. J., Tataw, O. M. & De Souza, V. M. (2014). CID: An
#' efficient complexity-invariant distance for time series. \emph{Data Mining and
#' Knowledge Discovery, 28}(3), 634-669.
#'
#' Kotsakos, D., Trajcevski, G., Gunopulos, D. & Aggarwal, C. C. (2014). Time-series
#' data clustering. In C. C. Aggarwal & C. K. Reddy (Eds.), \emph{Data clustering :
#' Algorithms and applications} (pp. 357–380). Chapman & Hall/CRC data mining and
#' knowledge discovery series. Boca Raton: CRC Press.
#'
#' @param x 1st numeric matrix/multi-variate time series.
#' @param y 2nd numeric matrix/multi-variate time series.
#' @return The complexity correction factor as double. Is infinity if one series
#' is constant in all attributes and the other one not.
#' @seealso \code{\link{l2Dist_fast}}, \code{\link{DTWDist_fast}}
#' @export
l2CompCorFactorMult_fast <- function(x, y) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_l2CompCorFactorMult_fast`, x, y)
}

#' (Fast) L2 Norm
#'
#' Computes the standard Euclidean (L2) norm of a vector with a fast C++
#' implementation.
#'
#' @param x A numeric vector/time series.
#' @return The norm as double.
#' @family L_p distances
#' @export
l2Norm_fast <- function(x) {
    # Forward the argument unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_l2Norm_fast`, x)
}

#' Temporal Correlation
#'
#' Calculates the temporal correlation (correlation between the difference vectors)
#' as defined by Chouakria and Nagabhushan (2007). We additionally set the
#' correlation between two constant series to 1 and between a constant and a
#' non-constant one to 0 to guarantee that the result is always a proper number
#' (i.e., neither NaN nor infinite).
#'
#' @section References:
#'
#' Chouakria, A. D. & Nagabhushan, P. N. (2007). Adaptive dissimilarity index
#' for measuring time series proximity. \emph{Advances in Data Analysis and
#' Classification, 1}(1), 5-21.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @return The temporal correlation as double from the range [-1,1].
tempCor_fast <- function(x, y) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_tempCor_fast`, x, y)
}

#' Temporal Correlation-Based Correction Factor for a Time Series Distance
#'
#' Considers the dissimilarity of two time series regarding their behavior,
#' namely if they move in the same direction (diff vectors used) at the
#' different points in time. This correlation between [-1,1] is scaled with
#' the exponential function to (0,2) (depending on \code{k}) and should be
#' multiplied with another dissimilarity to enhance it with this behavioral
#' information. Introduced by Chouakria and Nagabhushan (2007).
#'
#' This factor is currently integrated as a parameter into the L2 distance and
#' dynamic time warping distance of this package.
#'
#' @section References:
#'
#' Chouakria, A. D. & Nagabhushan, P. N. (2007). Adaptive dissimilarity index
#' for measuring time series proximity. \emph{Advances in Data Analysis and
#' Classification, 1}(1), 5-21.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @param k A non-negative constant for scaling (default: 2).
#' @return The correlation-based scaling factor as double from the range (0,2).
#' @export
cortFactor_fast <- function(x, y, k = 2) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_cortFactor_fast`, x, y, k)
}

#' Multi-variate Temporal Correlation-Based Correction Factor
#'
#' Considers the dissimilarity of two time series regarding their behavior,
#' namely if they move in the same direction (diff vectors used) at the
#' different points in time. This correlation between [-1,1] is first
#' calculated for each dimension/attribute separately and then averaged.
#' It is scaled with the exponential function to (0,2) (depending on \code{k})
#' and should be multiplied with another dissimilarity to enhance it with
#' this behavioral information. Introduced by Chouakria and Nagabhushan (2007)
#' for the uni-variate case.
#'
#' This factor is currently integrated as a parameter into the L2 distance and
#' dynamic time warping distance of this package.
#'
#' @section References:
#'
#' Chouakria, A. D. & Nagabhushan, P. N. (2007). Adaptive dissimilarity index
#' for measuring time series proximity. \emph{Advances in Data Analysis and
#' Classification, 1}(1), 5-21.
#'
#' Kotsakos, D., Trajcevski, G., Gunopulos, D. & Aggarwal, C. C. (2014). Time-series
#' data clustering. In C. C. Aggarwal & C. K. Reddy (Eds.), \emph{Data clustering :
#' Algorithms and applications} (pp. 357–380). Chapman & Hall/CRC data mining and
#' knowledge discovery series. Boca Raton: CRC Press.
#'
#' @param x 1st numeric matrix/multi-variate time series.
#' @param y 2nd numeric matrix/multi-variate time series.
#' @param k A non-negative constant for scaling (default: 2).
#' @return The correlation-based scaling factor as double from the range (0,2).
#' @export
cortFactorMult_fast <- function(x, y, k = 2) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_cortFactorMult_fast`, x, y, k)
}

#' Pairwise Absolute Distance
#'
#' Computes the pairwise absolute distance between two numeric vectors
#' (e.g. univariate time series).
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @return An |x|*|y| matrix containing the pairwise absolute distances.
#' @export
vectorCrossDistMat <- function(x, y) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_vectorCrossDistMat`, x, y)
}

#' (Fast) Correlation-based Dissimilarity
#'
#' Computes correlation-based dissimilarity as described by Golay et al. (1998).
#' The coding is inspired by the \code{TSclust::diss.cor()} method, but faster
#' because of the C++ implementation.
#'
#' @section References:
#'
#' Golay, X., Kollias, S., Stoll, G., Meier, D., Valavanis, A. & Boesiger, P.
#' (1998). A new correlation-based fuzzy logic clustering algorithm for fMRI.
#' \emph{Magnetic Resonance in Medicine, 40}(2), 249-260.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @param beta If this parameter is smaller/equal zero, the formula \eqn{dist(x,y)=
#' \sqrt{2*(1-cor(x,y))}}{dist(x,y) = sqrt(2*(1-cor(x,y)))} is used (d1 in the
#' paper, equals \eqn{\frac{l2Dist(x,y)}{\sqrt{n}}}{l2Dist(x,y)/sqrt(n)} if time
#' series are z-standardized), otherwise
#' \eqn{dist(x,y)=\sqrt{(\frac{1-cor(x,y)}{1+cor(x,y)})^{\beta}}}{dist(x,y) =
#' sqrt(((1-cor(x,y))/(1+cor(x,y)))^beta)} (called d2 in the paper). Default: 0.
#' @return The dissimilarity as double in the range [0,sqrt(2)] if
#' \code{beta <= 0} and [0,Inf] otherwise. Is NaN if at least one series is
#' constant.
#' @export
corDist_fast <- function(x, y, beta = 0) {
    # NOTE(review): per the documented d1 formula, cor(x,y) = -1 would give
    # sqrt(2*2) = 2, so the documented upper bound sqrt(2) looks too small --
    # confirm against the C++ implementation before changing the @return text.
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_corDist_fast`, x, y, beta)
}

#' (Fast) Edit Distance on Real Sequence
#'
#' Computes the Edit Distance on Real Sequence as described by Chen, Özsu
#' and Oria (2005). A match between two (real-valued) time series elements exists
#' if their L1 distance is below an \code{epsilon}. Apart from that, the
#' computation is similar to the standard edit distance. The coding is inspired
#' by the \code{TSdist::EDRDistance()} method, but faster because point-to-point
#' distances computation is integrated into the C++ code.
#'
#' Despite the name, it is not really a distance in the strict sense, as EDR
#' violates the triangle inequality.
#'
#' @section References:
#'
#' Chen, L., Özsu, M. T. & Oria, V. (2005). Robust and fast similarity search
#' for moving object trajectories. In \emph{Proceedings of the 2005 ACM SIGMOD
#' international conference on management of data} (pp. 491–502). ACM.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @param epsilon Maximum distance between two time series elements to count a
#' match.
#' @param normalize Normalize the result to [0,1] considering the maximum
#' possible dissimilarity? Default: \code{FALSE}.
#' @return The distance as double.
#' @family Edit distance functions
#' @export
EDRDist_fast <- function(x, y, epsilon, normalize = FALSE) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_EDRDist_fast`, x, y, epsilon, normalize)
}

#' (Fast) Multi-variate Edit Distance on Real Sequence
#'
#' Computes the Edit Distance on Real Sequence as described by Chen, Özsu
#' and Oria (2005). A match between two time series elements exists
#' if the L1 distances between corresponding attributes are all below an
#' \code{epsilon}. Apart from that, the computation is similar to the standard
#' edit distance. The coding is inspired by the \code{TSdist::EDRDistance()}
#' method, but faster because point-to-point distances computation is integrated
#' into the C++ code.
#'
#' Despite the name, it is not really a distance in the strict sense, as EDR
#' violates the triangle inequality.
#'
#' @section References:
#'
#' Chen, L., Özsu, M. T. & Oria, V. (2005). Robust and fast similarity search
#' for moving object trajectories. In \emph{Proceedings of the 2005 ACM SIGMOD
#' international conference on management of data} (pp. 491–502). ACM.
#'
#' @param x 1st numeric matrix/multi-variate time series.
#' @param y 2nd numeric matrix/multi-variate time series.
#' @param epsilon Maximum distance between two time series elements to count a
#' match.
#' @param normalize Normalize the result to [0,1] considering the maximum
#' possible dissimilarity? Default: \code{FALSE}.
#' @return The distance as double.
#' @family Edit distance functions
#' @export
EDRDistMult_fast <- function(x, y, epsilon, normalize = FALSE) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_EDRDistMult_fast`, x, y, epsilon, normalize)
}

#' (Fast) Edit Distance on Real Sequence and Sakoe-Chiba Window
#'
#' Computes the Edit Distance on Real Sequence as described by Chen, Özsu
#' and Oria (2005), constraining the possible matches to a maximum index
#' difference of \code{windowSize} as described by Sakoe and Chiba (1978).
#' The coding is inspired by the \code{TSdist::EDRDistance()} method, but
#' faster because point-to-point distances computation is integrated into the
#' C++ code.
#'
#' Despite the name, it is not really a distance in the strict sense, as EDR
#' violates the triangle inequality.
#'
#' @section References:
#'
#' Chen, L., Özsu, M. T. & Oria, V. (2005). Robust and fast similarity search
#' for moving object trajectories. In \emph{Proceedings of the 2005 ACM SIGMOD
#' international conference on management of data} (pp. 491–502). ACM.
#'
#' Sakoe, H., & Chiba, S. (1978). Dynamic programming algorithm optimization
#' for spoken word recognition. \emph{IEEE transactions on acoustics, speech,
#' and signal processing, 26}(1), 43-49.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @param epsilon Maximum distance between two time series elements to count a
#' match.
#' @param windowSize The maximum index difference which is considered when
#' matching elements.
#' @return The distance as double (not-a-number if matching is not possible
#' as the time series lengths differ by more than \code{windowSize}).
#' @family Edit distance functions
#' @export
EDRDistSakoeChiba_fast <- function(x, y, epsilon, windowSize) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_EDRDistSakoeChiba_fast`, x, y, epsilon, windowSize)
}

#' (Fast) Multi-variate Edit Distance on Real Sequence and Sakoe-Chiba Window
#'
#' Computes the Edit Distance on Real Sequence as described by Chen, Özsu
#' and Oria (2005), constraining the possible matches to a maximum index
#' difference of \code{windowSize} as described by Sakoe and Chiba (1978).
#' The coding is inspired by the \code{TSdist::EDRDistance()} method, but
#' faster because point-to-point distances computation is integrated into the
#' C++ code.
#'
#' Despite the name, it is not really a distance in the strict sense, as EDR
#' violates the triangle inequality.
#'
#' @section References:
#'
#' Chen, L., Özsu, M. T. & Oria, V. (2005). Robust and fast similarity search
#' for moving object trajectories. In \emph{Proceedings of the 2005 ACM SIGMOD
#' international conference on management of data} (pp. 491–502). ACM.
#'
#' Sakoe, H., & Chiba, S. (1978). Dynamic programming algorithm optimization
#' for spoken word recognition. \emph{IEEE transactions on acoustics, speech,
#' and signal processing, 26}(1), 43-49.
#'
#' @param x 1st numeric matrix/multi-variate time series.
#' @param y 2nd numeric matrix/multi-variate time series.
#' @param epsilon Maximum distance between two time series elements to count a
#' match.
#' @param windowSize The maximum index difference which is considered when
#' matching elements.
#' @return The distance as double (not-a-number if matching is not possible
#' as the time series lengths differ by more than \code{windowSize}).
#' @family Edit distance functions
#' @export
EDRDistSakoeChibaMult_fast <- function(x, y, epsilon, windowSize) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_EDRDistSakoeChibaMult_fast`, x, y, epsilon, windowSize)
}

#' (Fast) Edit Distance with Real Penalty
#'
#' Computes the Edit Distance with Real Penalty as described by Chen and Ng
#' (2004). The coding is inspired by the \code{TSdist::ERPDistance()} method,
#' but faster because point-to-point distances computation is integrated into
#' the C++ code.
#'
#' @section References:
#'
#' Chen, L., & Ng, R. (2004, August). On the marriage of lp-norms and edit
#' distance. In \emph{Proceedings of the Thirtieth international conference
#' on Very large data bases-Volume 30} (pp. 792-803). VLDB Endowment.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @param gapValue If an element of one series is not matched to the other
#' series, its distance to the gapValue is computed instead (0 might be a
#' sensible default for standardized series).
#' @return The distance as double.
#' @family Edit distance functions
#' @export
ERPDist <- function(x, y, gapValue) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_ERPDist`, x, y, gapValue)
}

#' (Even Faster) Edit Distance with Real Penalty
#'
#' Faster version of \code{\link{ERPDist}} which uses a cyclic access strategy
#' with a smaller cost matrix; inspired by the C implementation of dynamic
#' time warping in \code{dtwclust::dtw_basic()}.
#'
#' @section References:
#'
#' Chen, L., & Ng, R. (2004, August). On the marriage of lp-norms and edit
#' distance. In \emph{Proceedings of the Thirtieth international conference
#' on Very large data bases-Volume 30} (pp. 792-803). VLDB Endowment.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @param gapValue If an element of one series is not matched to the other
#' series, its distance to the gapValue is computed instead (0 might be a
#' sensible default for standardized series).
#' @param normalize Divide by the length of the longer time series (= minimum
#' amount of assignment steps) to account for series of different lengths in
#' your dataset? Default: \code{FALSE}.
#' @return The distance as double.
#' @family Edit distance functions
#' @export
ERPDist_fast <- function(x, y, gapValue, normalize = FALSE) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_ERPDist_fast`, x, y, gapValue, normalize)
}

#' (Fast) Multi-variate Edit Distance with Real Penalty
#'
#' Multi-variate version of \code{\link{ERPDist_fast}}. Uses the L1 norm for
#' point-to-point distance computations.
#'
#' @section References:
#'
#' Chen, L., & Ng, R. (2004, August). On the marriage of lp-norms and edit
#' distance. In \emph{Proceedings of the Thirtieth international conference
#' on Very large data bases-Volume 30} (pp. 792-803). VLDB Endowment.
#'
#' @param x 1st numeric matrix/multi-variate time series.
#' @param y 2nd numeric matrix/multi-variate time series.
#' @param gapValue If an element of one series is not matched to the other
#' series, its distance to the gapValue is computed instead (0 might be a
#' sensible default for standardized series).
#' @param normalize Divide by the length of the longer time series (= minimum
#' amount of assignment steps) to account for series of different lengths in
#' your dataset? Default: \code{FALSE}.
#' @return The distance as double.
#' @family Edit distance functions
#' @export
ERPDistMult_fast <- function(x, y, gapValue, normalize = FALSE) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_ERPDistMult_fast`, x, y, gapValue, normalize)
}

#' (Fast) Edit Distance with Real Penalty and Sakoe-Chiba Window
#'
#' Computes the Edit Distance with Real Penalty as described by Chen and Ng
#' (2004), constraining the possible matches to a maximum index difference of
#' \code{windowSize} as described by Sakoe and Chiba (1978). The coding is
#' inspired by the \code{TSdist::ERPDistance()} method, but faster because
#' point-to-point distances computation is integrated into the C++ code.
#'
#' @section References:
#'
#' Chen, L., & Ng, R. (2004, August). On the marriage of lp-norms and edit
#' distance. In \emph{Proceedings of the Thirtieth international conference
#' on Very large data bases-Volume 30} (pp. 792-803). VLDB Endowment.
#'
#' Sakoe, H., & Chiba, S. (1978). Dynamic programming algorithm optimization
#' for spoken word recognition. \emph{IEEE transactions on acoustics, speech,
#' and signal processing, 26}(1), 43-49.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @param gapValue If an element of one series is not matched to the other
#' series, its distance to the gapValue is computed instead (0 might be a
#' sensible default for standardized series).
#' @param windowSize The maximum index difference which is considered when
#' matching elements.
#' @return The distance as double (not-a-number if matching is not possible
#' as the time series lengths differ by more than \code{windowSize}).
#' @family Edit distance functions
#' @export
ERPDistSakoeChiba <- function(x, y, gapValue, windowSize) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_ERPDistSakoeChiba`, x, y, gapValue, windowSize)
}

#' (Even Faster) Edit Distance with Real Penalty and Sakoe-Chiba Window
#'
#' Faster version of \code{\link{ERPDistSakoeChiba}} which uses a cyclic access
#' strategy with a smaller cost matrix; inspired by the C implementation of
#' dynamic time warping in \code{dtwclust::dtw_basic()}.
#'
#' @section References:
#'
#' Chen, L., & Ng, R. (2004, August). On the marriage of lp-norms and edit
#' distance. In \emph{Proceedings of the Thirtieth international conference
#' on Very large data bases-Volume 30} (pp. 792-803). VLDB Endowment.
#'
#' Sakoe, H., & Chiba, S. (1978). Dynamic programming algorithm optimization
#' for spoken word recognition. \emph{IEEE transactions on acoustics, speech,
#' and signal processing, 26}(1), 43-49.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @param gapValue If an element of one series is not matched to the other
#' series, its distance to the gapValue is computed instead (0 might be a
#' sensible default for standardized series).
#' @param windowSize The maximum index difference which is considered when
#' matching elements.
#' @return The distance as double (not-a-number if matching is not possible
#' as the time series lengths differ by more than \code{windowSize}).
#' @family Edit distance functions
#' @export
ERPDistSakoeChiba_fast <- function(x, y, gapValue, windowSize) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_ERPDistSakoeChiba_fast`, x, y, gapValue, windowSize)
}

#' (Fast) Multi-variate Edit Distance with Real Penalty and Sakoe-Chiba Window
#'
#' Multi-variate version of \code{\link{ERPDistSakoeChiba_fast}}. Uses the L1
#' norm for point-to-point distance computations.
#'
#' @section References:
#'
#' Chen, L., & Ng, R. (2004, August). On the marriage of lp-norms and edit
#' distance. In \emph{Proceedings of the Thirtieth international conference
#' on Very large data bases-Volume 30} (pp. 792-803). VLDB Endowment.
#'
#' Sakoe, H., & Chiba, S. (1978). Dynamic programming algorithm optimization
#' for spoken word recognition. \emph{IEEE transactions on acoustics, speech,
#' and signal processing, 26}(1), 43-49.
#'
#' @param x 1st numeric matrix/multi-variate time series.
#' @param y 2nd numeric matrix/multi-variate time series.
#' @param gapValue If an element of one series is not matched to the other
#' series, its distance to the gapValue is computed instead (0 might be a
#' sensible default for standardized series).
#' @param windowSize The maximum index difference which is considered when
#' matching elements.
#' @return The distance as double (not-a-number if matching is not possible
#' as the time series lengths differ by more than \code{windowSize}).
#' @family Edit distance functions
#' @export
ERPDistSakoeChibaMult_fast <- function(x, y, gapValue, windowSize) {
    # Forward the arguments unchanged to the compiled routine via .Call().
    .Call(`_FastTSDistances_ERPDistSakoeChibaMult_fast`, x, y, gapValue, windowSize)
}

#' (Fast) Dynamic Time Warping Dissimilarity
#'
#' Fast version of univariate dynamic time warping (unconstrained, symmetric1
#' step pattern) which uses a cyclic access strategy with a smaller cost matrix;
#' inspired by the C implementation of dynamic time warping in
#' \code{dtwclust::dtw_basic()}, but cuts even more overhead.
#'
#' Be aware that it is not really a distance in the strict sense, as DTW
#' violates the triangle inequality.
#'
#' @section References:
#' Berndt, D. J. & Clifford, J. (1994). Using dynamic time warping to find
#' patterns in time series. In \emph{Proceedings of the 3rd international
#' conference on knowledge discovery and data mining} (pp. 359–370). AAAI Press.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @param cid Should the distance be made "complexity-invariant"
#' (\code{\link{l2CompCorFactor_fast}})?
#' @param cortK Should the temporal behavior (correlation) of the time series'
#' diff vectors be considered (\code{\link{cortFactor_fast}})? A factor smaller
#' than 0 means no, higher factors will be used as parameter \code{k} in the
#' temporal correlation scaling function.
#' @param normalize Divide by the length of the longer time series (= minimum
#' amount of assignment steps) to account for series of different lengths in
#' your dataset.
#' @return The distance as double.
#' @family DTW functions
#' @export
DTWDist_fast <- function(x, y, cid = FALSE, cortK = -1, normalize = FALSE) {
    # Generated wrapper: forwards the arguments unchanged, in this order, to
    # the native routine `_FastTSDistances_DTWDist_fast` via .Call() and
    # returns its result.
    .Call(`_FastTSDistances_DTWDist_fast`, x, y, cid, cortK, normalize)
}

#' (Fast) Multi-variate Dynamic Time Warping Dissimilarity
#'
#' Fast version of multi-variate dynamic time warping (unconstrained, symmetric1
#' step pattern, L2 distance for point-to-point comparisons) which uses a cyclic
#' access strategy with a smaller cost matrix; inspired by the C implementation of
#' dynamic time warping in \code{dtwclust::dtw_basic()}, but cuts even more overhead.
#'
#' Be aware that it is not really a distance in the strict sense, as DTW
#' violates the triangle inequality.
#'
#' @section References:
#'
#' Berndt, D. J. & Clifford, J. (1994). Using dynamic time warping to find
#' patterns in time series. In \emph{Proceedings of the 3rd international
#' conference on knowledge discovery and data mining} (pp. 359–370). AAAI Press.
#'
#' @param x 1st numeric matrix/multi-variate time series.
#' @param y 2nd numeric matrix/multi-variate time series.
#' @param cid Should the distance be made "complexity-invariant"
#' (\code{\link{l2CompCorFactorMult_fast}})?
#' @param cortK Should the temporal behavior (correlation) of the time series'
#' diff vectors be considered (\code{\link{cortFactorMult_fast}})? A factor
#' smaller than 0 means no, higher factors will be used as parameter \code{k}
#' in the temporal correlation scaling function.
#' @param normalize Divide by the length of the longer time series (= minimum
#' amount of assignment steps) to account for series of different lengths in
#' your dataset.
#' @return The distance as double.
#' @family DTW functions
#' @export
DTWDistMult_fast <- function(x, y, cid = FALSE, cortK = -1, normalize = FALSE) {
    # Generated wrapper: forwards the arguments unchanged, in this order, to
    # the native routine `_FastTSDistances_DTWDistMult_fast` via .Call() and
    # returns its result.
    .Call(`_FastTSDistances_DTWDistMult_fast`, x, y, cid, cortK, normalize)
}

#' (Fast) Dynamic Time Warping Dissimilarity with a Sakoe-Chiba Window
#'
#' Fast version of univariate dynamic time warping (Sakoe-Chiba window as
#' constraint, symmetric1 step pattern) which uses a cyclic access strategy
#' with a smaller cost matrix; inspired by the C implementation of dynamic time
#' warping in \code{dtwclust::dtw_basic()}, but cuts even more overhead.
#'
#' Be aware that it is not really a distance in the strict sense, as DTW
#' violates the triangle inequality.
#'
#' @section References:
#' Berndt, D. J. & Clifford, J. (1994). Using dynamic time warping to find
#' patterns in time series. In \emph{Proceedings of the 3rd international
#' conference on knowledge discovery and data mining} (pp. 359–370). AAAI Press.
#'
#' Sakoe, H., & Chiba, S. (1978). Dynamic programming algorithm optimization
#' for spoken word recognition. \emph{IEEE transactions on acoustics, speech,
#' and signal processing, 26}(1), 43-49.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @param windowSize The maximum index difference which is considered when
#' matching elements. If greater than or equal to one, interpreted as absolute
#' value. If smaller than one, interpreted as fraction of the length of the
#' longer time series.
#' @return The distance as double (not-a-number if matching is not possible
#' as the time series lengths differ by more than \code{windowSize}).
#' @family DTW functions
#' @export
DTWDistSakoeChiba_fast <- function(x, y, windowSize) {
    # Generated wrapper: forwards the arguments unchanged, in this order, to
    # the native routine `_FastTSDistances_DTWDistSakoeChiba_fast` via .Call()
    # and returns its result.
    .Call(`_FastTSDistances_DTWDistSakoeChiba_fast`, x, y, windowSize)
}

#' (Fast) Multi-variate Dynamic Time Warping Dissimilarity with a Sakoe-Chiba Window
#'
#' Fast version of multivariate dynamic time warping (Sakoe-Chiba window as
#' constraint, symmetric1 step pattern, L2 distance for point-to-point comparisons)
#' which uses a cyclic access strategy with a smaller cost matrix; inspired by
#' the C implementation of dynamic time warping in \code{dtwclust::dtw_basic()},
#' but cuts even more overhead.
#'
#' Be aware that it is not really a distance in the strict sense, as DTW
#' violates the triangle inequality.
#'
#' @section References:
#' Berndt, D. J. & Clifford, J. (1994). Using dynamic time warping to find
#' patterns in time series. In \emph{Proceedings of the 3rd international
#' conference on knowledge discovery and data mining} (pp. 359–370). AAAI Press.
#'
#' Sakoe, H., & Chiba, S. (1978). Dynamic programming algorithm optimization
#' for spoken word recognition. \emph{IEEE transactions on acoustics, speech,
#' and signal processing, 26}(1), 43-49.
#'
#' @param x 1st numeric matrix/multi-variate time series.
#' @param y 2nd numeric matrix/multi-variate time series.
#' @param windowSize The maximum index difference which is considered when
#' matching elements. If greater than or equal to one, interpreted as absolute
#' value. If smaller than one, interpreted as fraction of the length of the
#' longer time series.
#' @return The distance as double (not-a-number if matching is not possible
#' as the time series lengths differ by more than \code{windowSize}).
#' @family DTW functions
#' @export
DTWDistSakoeChibaMult_fast <- function(x, y, windowSize) {
    # Generated wrapper: forwards the arguments unchanged, in this order, to
    # the native routine `_FastTSDistances_DTWDistSakoeChibaMult_fast` via
    # .Call() and returns its result.
    .Call(`_FastTSDistances_DTWDistSakoeChibaMult_fast`, x, y, windowSize)
}

#' (Fast) L1 Distance
#'
#' Computes the standard Manhattan distance with a fast C++ implementation.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @return The distance as double.
#' @family L_p distances
#' @export
l1Dist_fast <- function(x, y) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_l1Dist_fast` via .Call() and returns its result.
    .Call(`_FastTSDistances_l1Dist_fast`, x, y)
}

#' (Fast) Multi-variate L1 Distance
#'
#' Computes the Manhattan distance between multi-variate time series (according
#' to Kotsakos, Trajcevski, Gunopulos and Aggarwal (2014)) with a fast C++
#' implementation.
#'
#' @section References:
#'
#' Kotsakos, D., Trajcevski, G., Gunopulos, D. & Aggarwal, C. C. (2014). Time-series
#' data clustering. In C. C. Aggarwal & C. K. Reddy (Eds.), \emph{Data clustering :
#' Algorithms and applications} (pp. 357–380). Chapman & Hall/CRC data mining and
#' knowledge discovery series. Boca Raton: CRC Press.
#'
#' @param x 1st numeric matrix/multi-variate time series.
#' @param y 2nd numeric matrix/multi-variate time series.
#' @return The distance as double.
#' @family L_p distances
#' @export
l1DistMult_fast <- function(x, y) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_l1DistMult_fast` via .Call() and returns its
    # result.
    .Call(`_FastTSDistances_l1DistMult_fast`, x, y)
}

#' (Fast) L2 Distance
#'
#' Computes the standard Euclidean distance with a fast C++ implementation.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @param cid Should the distance be made complexity invariant
#' (\code{\link{l2CompCorFactor_fast}})?
#' @param cortK Should the temporal behavior (correlation) of the time series'
#' diff vectors be considered (\code{\link{cortFactor_fast}})? A factor smaller
#' than 0 means no, higher factors will be used as parameter \code{k} in the
#' temporal correlation scaling function.
#' @return The distance as double.
#' @family L_p distances
#' @export
l2Dist_fast <- function(x, y, cid = FALSE, cortK = -1) {
    # Generated wrapper: forwards the arguments unchanged, in this order, to
    # the native routine `_FastTSDistances_l2Dist_fast` via .Call() and
    # returns its result.
    .Call(`_FastTSDistances_l2Dist_fast`, x, y, cid, cortK)
}

#' (Fast) Multi-variate L2 Distance
#'
#' Computes the standard Euclidean distance between multi-variate time series
#' (according to Kotsakos, Trajcevski, Gunopulos and Aggarwal (2014)) with a
#' fast C++ implementation.
#'
#' @section References:
#'
#' Kotsakos, D., Trajcevski, G., Gunopulos, D. & Aggarwal, C. C. (2014). Time-series
#' data clustering. In C. C. Aggarwal & C. K. Reddy (Eds.), \emph{Data clustering :
#' Algorithms and applications} (pp. 357–380). Chapman & Hall/CRC data mining and
#' knowledge discovery series. Boca Raton: CRC Press.
#'
#' @param x 1st numeric matrix/multi-variate time series.
#' @param y 2nd numeric matrix/multi-variate time series.
#' @param cid Should the distance be made complexity invariant
#' (\code{\link{l2CompCorFactorMult_fast}})?
#' @param cortK Should the temporal behavior (correlation) of the time series'
#' diff vectors be considered (\code{\link{cortFactorMult_fast}})? A factor smaller
#' than 0 means no, higher factors will be used as parameter \code{k} in the
#' temporal correlation scaling function.
#' @return The distance as double.
#' @family L_p distances
#' @export
l2DistMult_fast <- function(x, y, cid = FALSE, cortK = -1) {
    # Generated wrapper: forwards the arguments unchanged, in this order, to
    # the native routine `_FastTSDistances_l2DistMult_fast` via .Call() and
    # returns its result.
    .Call(`_FastTSDistances_l2DistMult_fast`, x, y, cid, cortK)
}

#' (Fast) Chebyshev Distance
#'
#' Computes the standard Chebyshev distance (maximum metric) with a fast C++
#' implementation.
#'
#' @param x 1st numeric vector/time series.
#' @param y 2nd numeric vector/time series.
#' @return The distance as double.
#' @family L_p distances
#' @export
lmaxDist_fast <- function(x, y) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_lmaxDist_fast` via .Call() and returns its
    # result.
    .Call(`_FastTSDistances_lmaxDist_fast`, x, y)
}

#' (Fast) Multi-variate Chebyshev Distance
#'
#' Computes the standard Chebyshev distance (maximum metric) between
#' multi-variate time series (according to Kotsakos, Trajcevski, Gunopulos and
#' Aggarwal (2014)) with a fast C++ implementation.
#'
#' @section References:
#'
#' Kotsakos, D., Trajcevski, G., Gunopulos, D. & Aggarwal, C. C. (2014). Time-series
#' data clustering. In C. C. Aggarwal & C. K. Reddy (Eds.), \emph{Data clustering :
#' Algorithms and applications} (pp. 357–380). Chapman & Hall/CRC data mining and
#' knowledge discovery series. Boca Raton: CRC Press.
#'
#' @param x 1st numeric matrix/multi-variate time series.
#' @param y 2nd numeric matrix/multi-variate time series.
#' @return The distance as double.
#' @family L_p distances
#' @export
lmaxDistMult_fast <- function(x, y) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_lmaxDistMult_fast` via .Call() and returns its
    # result.
    .Call(`_FastTSDistances_lmaxDistMult_fast`, x, y)
}

#' Entropy for Clusterings
#'
#' Calculates the Shannon entropy for a cluster assignment vector. A value of
#' 0 means that all elements are in one cluster, higher values indicate a more
#' even distribution of objects in the clusters. The value can be normalized
#' to [0,1] such that 1 means the same number of objects in each cluster.
#'
#' @param assignments Integer vector of cluster assignments containing only values
#' from 1 to k with k = number of clusters (code depends on this!).
#' @param normalize Should the entropy be normalized to [0,1]?
#' @return The entropy as double.
#' @export
clusterEntropy_fast <- function(assignments, normalize = FALSE) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_clusterEntropy_fast` via .Call() and returns
    # its result.
    .Call(`_FastTSDistances_clusterEntropy_fast`, assignments, normalize)
}

#' Generalized Davies-Bouldin Index
#'
#' Calculates a generalized version of the Davies-Bouldin Index for internal
#' cluster validation. The index is expressed by a ratio of cluster compactness
#' and cluster separation, summed and averaged over all clusters. This generalized
#' method does not define the concrete way to compute distances within clusters
#' and between clusters, but simply takes these distances as input to compute the
#' Davies-Bouldin Index. Lower values indicate better clustering quality.
#'
#' @section References:
#'
#' Davies, D. L. & Bouldin, D. W. (1979). A cluster separation measure. \emph{IEEE
#' transactions on pattern analysis and machine intelligence, 1}(2), 224–227.
#'
#' @param interClusterDistances A matrix representing the distances between
#' clusters (separation), with the number of rows/columns equal to the number
#' of clusters.
#' @param intraClusterDistances A vector representing the distances within a
#' cluster (compactness), with its length equal to the number of clusters.
#' @return The Generalized Davies-Bouldin Index. Could be Inf or NaN if there
#' are clusters with a distance of zero between them (which is a bad clustering
#' result).
#' @family Internal Cluster Validity Indices
#' @export
generalizedDB_fast <- function(interClusterDistances, intraClusterDistances) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_generalizedDB_fast` via .Call() and returns
    # its result.
    .Call(`_FastTSDistances_generalizedDB_fast`, interClusterDistances, intraClusterDistances)
}

#' Inverted Generalized Davies-Bouldin Index
#'
#' Calculates a generalized version of the Davies-Bouldin Index, similar to
#' \code{\link{generalizedDB_fast}}. The only difference is that the separation
#' measure is divided by the compactness measure (inverted compared to original
#' index), so high values are desirable.
#'
#' @section References:
#'
#' Davies, D. L. & Bouldin, D. W. (1979). A cluster separation measure. \emph{IEEE
#' transactions on pattern analysis and machine intelligence, 1}(2), 224–227.
#'
#' @param interClusterDistances A matrix representing the distances between
#' clusters (separation), with the number of rows/columns equal to the number
#' of clusters.
#' @param intraClusterDistances A vector representing the distances within a
#' cluster (compactness), with its length equal to the number of clusters.
#' @return The Inverted Generalized Davies-Bouldin Index. Could be Inf or NaN
#' if there are clusters with a distance of zero between them (which is a bad
#' clustering result) or infinity between them (which might indicate an error
#' in the computation of your dissimilarity matrix).
#' @family Internal Cluster Validity Indices
#' @export
iGeneralizedDB_fast <- function(interClusterDistances, intraClusterDistances) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_iGeneralizedDB_fast` via .Call() and returns
    # its result.
    .Call(`_FastTSDistances_iGeneralizedDB_fast`, interClusterDistances, intraClusterDistances)
}

#' Generalized Dunn Index
#'
#' Calculates a generalized version of the Dunn Index, allowing an arbitrary
#' measure of cluster separation (which goes to the numerator and is minimized)
#' and an arbitrary measure of cluster compactness (which goes to the denominator
#' and is maximized over all clusters). Dunn used the highly outlier-prone
#' single linkage measure for separation and complete linkage for compactness.
#' Higher values indicate better clustering quality.
#'
#' @section References:
#'
#' Bezdek, J. C. & Pal, N. R. (1998). Some new indexes of cluster validity.
#' \emph{IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics),
#' 28}(3), 301–315.
#'
#' Dunn, J. C. (1973). A fuzzy relative of the isodata process and its use in
#' detecting compact well-separated clusters. \emph{Journal of Cybernetics, 3}(3),
#' 32–57.
#'
#' @param interClusterDistances A symmetric matrix representing the distances
#' between clusters (separation), with the number of rows/columns equal to the
#' number of clusters.
#' @param intraClusterDistances A vector representing the distances within a
#' cluster (compactness), with its length equal to the number of clusters.
#' @return The Generalized Dunn Index. Could be Inf or NaN if there are clusters
#' with a distance of zero between them (which is a bad clustering result).
#' @family Internal Cluster Validity Indices
#' @export
generalizedDunn_fast <- function(interClusterDistances, intraClusterDistances) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_generalizedDunn_fast` via .Call() and returns
    # its result.
    .Call(`_FastTSDistances_generalizedDunn_fast`, interClusterDistances, intraClusterDistances)
}

#' Conditional Entropy to Compare Clusterings
#'
#' Calculates the conditional Shannon entropy to compare two cluster assignment
#' vectors (external cluster validation). It is a value greater than or equal
#' to 0, lower values indicating more similarity (purer clusters). Optionally,
#' the index can be normalized to [0,1] and we take 1 - normalized entropy to
#' get a uniformity measure where high values are good.
#'
#' Be aware that this measure is asymmetric (classes are conditioned on/analyzed
#' in clusters) and can still be high if the classes of the ground truth are
#' split up into multiple (but pure) clusters. Wu, Xiong and Chen (2009) propose
#' to use the symmetric variation of information (\code{\link{VI_fast}}) instead,
#' which is also based on entropy.
#'
#' We use the base 2 logarithm for calculating entropy.
#'
#' @section References:
#'
#' Wu, J., Xiong, H. & Chen, J. (2009). Adapting the right measures for k-means
#' clustering. In \emph{Proceedings of the 15th acm sigkdd international conference
#' on knowledge discovery and data mining} (pp. 877-886). ACM.
#'
#' @param assignments Integer vector of cluster assignments containing only values
#' from 1 to k with k = number of clusters (code depends on this!).
#' @param groundTruth Integer vector of true class labels containing only values
#' from 1 to k' with k' = number of classes.
#' @param normalizeAndInvert Should the entropy be normalized to [0,1] and inverted
#' such that high values indicate similar clusterings?
#' @return The conditional entropy as double in [0, k'] (without normalization) or
#' a uniformity measure in [0,1] (with normalization).
#' @family External Cluster Validity Indices
#' @export
conditionalEntropy_fast <- function(assignments, groundTruth, normalizeAndInvert = FALSE) {
    # Generated wrapper: forwards the arguments unchanged, in this order, to
    # the native routine `_FastTSDistances_conditionalEntropy_fast` via .Call()
    # and returns its result.
    .Call(`_FastTSDistances_conditionalEntropy_fast`, assignments, groundTruth, normalizeAndInvert)
}

#' Statistics for External CVIs based on Pairwise Comparison
#'
#' Calculates the summary statistics [m, m1, m2, M] which can be used to compute
#' multiple normalized external CVIs based on the formulas of Wu, Xiong and Chen (2009).
#'
#' @section References:
#'
#' Wu, J., Xiong, H. & Chen, J. (2009). Adapting the right measures for k-means
#' clustering. In \emph{Proceedings of the 15th acm sigkdd international conference
#' on knowledge discovery and data mining} (pp. 877-886). ACM.
#'
#' @param assignments1 Integer vector of cluster assignments containing only values
#' from 1 to k with k = number of clusters (code depends on this!).
#' @param assignments2 Integer vector of cluster assignments containing only values
#' from 1 to k with k = number of clusters.
#' @return Vector with the four components called \code{m, m1, m2, M} by Wu,
#' Xiong and Chen (2009).
#' @family External Cluster Validity Indices
#' @export
pairCVIParameters_fast <- function(assignments1, assignments2) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_pairCVIParameters_fast` via .Call() and
    # returns its result.
    .Call(`_FastTSDistances_pairCVIParameters_fast`, assignments1, assignments2)
}

#' Rand Index
#'
#' Calculates the Rand Index of Rand (1971) to compare two cluster assignment
#' vectors (external cluster validation). It is a value in (0,1], higher values
#' indicating more similarity. The index can be corrected for similarity by
#' chance as proposed by Hubert and Arabie (1985), then also possibly yielding
#' negative results (if the similarity is worse than random assignment) while
#' the maximum is still 1 (and values are usually positive).
#'
#' @section References:
#'
#' Hubert, L. & Arabie, P. (1985). Comparing partitions. \emph{Journal of
#' classification, 2}(1), 193-218.
#'
#' Rand, W. M. (1971). Objective criteria for the evaluation of clustering methods.
#' \emph{Journal of the American Statistical association, 66}(336), 846-850.
#'
#' Wu, J., Xiong, H. & Chen, J. (2009). Adapting the right measures for k-means
#' clustering. In \emph{Proceedings of the 15th acm sigkdd international conference
#' on knowledge discovery and data mining} (pp. 877-886). ACM.
#'
#' @param pairCVIParams Output of \code{\link{pairCVIParameters_fast}} which has
#' to be called with the two cluster assignment vectors to be compared.
#' @param normalize Should the Rand Index be corrected for chance? (Adjusted Rand
#' Index as proposed by Hubert and Arabie (1985))
#' @return The (Adjusted) Rand Index as double (at most 1 for identical clusterings,
#' normal Rand Index greater than zero, adjusted one can also be negative).
#' @family External Cluster Validity Indices
#' @export
randIndex_fast <- function(pairCVIParams, normalize = FALSE) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_randIndex_fast` via .Call() and returns its
    # result.
    .Call(`_FastTSDistances_randIndex_fast`, pairCVIParams, normalize)
}

#' Fowlkes-Mallows Index
#'
#' Calculates the index of Fowlkes and Mallows (1983) to compare two cluster
#' assignment vectors (external cluster validation). It is a value in [0,1],
#' the geometric mean of precision and recall, higher values indicating more
#' similarity. The index can be corrected for similarity by chance as proposed
#' by Wu, Xiong and Chen (2009), then also possibly yielding negative results
#' (if the similarity is worse than random assignment) while the maximum is still
#' 1 (and values are usually positive).
#'
#' @section References:
#'
#' Fowlkes, E. B. & Mallows, C. L. (1983). A method for comparing two hierarchical
#' clusterings. \emph{Journal of the American statistical association, 78}(383),
#' 553-569.
#'
#' Wu, J., Xiong, H. & Chen, J. (2009). Adapting the right measures for k-means
#' clustering. In \emph{Proceedings of the 15th acm sigkdd international conference
#' on knowledge discovery and data mining} (pp. 877-886). ACM.
#'
#' @param pairCVIParams Output of \code{\link{pairCVIParameters_fast}} which has
#' to be called with the two cluster assignment vectors to be compared.
#' @param normalize Should the Fowlkes-Mallows Index be corrected for chance?
#' @return The Fowlkes-Mallows Index as double (at most 1 for identical clusterings,
#' normal Fowlkes-Mallows greater than zero, normalized one can also be negative).
#' @family External Cluster Validity Indices
#' @export
fowlkesMallows_fast <- function(pairCVIParams, normalize = FALSE) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_fowlkesMallows_fast` via .Call() and returns
    # its result.
    .Call(`_FastTSDistances_fowlkesMallows_fast`, pairCVIParams, normalize)
}

#' Phi Coefficient
#'
#' Calculates the Phi coefficient of Pearson (1900) to compare two cluster
#' assignment vectors (external cluster validation). It is a correlation value,
#' therefore being in the interval (-1,1], higher values indicating more similar
#' clusterings. The name varies in the literature. Instead of Phi, some sources
#' also call it Gamma. In Wu, Xiong and Chen (2009) it is called "Hubert's Gamma
#' statistic I".
#'
#' @section References:
#'
#' Pearson, K. (1900). Mathematical contributions to the theory of evolution. vii.
#' on the correlation of characters not quantitatively measurable. \emph{Philosophical
#' Transactions of the Royal Society of London. Series A, Containing Papers of a
#' Mathematical or Physical Character}, 195, 1-405.
#'
#' Pearson, K. & Heron, D. (1913). On theories of association. \emph{Biometrika},
#' 9(1/2), 159-315.
#'
#' Wu, J., Xiong, H. & Chen, J. (2009). Adapting the right measures for k-means
#' clustering. In \emph{Proceedings of the 15th acm sigkdd international conference
#' on knowledge discovery and data mining} (pp. 877-886). ACM.
#'
#' @param pairCVIParams Output of \code{\link{pairCVIParameters_fast}} which has
#' to be called with the two cluster assignment vectors to be compared.
#' @return Phi/Gamma coefficient as double from the range (-1,1].
#' @family External Cluster Validity Indices
#' @export
phi_fast <- function(pairCVIParams) {
    # Generated wrapper: forwards the argument unchanged to the native routine
    # `_FastTSDistances_phi_fast` via .Call() and returns its result.
    .Call(`_FastTSDistances_phi_fast`, pairCVIParams)
}

#' Purity Measure
#'
#' Calculates the purity measure (e.g. described by Wu, Xiong and Chen (2009))
#' to compare two cluster assignment vectors (external cluster validation). It
#' is a value in (0,1], higher values indicating more similarity. It finds the
#' most common ground truth class in each cluster and sums over these relative
#' frequencies.
#'
#' Be aware that this measure is asymmetric and can still be high if the classes
#' of the ground truth are split up into multiple (but pure) clusters. Wu, Xiong
#' and Chen (2009) propose to use the symmetric van Dongen measure
#' (\code{\link{vanDongen_fast}}) instead.
#'
#' @section References:
#'
#' Van Dongen, S. (2000). \emph{Performance criteria for graph clustering and markov
#' cluster experiments}. National Research Institute for Mathematics and Computer
#' Science. Amsterdam.
#'
#' Wu, J., Xiong, H. & Chen, J. (2009). Adapting the right measures for k-means
#' clustering. In \emph{Proceedings of the 15th acm sigkdd international conference
#' on knowledge discovery and data mining} (pp. 877-886). ACM.
#'
#' @param assignments Integer vector of cluster assignments containing only values
#' from 1 to k with k = number of clusters (code depends on this!).
#' @param groundTruth Integer vector of class (ground truth) assignments containing
#' only values from 1 to k with k = number of clusters.
#' @return The purity measure as double in (0,1].
#' @family External Cluster Validity Indices
#' @export
purity_fast <- function(assignments, groundTruth) {
    # Generated wrapper: forwards both arguments unchanged to the native
    # routine `_FastTSDistances_purity_fast` via .Call() and returns its result.
    .Call(`_FastTSDistances_purity_fast`, assignments, groundTruth)
}

#' Van Dongen Criterion
#'
#' Calculates the index of van Dongen (2000) to compare two cluster assignment
#' vectors (external cluster validation). It is a value in [0,2n), lower values
#' indicating more similarity (it matches each cluster of one assignment to the
#' most similar cluster in the other assignment and counts mismatches). Optionally,
#' the index can be normalized to [0,1] as proposed by Wu, Xiong and Chen (2009).
#' After normalization, we take 1 - normalizedValue so that higher values indicate
#' better clustering quality (as it is for indices like Rand, Fowlkes-Mallows).
#'
#' @section References:
#'
#' Van Dongen, S. (2000). \emph{Performance criteria for graph clustering and markov
#' cluster experiments}. National Research Institute for Mathematics and Computer
#' Science. Amsterdam.
#'
#' Wu, J., Xiong, H. & Chen, J. (2009). Adapting the right measures for k-means
#' clustering. In \emph{Proceedings of the 15th acm sigkdd international conference
#' on knowledge discovery and data mining} (pp. 877-886). ACM.
#'
#' @param assignments1 Integer vector of cluster assignments containing only values
#' from 1 to k with k = number of clusters (code depends on this!).
#' @param assignments2 Integer vector of cluster assignments containing only values
#' from 1 to k with k = number of clusters.
#' @param normalizeAndInvert Should the van Dongen criterion be normalized to
#' [0,1] and inverted such that high values indicate similar clusterings?
#' @return The van Dongen criterion as double (in [0,2n) without normalization
#' and [0,1] else).
#' @family External Cluster Validity Indices
#' @export
vanDongen_fast <- function(assignments1, assignments2, normalizeAndInvert = FALSE) {
    # Generated wrapper: forwards the arguments unchanged, in this order, to
    # the native routine `_FastTSDistances_vanDongen_fast` via .Call() and
    # returns its result.
    .Call(`_FastTSDistances_vanDongen_fast`, assignments1, assignments2, normalizeAndInvert)
}

#' Variation of Information and Normalized Mutual Information
#'
#' Calculates the Variation of Information index introduced by Meila (2003) to
#' compare two cluster assignment vectors (external cluster validation). It is a
#' value greater than or equal to 0, lower values indicating more similarity (it
#' is based on the entropy of the single assignments and the mutual information
#' of the joint distribution). Optionally, the index can be normalized to [0,1]
#' as proposed by Wu, Xiong and Chen (2009). After normalization, we take
#' 1 - normalizedValue so that higher values indicate better clustering quality
#' (as it is for indices like Rand, Fowlkes-Mallows); the result equals the
#' Normalized Mutual Information of Fred and Jain (2002).
#'
#' We use the base 2 logarithm for calculating entropy and mutual information.
#'
#' @section References:
#'
#' Fred, A. L. & Jain, A. K. (2002). Data clustering using evidence accumulation.
#' In \emph{Pattern recognition, 2002. proceedings. 16th international conference
#' on} (Vol. 4, pp. 276-280). IEEE.
#'
#' Meila, M. (2003). Comparing clusterings by the variation of information. In
#' B. Schölkopf & M. K. Warmuth (Eds.), \emph{Learning theory and kernel machines:
#' 16th annual conference on learning theory and 7th kernel workshop, colt/kernel
#' 2003, washington, dc, usa, august 24-27, 2003. proceedings} (pp. 173-187).
#' Springer Berlin Heidelberg.
#'
#' Wu, J., Xiong, H. & Chen, J. (2009). Adapting the right measures for k-means
#' clustering. In \emph{Proceedings of the 15th acm sigkdd international conference
#' on knowledge discovery and data mining} (pp. 877-886). ACM.
#'
#' @param assignments1 Integer vector of cluster assignments containing only values
#' from 1 to k with k = number of clusters (code depends on this!).
#' @param assignments2 Integer vector of cluster assignments containing only values
#' from 1 to k with k = number of clusters.
#' @param normalizeAndInvert Should the Variation of Information be normalized
#' to [0,1] and inverted such that high values indicate similar clusterings?
#' @return The Variation of Information as double (in [0, entropy1+entropy2] without
#' normalization and [0,1] else).
#' @family External Cluster Validity Indices
#' @export
VI_fast <- function(assignments1, assignments2, normalizeAndInvert = FALSE) {
    # Generated wrapper: forwards the arguments unchanged, in this order, to
    # the native routine `_FastTSDistances_VI_fast` via .Call() and returns
    # its result.
    .Call(`_FastTSDistances_VI_fast`, assignments1, assignments2, normalizeAndInvert)
}
# Jakob-Bach/FastTSDistances documentation built on May 13, 2019, 1:15 p.m.