R/RcppExports.R

Defines functions find_discords_brute_force euclidean_dist early_abandoned_dist find_discords_hotsax paa str_to_repair_grammar find_discords_rra series_to_wordbag manyseries_to_wordbag bags_to_tfidf cosine_sim alphabet_to_cuts series_to_chars series_to_string sax_via_window sax_by_chunking idx_to_letter letter_to_idx letters_to_idx is_equal_str is_equal_mindist subseries znorm

Documented in alphabet_to_cuts bags_to_tfidf cosine_sim early_abandoned_dist euclidean_dist find_discords_brute_force find_discords_hotsax find_discords_rra idx_to_letter is_equal_mindist is_equal_str letters_to_idx letter_to_idx manyseries_to_wordbag paa sax_by_chunking sax_via_window series_to_chars series_to_string series_to_wordbag str_to_repair_grammar subseries znorm

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Brute-force discord discovery.
#'
#' Searches the time series for discords using the brute force algorithm.
#'
#' @param ts the input timeseries.
#' @param w_size the size of the sliding window.
#' @param discords_num how many discords to report.
#' @useDynLib jmotif
#' @export
#' @references Keogh, E., Lin, J., Fu, A.,
#' HOT SAX: Efficiently finding the most unusual time series subsequence.
#' Proceeding ICDM '05 Proceedings of the Fifth IEEE International Conference on Data Mining
#' @examples
#' discords <- find_discords_brute_force(ecg0606[1:600], 100, 1)
#' plot(ecg0606[1:600], type = "l", col = "cornflowerblue", main = "ECG 0606")
#' pos <- discords[1, 2]
#' lines(x = pos:(pos + 100), y = ecg0606[pos:(pos + 100)], col = "red")
find_discords_brute_force <- function(ts, w_size, discords_num) {
  .Call(
    "_jmotif_find_discords_brute_force",
    ts, w_size, discords_num,
    PACKAGE = "jmotif"
  )
}

#' Euclidean distance between points.
#'
#' @param seq1 the first array.
#' @param seq2 the second array.
#' @useDynLib jmotif
#' @export
euclidean_dist <- function(seq1, seq2) {
  .Call("_jmotif_euclidean_dist", seq1, seq2, PACKAGE = "jmotif")
}

#' Euclidean distance with early abandoning.
#'
#' Finds the Euclidean distance between points; if the distance is above the
#' threshold, the computation is abandoned and NAN is returned.
#'
#' @param seq1 the first array.
#' @param seq2 the second array.
#' @param upper_limit the max value; once it is reached the distance
#' computation stops and NAN is returned.
#' @useDynLib jmotif
#' @export
early_abandoned_dist <- function(seq1, seq2, upper_limit) {
  .Call(
    "_jmotif_early_abandoned_dist",
    seq1, seq2, upper_limit,
    PACKAGE = "jmotif"
  )
}

#' HOT-SAX discord discovery.
#'
#' Finds a discord (i.e. a time series anomaly) with HOT-SAX.
#' Usually works the best with lower sizes of the discretization parameters:
#' PAA and Alphabet.
#'
#' @param ts the input timeseries.
#' @param w_size the size of the sliding window.
#' @param paa_size the PAA size.
#' @param a_size the alphabet size.
#' @param n_threshold the normalization threshold.
#' @param discords_num how many discords to report.
#' @useDynLib jmotif
#' @export
#' @references Keogh, E., Lin, J., Fu, A.,
#' HOT SAX: Efficiently finding the most unusual time series subsequence.
#' Proceeding ICDM '05 Proceedings of the Fifth IEEE International Conference on Data Mining
#' @examples
#' discords <- find_discords_hotsax(ecg0606, 100, 3, 3, 0.01, 1)
#' plot(ecg0606, type = "l", col = "cornflowerblue", main = "ECG 0606")
#' pos <- discords[1, 2]
#' lines(x = pos:(pos + 100), y = ecg0606[pos:(pos + 100)], col = "red")
find_discords_hotsax <- function(ts, w_size, paa_size, a_size, n_threshold, discords_num) {
  .Call(
    "_jmotif_find_discords_hotsax",
    ts, w_size, paa_size, a_size, n_threshold, discords_num,
    PACKAGE = "jmotif"
  )
}

#' Piecewise Aggregate Approximation (PAA) of a time series.
#'
#' @param ts the timeseries for which the PAA is computed.
#' @param paa_num the desired PAA size.
#' @useDynLib jmotif
#' @export
#' @references Keogh, E., Chakrabarti, K., Pazzani, M., Mehrotra, S.,
#' Dimensionality reduction for fast similarity search in large time series databases.
#' Knowledge and information Systems, 3(3), 263-286. (2001)
#' @examples
#' x <- c(-1, -2, -1, 0, 2, 1, 1, 0)
#' x_paa3 <- paa(x, 3)
#' #
#' plot(x, type = "l", main = c("8-points time series and its PAA transform into three points",
#'                           "PAA shown schematically in blue"))
#' points(x, pch = 16, lwd = 5)
#' #
#' paa_bounds <- c(1, 1 + 7 / 3, 1 + 7 / 3 * 2, 8)
#' abline(v = paa_bounds, lty = 3, lwd = 2, col = "cornflowerblue")
#' segments(paa_bounds[1:3], x_paa3, paa_bounds[2:4], x_paa3, col = "cornflowerblue", lwd = 2)
#' points(x = c(1, 1 + 7 / 3, 1 + 7 / 3 * 2) + (7 / 3) / 2, y = x_paa3,
#'        pch = 15, lwd = 5, col = "cornflowerblue")
paa <- function(ts, paa_num) {
  .Call("_jmotif_paa", ts, paa_num, PACKAGE = "jmotif")
}

#' Applies the repair to a string.
#'
#' @param str the input string.
#' @useDynLib jmotif
#' @export
#' @references N.J. Larsson and A. Moffat. Offline dictionary-based compression.
#' In Data Compression Conference, 1999.
#' @examples
#' str_to_repair_grammar("abc abc cba cba bac xxx abc abc cba cba bac")
str_to_repair_grammar <- function(str) {
  .Call("_jmotif_str_to_repair_grammar", str, PACKAGE = "jmotif")
}

#' RRA (Rare Rule Anomaly) discord discovery.
#'
#' Finds a discord with the RRA (Rare Rule Anomaly) algorithm.
#' Usually works the best with sizes of the discretization parameters
#' (i.e., PAA and Alphabet sizes) higher than those used for HOT-SAX.
#'
#' @param series the input timeseries.
#' @param w_size the size of the sliding window.
#' @param paa_size the PAA size.
#' @param a_size the alphabet size.
#' @param nr_strategy the numerosity reduction strategy ("none", "exact", "mindist").
#' @param n_threshold the normalization threshold.
#' @param discords_num how many discords to report.
#' @useDynLib jmotif
#' @export
#' @references Senin Pavel and Malinchik Sergey,
#' SAX-VSM: Interpretable Time Series Classification Using SAX and Vector Space Model.
#' Data Mining (ICDM), 2013 IEEE 13th International Conference on.
#' @examples
#' discords <- find_discords_rra(ecg0606, 100, 4, 4, "none", 0.01, 1)
#' plot(ecg0606, type = "l", col = "cornflowerblue", main = "ECG 0606")
#' pos <- discords[1, 2]
#' lines(x = pos:(pos + 100), y = ecg0606[pos:(pos + 100)], col = "red")
find_discords_rra <- function(series, w_size, paa_size, a_size, nr_strategy, n_threshold, discords_num) {
  .Call(
    "_jmotif_find_discords_rra",
    series, w_size, paa_size, a_size, nr_strategy, n_threshold, discords_num,
    PACKAGE = "jmotif"
  )
}

#' Bag of words for a single time series.
#'
#' Converts one time series into a bag of words.
#'
#' @param ts the timeseries.
#' @param w_size the size of the sliding window.
#' @param paa_size the PAA size.
#' @param a_size the alphabet size.
#' @param nr_strategy the NR strategy.
#' @param n_threshold the normalization threshold.
#' @useDynLib jmotif
#' @export
#' @references Senin Pavel and Malinchik Sergey,
#' SAX-VSM: Interpretable Time Series Classification Using SAX and Vector Space Model.
#' Data Mining (ICDM), 2013 IEEE 13th International Conference on, pp.1175,1180, 7-10 Dec. 2013.
#' @references Salton, G., Wong, A., Yang., C.,
#' A vector space model for automatic indexing. Commun. ACM 18, 11, 613-620, 1975.
series_to_wordbag <- function(ts, w_size, paa_size, a_size, nr_strategy, n_threshold) {
  .Call(
    "_jmotif_series_to_wordbag",
    ts, w_size, paa_size, a_size, nr_strategy, n_threshold,
    PACKAGE = "jmotif"
  )
}

#' Single bag of words for a set of time series.
#'
#' Converts a set of time-series into one bag of words.
#'
#' @param data the timeseries data, row-wise.
#' @param w_size the size of the sliding window.
#' @param paa_size the PAA size.
#' @param a_size the alphabet size.
#' @param nr_strategy the NR strategy.
#' @param n_threshold the normalization threshold.
#' @useDynLib jmotif
#' @export
#' @references Senin Pavel and Malinchik Sergey,
#' SAX-VSM: Interpretable Time Series Classification Using SAX and Vector Space Model.
#' Data Mining (ICDM), 2013 IEEE 13th International Conference on, pp.1175,1180, 7-10 Dec. 2013.
#' @references Salton, G., Wong, A., Yang., C.,
#' A vector space model for automatic indexing. Commun. ACM 18, 11, 613-620, 1975.
manyseries_to_wordbag <- function(data, w_size, paa_size, a_size, nr_strategy, n_threshold) {
  .Call(
    "_jmotif_manyseries_to_wordbag",
    data, w_size, paa_size, a_size, nr_strategy, n_threshold,
    PACKAGE = "jmotif"
  )
}

#' TF-IDF weight vectors for a set of word bags.
#'
#' @param data the list holding the input word bags.
#' @useDynLib jmotif
#' @export
#' @references Senin Pavel and Malinchik Sergey,
#' SAX-VSM: Interpretable Time Series Classification Using SAX and Vector Space Model.
#' Data Mining (ICDM), 2013 IEEE 13th International Conference on, pp.1175,1180, 7-10 Dec. 2013.
#' @references Salton, G., Wong, A., Yang., C.,
#' A vector space model for automatic indexing. Commun. ACM 18, 11, 613-620, 1975.
#' @examples
#' bag1 <- data.frame(
#'   "words" = c("this", "is", "a", "sample"),
#'   "counts" = c(1, 1, 2, 1),
#'   stringsAsFactors = FALSE
#' )
#' bag2 <- data.frame(
#'   "words" = c("this", "is", "another", "example"),
#'   "counts" = c(1, 1, 2, 3),
#'   stringsAsFactors = FALSE
#' )
#' ll <- list("bag1" = bag1, "bag2" = bag2)
#' tfidf <- bags_to_tfidf(ll)
bags_to_tfidf <- function(data) {
  .Call("_jmotif_bags_to_tfidf", data, PACKAGE = "jmotif")
}

#' Cosine distance value between a bag of words and TF-IDF weight vectors.
#'
#' Computes the cosine distance value between a bag of words and a set of
#' TF-IDF weight vectors.
#'
#' @param data the list holding a word-bag and the TF-IDF object.
#' @useDynLib jmotif
#' @export
#' @references Senin Pavel and Malinchik Sergey,
#' SAX-VSM: Interpretable Time Series Classification Using SAX and Vector Space Model.
#' Data Mining (ICDM), 2013 IEEE 13th International Conference on, pp.1175,1180, 7-10 Dec. 2013.
#' @references Salton, G., Wong, A., Yang., C.,
#' A vector space model for automatic indexing. Commun. ACM 18, 11, 613-620, 1975.
cosine_sim <- function(data) {
  .Call("_jmotif_cosine_sim", data, PACKAGE = "jmotif")
}

#' SAX cut-lines for an alphabet size.
#'
#' Translates an alphabet size into the array of the corresponding SAX
#' cut-lines, built using the Normal distribution.
#'
#' @param a_size the alphabet size, a value between 2 and 20 (inclusive).
#' @useDynLib jmotif
#' @export
#' @references Lonardi, S., Lin, J., Keogh, E., Patel, P.,
#' Finding motifs in time series.
#' In Proc. of the 2nd Workshop on Temporal Data Mining (pp. 53-68). (2002)
#' @examples
#' alphabet_to_cuts(5)
alphabet_to_cuts <- function(a_size) {
  .Call("_jmotif_alphabet_to_cuts", a_size, PACKAGE = "jmotif")
}

#' Time series to a char array via SAX.
#'
#' Transforms a time series into the char array using SAX and the normal
#' alphabet.
#'
#' @param ts the timeseries.
#' @param a_size the alphabet size.
#' @useDynLib jmotif
#' @export
#' @references Lonardi, S., Lin, J., Keogh, E., Patel, P.,
#' Finding motifs in time series.
#' In Proc. of the 2nd Workshop on Temporal Data Mining (pp. 53-68). (2002)
#' @examples
#' y <- c(-1, -2, -1, 0, 2, 1, 1, 0)
#' y_paa3 <- paa(y, 3)
#' series_to_chars(y_paa3, 3)
series_to_chars <- function(ts, a_size) {
  .Call("_jmotif_series_to_chars", ts, a_size, PACKAGE = "jmotif")
}

#' Time series to a string.
#'
#' Transforms a time series into the string.
#'
#' @param ts the timeseries.
#' @param a_size the alphabet size.
#' @useDynLib jmotif
#' @export
#' @references Lonardi, S., Lin, J., Keogh, E., Patel, P.,
#' Finding motifs in time series.
#' In Proc. of the 2nd Workshop on Temporal Data Mining (pp. 53-68). (2002)
#' @examples
#' y <- c(-1, -2, -1, 0, 2, 1, 1, 0)
#' y_paa3 <- paa(y, 3)
#' series_to_string(y_paa3, 3)
series_to_string <- function(ts, a_size) {
  .Call("_jmotif_series_to_string", ts, a_size, PACKAGE = "jmotif")
}

#' SAX discretization via sliding window.
#'
#' Discretizes a time series with SAX using a sliding window.
#'
#' @param ts the input timeseries.
#' @param w_size the size of the sliding window.
#' @param paa_size the PAA size.
#' @param a_size the alphabet size.
#' @param nr_strategy the Numerosity Reduction strategy; the acceptable values
#' are "exact" and "mindist" -- any other value triggers no numerosity reduction.
#' @param n_threshold the normalization threshold.
#' @useDynLib jmotif
#' @export
#' @references Lonardi, S., Lin, J., Keogh, E., Patel, P.,
#' Finding motifs in time series.
#' In Proc. of the 2nd Workshop on Temporal Data Mining (pp. 53-68). (2002)
sax_via_window <- function(ts, w_size, paa_size, a_size, nr_strategy, n_threshold) {
  .Call(
    "_jmotif_sax_via_window",
    ts, w_size, paa_size, a_size, nr_strategy, n_threshold,
    PACKAGE = "jmotif"
  )
}

#' SAX discretization by chunking.
#'
#' Discretizes a time series with SAX using chunking (no sliding window).
#'
#' @param ts the input time series.
#' @param paa_size the PAA size.
#' @param a_size the alphabet size.
#' @param n_threshold the normalization threshold.
#' @useDynLib jmotif
#' @export
#' @references Lonardi, S., Lin, J., Keogh, E., Patel, P.,
#' Finding motifs in time series.
#' In Proc. of the 2nd Workshop on Temporal Data Mining (pp. 53-68). (2002)
sax_by_chunking <- function(ts, paa_size, a_size, n_threshold) {
  .Call(
    "_jmotif_sax_by_chunking",
    ts, paa_size, a_size, n_threshold,
    PACKAGE = "jmotif"
  )
}

#' ASCII letter for an index.
#'
#' @param idx the index.
#' @useDynLib jmotif
#' @export
#' @examples
#' # letter 'b'
#' idx_to_letter(2)
idx_to_letter <- function(idx) {
  .Call("_jmotif_idx_to_letter", idx, PACKAGE = "jmotif")
}

#' Index of an ASCII letter.
#'
#' @param letter the letter.
#' @useDynLib jmotif
#' @export
#' @examples
#' # letter 'b' translates to 2
#' letter_to_idx("b")
letter_to_idx <- function(letter) {
  .Call("_jmotif_letter_to_idx", letter, PACKAGE = "jmotif")
}

#' Sequence of ASCII indexes for a character array.
#'
#' @param str the character array.
#' @useDynLib jmotif
#' @export
#' @examples
#' letters_to_idx(c("a", "b", "c", "a"))
letters_to_idx <- function(str) {
  .Call("_jmotif_letters_to_idx", str, PACKAGE = "jmotif")
}

#' String comparison by natural letter ordering.
#'
#' Compares two strings using the natural letter ordering.
#'
#' @param a the string a.
#' @param b the string b.
#' @useDynLib jmotif
#' @export
#' @examples
#' is_equal_str("aaa", "bbb")
#' is_equal_str("ccc", "ccc")
is_equal_str <- function(a, b) {
  .Call("_jmotif_is_equal_str", a, b, PACKAGE = "jmotif")
}

#' String comparison by mindist.
#'
#' Compares two strings using mindist.
#'
#' @param a the string a.
#' @param b the string b.
#' @useDynLib jmotif
#' @export
#' @examples
#' is_equal_mindist("aaa", "bbb") # true
#' is_equal_mindist("aaa", "ccc") # false
is_equal_mindist <- function(a, b) {
  .Call("_jmotif_is_equal_mindist", a, b, PACKAGE = "jmotif")
}

#' Subseries extraction.
#'
#' Extracts a subseries from a time series.
#'
#' @param ts the input timeseries (0-based, left inclusive).
#' @param start the start of the interval.
#' @param end the end of the interval.
#' @useDynLib jmotif
#' @export
#' @examples
#' y <- c(-1, -2, -1, 0, 2, 1, 1, 0)
#' subseries(y, 0, 3)
subseries <- function(ts, start, end) {
  .Call("_jmotif_subseries", ts, start, end, PACKAGE = "jmotif")
}

#' Z-normalization of a time series.
#'
#' Z-normalizes a time series by subtracting its mean and dividing by the
#' standard deviation.
#'
#' @param ts the input time series.
#' @param threshold the z-normalization threshold value; when the standard
#' deviation of the input time series is found to be less than this value, the
#' procedure is not applied, so the "under-threshold-noise" does not get
#' amplified.
#' @useDynLib jmotif
#' @export
#' @references Dina Goldin and Paris Kanellakis,
#' On similarity queries for time-series data: Constraint specification and implementation.
#' In Principles and Practice of Constraint Programming (CP 1995), pages 137-153. (1995)
#' @examples
#' x <- seq(0, pi * 4, 0.02)
#' y <- sin(x) * 5 + rnorm(length(x))
#' plot(x, y, type = "l", col = "blue")
#' lines(x, znorm(y, 0.01), type = "l", col = "red")
znorm <- function(ts, threshold = 0.01) {
  .Call("_jmotif_znorm", ts, threshold, PACKAGE = "jmotif")
}

Try the jmotif package in your browser

Any scripts or data that you put into this service are public.

jmotif documentation built on March 26, 2020, 7:23 p.m.