R/jmotif.R

Defines functions sax_distance_matrix min_dist cosine_dist

Documented in cosine_dist min_dist sax_distance_matrix

#' @useDynLib jmotif
#' @importFrom Rcpp sourceCpp
NULL

#' Generates a SAX MinDist distance matrix (i.e. the "lookup table") for a given alphabet size.
#'
#' The result is a symmetric \code{a_size} x \code{a_size} matrix. Cells for
#' identical and adjacent symbols are 0; every other cell holds the
#' \emph{squared} difference between two of the cut points obtained from
#' \code{jmotif::alphabet_to_cuts}. Because the values are already squared,
#' callers must not square them again.
#'
#' @param a_size the desired alphabet size (a single numeric value between 2
#'   and 20, inclusive)
#' @return Returns a distance matrix (for SAX minDist) for a specified alphabet size
#' @export
#' @references Lonardi, S., Lin, J., Keogh, E., Patel, P.,
#' Finding motifs in time series.
#' In Proc. of the 2nd Workshop on Temporal Data Mining (pp. 53-68).
#' @examples
#' sax_distance_matrix(5)
sax_distance_matrix <- function(a_size) {
  # Reject anything that is not a single, non-missing number in (1, 20] up
  # front, so that NA / NULL / vector input produces this function's own
  # message instead of a failure inside `if` or `&&`.
  is_valid <- is.numeric(a_size) && length(a_size) == 1L && !is.na(a_size) &&
    a_size > 1 && a_size <= 20
  if (!is_valid) {
    stop(paste("unable to get a distance matrix for the alphabet size", a_size))
  }

  # Drop the first cut; only elements 2..a_size are used as boundaries.
  cutlines <- jmotif::alphabet_to_cuts(a_size)[2:a_size]
  distance_matrix <- matrix(0, nrow = a_size, ncol = a_size)

  # The min_dist for identical and adjacent symbols is 0, so for row i only
  # columns i + 2 .. a_size are filled. seq_len() yields an empty loop when
  # a_size is 2, leaving the all-zero matrix.
  for (i in seq_len(a_size - 2)) {
    far <- (i + 2):a_size
    # square the distance now for future use
    squared_gap <- (cutlines[i] - cutlines[far - 1])^2
    distance_matrix[i, far] <- squared_gap
    # the distance matrix is symmetric
    distance_matrix[far, i] <- squared_gap
  }
  distance_matrix
}

#' Computes the mindist value for two strings
#'
#' For every aligned pair of symbols the already-squared symbol distance is
#' read from the table built by \code{sax_distance_matrix}. The squared
#' distances are summed, multiplied by \code{compression_ratio}, and the
#' square root of the product is returned.
#'
#' @param str1 the first string, given as a vector of symbols
#'   (e.g. \code{c('a', 'b', 'c')})
#' @param str2 the second string, in the same form and of the same length as
#'   \code{str1}
#' @param alphabet_size the used alphabet size
#' @param compression_ratio the distance compression ratio; it multiplies the
#'   sum of squared symbol distances before the square root is taken
#' @return Returns the distance between strings
#' @export
#' @references Lonardi, S., Lin, J., Keogh, E., Patel, P.,
#' Finding motifs in time series.
#' In Proc. of the 2nd Workshop on Temporal Data Mining (pp. 53-68).
#' @examples
#' str1 <- c('a', 'b', 'c')
#' str2 <- c('c', 'b', 'a')
#' min_dist(str1, str2, 3)
min_dist <- function(str1, str2, alphabet_size, compression_ratio = 1) {
  if (length(str1) != length(str2)) {
    stop("error: the strings must have equal length")
  }

  # Convert each string once and reuse the indices for both the range check
  # and the table lookup.
  idx1 <- letters_to_idx(str1)
  idx2 <- letters_to_idx(str2)
  if (any(idx1 > alphabet_size) || any(idx2 > alphabet_size)) {
    stop("error: some symbol(s) in the string(s) exceed(s) the alphabet size!")
  }

  dist_table <- sax_distance_matrix(alphabet_size)

  # Index with a two-column (row, col) matrix to pick exactly one cell per
  # symbol pair. This avoids building an n x n sub-matrix and, unlike
  # diag(), stays correct for one-symbol strings (diag() of a single number
  # builds an identity matrix instead of extracting a value).
  squared_dists <- dist_table[cbind(idx1, idx2)]

  # The table already stores squared distances, so they are summed as-is;
  # squaring them again would produce a sum of fourth powers.
  sqrt(compression_ratio * sum(squared_dists))
}

#' Computes the pairwise cosine distance between the rows of a matrix
#'
#' Each row of \code{m} is treated as one numeric vector. For every pair of
#' rows the cosine similarity is computed and subtracted from 1.
#'
#' @param m the data matrix, one vector per row
#' @return Returns a \code{dist} object holding 1 minus the cosine similarity
#'   for every pair of rows
#' @export
#' @examples
#' a <- c(2, 1, 0, 2, 0, 1, 1, 1)
#' b <- c(2, 1, 1, 1, 1, 0, 1, 1)
#' sim <- cosine_dist(rbind(a,b))
cosine_dist <- function(m) {
  # squared Euclidean norm of every row
  squared_norms <- rowSums(m ^ 2)
  # product of the two row norms for every pair of rows
  norm_products <- sqrt(squared_norms %*% t(squared_norms))
  # dot product of every pair of rows, scaled by the norms
  similarity <- (m %*% t(m)) / norm_products
  stats::as.dist(1 - similarity)
}

Try the jmotif package in your browser

Any scripts or data that you put into this service are public.

jmotif documentation built on March 26, 2020, 7:23 p.m.