#' Bag of Patterns (K-Nearest Neighbours)
#'
#' Fit a Bag of Patterns Model with KNN
#'
#' This function fits a bag of patterns model by using Symbolic Aggregate
#' approXimation (SAX) to convert a data frame of time series into a count
#' table of 'words', which SAX generates from sliding windows over each series.
#' New examples are then classified by finding their nearest neighbours in the
#' converted training data.
#'
#' The KNN settings (`k`, `algorithm`, `prob`) are validated up front, before
#' the model is fitted, and are then stored on the returned model in
#' `model_args`.
#'
#' @param data a data frame where each row is a time series, along with a
#'   column for class
#' @param target the name of the column where the class of each row is stored
#' @param window_size The size of the sliding windows as applied to the time
#'   series, either as a fraction of the length or an integer of precise
#'   length.
#' @param sparse_windows a logical, indicating whether `sqrt(m)` random windows
#'   should be taken instead of all
#' @param normalize a logical, indicating whether each window should be
#'   z-normalized (`(x - mean(x)) / sd(x)`)
#' @param alphabet_size the number of distinct letters to use in the compressed
#'   SAX representation
#' @param word_size the size of the 'words' generated out of the alphabet by
#'   SAX
#' @param breakpoints the method used to assign letters (see `seewave::SAX`)
#' @param word_weighting The weighting function for the DTM/TDM (default is
#'   term-frequency, effectively unweighted)
#' @param maximum_sparsity An optional numeric for the maximal allowed
#'   sparsity, strictly between zero and one.
#' @param verbose whether to print the progress of model creation.
#' @param k the number of neighbours to use; a single positive whole number.
#'   Stored for the later nearest-neighbour call (see `FNN::knn`).
#' @param algorithm the nearest-neighbour search algorithm; one of
#'   `"kd_tree"`, `"cover_tree"` or `"brute"`. Stored for the later
#'   nearest-neighbour call (see `FNN::knn`).
#' @param prob stored unchanged alongside `k` and `algorithm`; presumably the
#'   `prob` flag of `FNN::knn` (whether vote proportions are returned).
#' @return The object returned by `fit_bagofpatterns()`, with an additional
#'   `model_args` element: a list holding `k`, `algorithm` and `prob`.
#' @examples
#' data("FaceAll_TRAIN")
#' model <- bagofpatterns_knn(FaceAll_TRAIN, window_size = .9, alphabet_size = 2, word_size = 2)
#' @export
bagofpatterns_knn <- function(data,
                              target = "target",
                              window_size = .2,
                              sparse_windows = FALSE,
                              normalize = FALSE,
                              alphabet_size = 4,
                              word_size = 8,
                              breakpoints = "quantiles",
                              word_weighting = tm::weightTf,
                              maximum_sparsity = NA,
                              verbose = TRUE,
                              k = 3,
                              algorithm = "kd_tree",
                              prob = FALSE) {
  # Validate KNN parameters before doing any (potentially slow) fitting.
  # The length and finiteness guards come first so that NA, Inf, NULL or
  # vector input yields the intended message rather than an `if` condition
  # error ("missing value where TRUE/FALSE needed" / length > 1 condition).
  k_is_valid <- is.numeric(k) &&
    length(k) == 1L &&
    is.finite(k) &&
    k >= 1 &&
    k == round(k)
  if (!k_is_valid) {
    stop("'k' must be a positive integer", call. = FALSE)
  }

  valid_algorithms <- c("kd_tree", "cover_tree", "brute")
  # Require a single string: a vector would make `%in%` return several values.
  algorithm_is_valid <- is.character(algorithm) &&
    length(algorithm) == 1L &&
    algorithm %in% valid_algorithms
  if (!algorithm_is_valid) {
    stop(
      "'algorithm' must be one of: ",
      paste(valid_algorithms, collapse = ", "),
      call. = FALSE
    )
  }

  # Fit the bag of patterns model
  model_data <- fit_bagofpatterns(
    data = data,
    target = target,
    window_size = window_size,
    sparse_windows = sparse_windows,
    normalize = normalize,
    alphabet_size = alphabet_size,
    word_size = word_size,
    breakpoints = breakpoints,
    word_weighting = word_weighting,
    maximum_sparsity = maximum_sparsity,
    verbose = verbose
  )

  # Store KNN parameters on the model for use at prediction time
  model_data$model_args <- list(
    k = k,
    algorithm = algorithm,
    prob = prob
  )

  model_data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.