#' Pan-Matrix Profile
#'
#' Computes the Pan-Matrix Profile (PMP) for the given time series.
#'
#' The work closest in spirit to ours is VALMOD. The idea of VALMOD is to compute the MP for
#' the shortest length of interest, then use the information gleaned from it to guide a search
#' through longer subsequence lengths, exploiting lower bounds to prune off some calculations.
#' This idea works well for the first few of the longer subsequence lengths, but the lower bounds
#' progressively weaken, making the pruning ineffective. Thus, in the five case studies they
#' presented, the mean value of U/L was just 1.24. In contrast, consider that our termite example
#' in Fig. 15 has a U/L ratio of 240, more than two orders of magnitude larger. Thus, VALMOD is
#' perhaps best seen as finding motifs with some tolerance for a slightly (~25%) too short
#' user-specified query length, rather than a true "motif-of-all-lengths" algorithm. Also note
#' that apart from the shortest length, VALMOD only gives some information for the other lengths,
#' unlike pmp, which contains exact distances for all subsequences of all lengths.
#'
#' @param data a `matrix` or a `vector` of `numeric`.
#' @param window_sizes a `vector` of the window sizes that will be evaluated. They will be rounded to the lower integer
#' and sorted. (Default is a sequence of 20 values from 10 to half data size).
#' @param plot a `logical`. If `TRUE`, every new computation will be plotted. (Default is `FALSE`).
#' @param pmp_obj a `PMP` object that may or not contain an upper bound value, and previous computed profiles. The function will
#' add new profiles, not replace. (Default is `NULL`).
#' @param n_workers an `int`. Number of workers for parallel. (Default is `1`).
#' @param verbose an `int`. See details. (Default is `2`).
#'
#' @details
#' When just the `data` is provided, the exploration will be done using the default `window_sizes` that is a sequence
#' of 20 values between 10 and the half data size and the resulting object will have an `upper_bound` equals to `Inf`.
#' If an object is provided by the argument `pmp_obj`, this function will add more information to the resulting object,
#' never changing the values already computed.
#' `verbose` changes how much information is printed by this function; `0` means nothing, `1` means text, `2`
#' adds the progress bar, `3` adds the finish sound.
#'
#' Talk about upper bound and window sizes
#' 1. upper_window will be set to Inf on new objects
#' 1.1. upper_window will also be used for plot, and for discovery, it must not remove any existing data from the object
#' 2. window_sizes is used for plot, it must not remove any mp inside the object
#' 2.1. window_sizes tells the function what mp are stored, it may be updated with as.numeric(names(pmp))
#' 3. the functions must be capable to handle the data without need to sort by window_size, but sort may be useful later(?)
#'
#' @return Returns a `PMP` object.
#' @export
#'
#' @examples
#' # Just compute
#' pan <- pmp(mp_gait_data)
#' \dontrun{
#' # Compute the upper bound, than add new profiles
#' pan <- pmp_upper_bound(mp_gait_data)
#' pan <- pmp(mp_gait_data, pmp_obj = pan)
#' }
pmp <- function(data,
window_sizes = seq.int(from = 10, to = length(data) / 2, length.out = 20),
plot = FALSE,
pmp_obj = NULL,
n_workers = 1,
verbose = getOption("tsmp.verbose", 2)) {
# Parse arguments ---------------------------------
window_sizes <- floor(window_sizes)
checkmate::qassert(data, "N+")
checkmate::qassert(window_sizes, "X+")
checkmate::qassert(plot, "B")
checkmate::qassert(pmp_obj, c("0", "l"))
checkmate::qassert(n_workers, paste0("X1[1,", parallel::detectCores(), "]"))
checkmate::qassert(verbose, "X1")
# checks if the given object is actualy a skimp object
if (!is.null(pmp_obj)) {
if (class(pmp_obj) != "PMP") {
stop("`pmp_obj` must be of class `PMP`")
}
}
## Prepare things ----
data_size <- length(data)
# if an object is given, remove the windows that already have been computed and are below the upper_window
if (!is.null(pmp_obj)) {
# remove already computed
window_sizes <- window_sizes[!(window_sizes %in% pmp_obj$w)]
if (!is.null(pmp_obj$upper_window)) {
# remove those above the upper_window
window_sizes <- window_sizes[window_sizes < pmp_obj$upper_window]
} else {
# else, keep them and set the upper_window to Inf, since it is NULL
pmp_obj$upper_window <- Inf
}
}
# get the extreme values and sort the window_sizes for later binary_split correctly
min_window <- min(window_sizes)
max_window <- max(window_sizes)
window_sizes <- sort(window_sizes)
# print(str(list(min_window = min_window, max_window = max_window, window_sizes = window_sizes)))
# if we'll plot while computing, prepare the canvas
if (plot == TRUE) {
# if an object is given, plot it using existing windows
if (!is.null(pmp_obj)) {
graphics::plot(pmp_obj) # skimp_plot_set_canvas(pmp_obj = pmp_obj)
Sys.sleep(1) # needed for plot update
} else {
# create a blank canvas with the proper size
skimp_plot_set_canvas(
ymin = min_window,
ymax = max_window + floor((max_window - min_window) / 24), # arbitrary
xmin = 1,
xmax = data_size
)
Sys.sleep(1) # needed for plot update
}
}
# anytime must return the result always
on.exit(
{
if (is.null(pmp_obj)) {
return(NULL)
} else {
# final arrangements to existing object
expected_profiles <- as.numeric(names(pmp_obj$pmp))
expected_indexes <- as.numeric(names(pmp_obj$pmpi))
# check if the windows vector contains any uncomputed matrix profile
if (any(!(pmp_obj$w %in% expected_profiles))) {
warning("`windows` contains values not computed in `pmp`")
}
# check if there is any matrix profile without a profile index
if (any(!(expected_profiles %in% expected_indexes))) {
warning("`pmp` contains values without respective `pmpi")
}
# check if there is any profile index without a matrix profile
if (any(!(expected_indexes %in% expected_profiles))) {
warning("`pmpi` contains values without respective `pmp")
}
# if (sorted == TRUE) {
# sorted <- sort(as.numeric(names(pmp_obj$pmp)), index.return = TRUE)
# window_sizes <- sorted$x
#
# pmp_obj$pmp <- pmp_obj$pmp[sorted$ix]
# pmp_obj$pmpi <- pmp_obj$pmpi[sorted$ix]
# } else {
# window_sizes <- as.numeric(names(pmp_obj$pmp))
# }
pmp_obj$ez <- getOption("tsmp.exclusion_zone", 1 / 2)
return(pmp_obj)
}
},
TRUE
)
# if not given, create a new object to start with
if (is.null(pmp_obj)) {
pmp_obj <- list(pmp = list(), pmpi = list())
class(pmp_obj) <- "PMP"
}
# Determine the order in which we will explore the window_sizes
split_idx <- binary_split(length(window_sizes))
## Begin the main Loop ----
for (i in seq_along(split_idx)) {
# i holds the sequence from 1 to ...length(split_idx)
# idx holds the actual binary split index
idx <- split_idx[i]
# w holds the current window being explored
w <- window_sizes[idx]
if (is.na(w) || w == 0) {
warning("Invalid window size ", w)
next
}
# Run Matrix Profile
result <- mpx(data = data, window_size = w, idx = TRUE, dist = "euclidean", n_workers = n_workers)
if (verbose > 0) {
message(
"step: ", i, "/", length(split_idx), " binary idx: ", idx, " window: ", w
)
}
# if pmp_obj is a new empty object, accessing windows will return NULL, so it's fine
pmp_obj$w <- c(pmp_obj$w, w)
# using character to create a tuple list. Numbers would create NULL's
pmp_obj$pmp[[as.character(w)]] <- result$mp
pmp_obj$pmpi[[as.character(w)]] <- result$pi
if (plot == TRUE) {
# add a layer to the plot. `pmp_obj$w` is currently used to know the heigth of
# the new layer. May be room to improve.
skimp_plot_add_layer(result$mp, w, pmp_obj$w)
Sys.sleep(1) # needed for plot update
}
}
# A dict with the following:
# {
# 'pmp': the pan matrix profile as a 2D array,
# 'pmpi': the pmp indices,
# 'data': {
# 'ts': time series used,
# },
# 'windows': the windows used to compute the pmp,
# 'sample_pct': the sample percent used,
# 'metric':The distance metric computed for the pmp,
# 'algorithm': the algorithm used,
# 'class': PMP
# }
} # function skimp
#' Pan Matrix Profile upper bound
#'
#' Finds the upper bound for Pan Matrix Profile calculation.
#'
#' @param data a `matrix` or a `vector` of `numeric`.
#' @param threshold a `numeric`. Correlation threshold. See details. (Default is `0.95`).
#' @param refine_stepsize a `numeric`. Step size for the last upper bound search. See details. (Default is `0.25`).
#' @param return_pmp a `logical`. If `TRUE`, returns the computed data as a `PMP` object, if `FALSE`,
#' returns just the upper bound value. (Default is `TRUE`).
#' @param n_workers an `int`. Number of workers for parallel. (Default is `1`).
#' @param verbose verbose an `int`. See details. (Default is `2`).
#'
#' @details
#' The Pan Matrix Profile may not give any further information beyond a certain window size. This function starts
#' computing the matrix profile for the window size of 8 and doubles it until the minimum correlation value found is
#' less than the `threshold`. After that, it begins to refine the upper bound using the `refine_stepsize` values, until
#' the `threshold` value is hit.
#'
#' `verbose` changes how much information is printed by this function; `0` means nothing, `1` means text, `2`
#' adds the progress bar, `3` adds the finish sound.
#'
#' @return Returns a `PMP` object with computed data, or just the upper bound value if `return_pmp` is set to `FALSE`.
#' @export
#'
#' @references * Yet to be announced
#' @references Website: <http://www.cs.ucr.edu/~eamonn/MatrixProfile.html>
#'
#' @examples
#' # return the object
#' pan_matrix <- pmp_upper_bound(mp_gait_data)
#'
#' # just the upper bound
#' pan_ub <- pmp_upper_bound(mp_gait_data, return_pmp = FALSE)
pmp_upper_bound <- function(data,
threshold = getOption("tsmp.pmp_ub", 0.95),
refine_stepsize = getOption("tsmp.pmp_refine", 0.25),
return_pmp = TRUE, n_workers = 1,
verbose = getOption("tsmp.verbose", 2)) {
correlation_max <- Inf
if (return_pmp) {
pmp <- list()
pmpi <- list()
do_idxs <- TRUE
} else {
do_idxs <- FALSE
}
correlation_max <- Inf
window_size <- 8
windows <- NULL
max_window <- floor(length(data) / 2)
# Register the anytime exit point
on.exit(
{
if (return_pmp) {
pmp_obj <- list(upper_window = max(windows), pmp = pmp, pmpi = pmpi, w = windows)
class(pmp_obj) <- "PMP"
return(pmp_obj)
} else {
return(window_size)
}
},
TRUE
)
# first perform a wide search by increasing window by 2 in each iteration
while (window_size <= max_window) {
# message("window: ", window_size)
result <- mpx(data = data, window_size = window_size, idx = do_idxs, dist = "pearson", n_workers = n_workers)
if (is.null(result) || result$partial) {
warning("The computation was terminated prematurely. The results are partial.")
return()
}
correlation_max <- max(result$mp[!is.infinite(result$mp)], na.rm = TRUE)
if (correlation_max < threshold) {
# message("break at ", window_size)
break
}
if (return_pmp) {
pmp[[as.character(window_size)]] <- corr_ed(result$mp, window_size)
pmpi[[as.character(window_size)]] <- result$pi
}
windows <- c(windows, window_size)
window_size <- window_size * 2
}
if (window_size <= max_window) {
# refine the upper bound by increase by + X% increments
test_windows <- 2 * round(((seq(refine_stepsize, 1 - 1e-5, refine_stepsize) + 1) * window_size / 2) / 2)
for (window_size in test_windows) {
# message("refine window: ", window_size)
result <- mpx(data = data, window_size = window_size, idx = do_idxs, dist = "pearson", n_workers = n_workers)
if (is.null(result) || result$partial) {
warning("The computation was terminated prematurely. The results are partial.")
return()
}
windows <- c(windows, window_size)
correlation_max <- max(result$mp[!is.infinite(result$mp)], na.rm = TRUE)
if (return_pmp) {
pmp[[as.character(window_size)]] <- corr_ed(result$mp, window_size)
pmpi[[as.character(window_size)]] <- result$pi
}
if (correlation_max < threshold) {
# message("break refine at ", window_size)
break
}
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.