#' Create simple PPM model
#'
#' Creates a simple PPM model, that is, a PPM model
#' without any non-traditional features such as memory decay.
#'
#' @param alphabet_size
#' (Integerish scalar)
#' The size of the alphabet upon which the model will be trained and tested.
#' If not provided, this will be taken as \code{length(alphabet_levels)}.
#'
#' @param order_bound
#' (Integerish scalar)
#' The model's Markov order bound. For example, an order bound of two means
#' that the model makes predictions based on the two preceding symbols.
#'
#' @param shortest_deterministic
#' (Logical scalar)
#' If TRUE, the model will 'select' the shortest available order
#' that provides a deterministic prediction, if such an order exists,
#' otherwise defaulting to the longest available order.
#' For a given prediction, if this rule results in a lower model order
#' than would have otherwise been selected,
#' then full counts (not update-excluded counts) will be used for
#' the highest model order (but not for lower model orders).
#' This behaviour matches the implementations of PPM* in
#' \insertCite{Pearce2005;textual}{ppm} and
#' \insertCite{Bunton1996;textual}{ppm}.
#'
#' @param exclusion
#' (Logical scalar)
#' If TRUE, implements exclusion as defined in
#' \insertCite{Pearce2005;textual}{ppm} and
#' \insertCite{Bunton1996;textual}{ppm}.
#'
#' @param update_exclusion
#' (Logical scalar)
#' If TRUE, implements update exclusion as defined in
#' \insertCite{Pearce2005;textual}{ppm} and
#' \insertCite{Bunton1996;textual}{ppm}.
#'
#' @param escape
#' (Character scalar)
#' Takes values 'a', 'b', 'c', 'd', or 'ax',
#' corresponding to the eponymous escape methods
#' in \insertCite{Pearce2005;textual}{ppm}.
#' Note that there is a mistake in the definition of escape method
#' "AX" in \insertCite{Pearce2005;textual}{ppm};
#' the denominator of lambda needs to have 1 added.
#' This is what we implement here. Note that Pearce's
#' LISP implementation correctly adds 1 here, like us.
#'
#' @param debug_smooth
#' (Logical scalar)
#' Whether to print (currently rather messy and ad hoc) debug output
#' for smoothing.
#'
#' @param alphabet_levels
#' (Character vector)
#' Optional vector of levels for the alphabet. If provided,
#' these will be used to define factor levels for the output.
#'
#' @note
#' The implementation does not scale well to very large order bounds (> 50).
#'
#' @references
#' \insertAllCited{}
#'
#' @return
#' A PPM model object.
#' These objects have reference semantics.
#'
#' @seealso
#' \code{\link{new_ppm_decay}},
#' \code{\link{model_seq}}.
#'
#' @export
new_ppm_simple <- function(
alphabet_size,
order_bound = 10L,
shortest_deterministic = TRUE,
exclusion = TRUE,
update_exclusion = TRUE,
escape = "c",
debug_smooth = FALSE,
alphabet_levels = character()
) {
if (missing(alphabet_size) && length(alphabet_levels) > 0) {
alphabet_size <- length(alphabet_levels)
}
if (length(alphabet_levels) > 0 && length(alphabet_levels) != alphabet_size) {
stop("length(alphabet_levels) must equal alphabet_size.")
}
checkmate::qassert(alphabet_size, "X1")
checkmate::qassert(order_bound, "X[0,)")
checkmate::qassert(shortest_deterministic, "B1")
checkmate::qassert(exclusion, "B1")
checkmate::qassert(update_exclusion, "B1")
checkmate::qassert(escape, "S1")
checkmate::qassert(debug_smooth, "B1")
checkmate::qassert(alphabet_levels, "S")
valid_escape_methods <- c("a", "b", "c", "d", "ax")
if (!escape %in% valid_escape_methods)
stop("escape parameter must be one of: ",
paste(valid_escape_methods, collapse = ", "))
mod <- new(
ppm_simple,
alphabet_size = as.integer(alphabet_size),
order_bound = as.integer(order_bound),
shortest_deterministic = shortest_deterministic,
exclusion = exclusion,
update_exclusion = update_exclusion,
escape = escape,
alphabet_levels
)
mod$debug_smooth <- debug_smooth
mod
}
#' Create decay-based PPM model
#'
#' Creates a decay-based PPM model.
#'
#' Decay-based PPM models generalise the PPM algorithm to incorporate
#' memory decay, where the effective counts of observed n-grams
#' decrease over time to reflect processes of auditory memory.
#'
#' The weight of a given n-gram over time is determined by a \emph{decay kernel}.
#' This decay kernel is parametrised by the arguments
#' \eqn{w_0}, \eqn{w_1}, \eqn{w_2}, \eqn{w_\infty},
#' \eqn{n_b}, \eqn{t_b}, \eqn{t_1}, \eqn{t_2}, \eqn{\sigma_\epsilon}
#' (see above).
#' These parameters combine to define a decay kernel of the following form:
#'
#' \if{latex}{\figure{example-decay-kernel.png}{options: width=6in}}
#' \if{html}{\figure{example-decay-kernel.png}{options: width=450}}
#'
#' The decay kernel has three phases:
#'
#' - Buffer (yellow);
#' - Short-term memory (red);
#' - Long-term mermory (blue).
#'
#' While within the buffer, the n-gram has weight \eqn{w_0}.
#' The buffer has limited temporal and itemwise capacity.
#' In particular, an n-gram will leave the buffer once one
#' of two conditions is satisfied:
#'
#' - A set amount of time, \eqn{t_b}, elapses since the first symbol in the n-gram was observed, or
#' - The buffer exceeds the number of symbols it can store, \eqn{n_b},
#' and the n-gram no longer fits completely in the buffer,
#' having been displaced by new symbols.
#'
#' There are some subtleties about how this actually works in practice,
#' refer to \insertCite{Harrison2020;textual}{ppm} for details.
#'
#' The second phase, short-term memory, begins as soon as the
#' buffer phase completes. It has a fixed temporal duration
#' of \eqn{t_1}. At the beginning of this phase,
#' the n-gram has weight \eqn{w_1};
#' during this phase, its weight decays exponentially until it reaches
#' \eqn{w_2} at timepoint \eqn{t_2}.
#'
#' The second phase, long-term memory, begins as soon as the
#' short-term memory phase completes. It has an unlimited temporal duration.
#' At the beginning of this phase,
#' the n-gram has weight \eqn{w_2};
#' during this phase, its weight decays exponentially
#' to an asymptote of \eqn{w_\infty}.
#'
#' The model optionally implements Gaussian noise at the weight retrieval stage.
#' This Gaussian is parametrised by the standard deviation parameter
#' \eqn{\sigma_\epsilon}.
#' See \insertCite{Harrison2020;textual}{ppm} for details.
#'
#' This function supports simpler decay functions with fewer stages;
#' in fact, the default parameters define a one-stage decay function,
#' corresponding to a simple exponential decay with a half life of 10 s.
#' To enable the buffer, \code{buffer_length_time} and \code{buffer_length_items}
#' should be made non-zero, and \code{only_learn_from_buffer} and
#' \code{only_predict_from_buffer} should be set to \code{TRUE}.
#' Likewise, retrieval noise is enabled by setting \code{noise} to a non-zero value,
#' and the short-term memory phase is enabled by setting \code{stm_duration}
#' to a non-zero value.
#'
#' The names of the 'short-term memory' and 'long-term memory' phases
#' should be considered arbitrary in this context;
#' they do not necessarily correspond directly to their
#' psychological namesakes, but are instead simply terms of convenience.
#'
#' The resulting PPM-Decay model uses interpolated smoothing with escape method A,
#' and explicitly disables exclusion and update exclusion.
#' See \insertCite{Harrison2020;textual}{ppm} for details.
#'
#' @param order_bound
#' (Integerish scalar)
#' The model's Markov order bound.
#'
#' @param ltm_weight
#' (Numeric scalar)
#' \eqn{w_2}, initial weight in the long-term memory phase.
#'
#' @param ltm_half_life
#' (Numeric scalar)
#' \eqn{t_2}, half life of the long-term memory phase.
#' Must be greater than zero.
#'
#' @param ltm_asymptote
#' (Numeric scalar)
#' \eqn{w_\infty}, asymptotic weight as time tends to infinity.
#'
#' @param noise
#' (Numeric scalar)
#' \eqn{\sigma_\epsilon}, scale parameter for the retrieval noise distribution.
#'
#' @param stm_weight
#' (Numeric scalar)
#' \eqn{w_1}, initial weight in the short-term memory phase.
#'
#' @param stm_duration
#' (Numeric scalar)
#' \eqn{t_1}, temporal duration of the short-term memory phase, in seconds.
#'
#' @param buffer_weight
#' (Numeric scalar)
#' \eqn{w_0}, weight during the buffer phase.
#'
#' @param buffer_length_time
#' (Numeric scalar)
#' \eqn{n_b}, the model's temporal buffer capacity.
#'
#' @param buffer_length_items
#' (Integerish scalar)
#' \eqn{t_b}, the model's itemwise buffer capacity.
#'
#' @param only_learn_from_buffer
#' (Logical scalar)
#' If TRUE, then n-grams are only learned if they fit within
#' the memory buffer. The default value is \code{FALSE}.
#'
#' @param only_predict_from_buffer
#' (Logical scalar)
#' If TRUE, then the context used for prediction is limited by the memory buffer.
#' Specifically, for a context to be used for prediction,
#' the first symbol within that context must still be within the buffer
#' at the point immediately before the predicted event occurs.
#' The default value is \code{FALSE}.
#'
#' @param seed
#' Random seed for prediction generation.
#' By default this is linked with R's random seed, such that
#' reproducible behaviour can be ensured as usual with the
#' \code{\link{set.seed}} function.
#'
#' @param debug_smooth
#' (Logical scalar)
#' Whether to print (currently rather messy and ad hoc) debug output
#' for the smoothing mechanism.
#'
#' @param debug_decay
#' (Logical scalar)
#' Whether to print (currently rather messy and ad hoc) debug output
#' for the decay mechanism.
#'
#' @return
#' A PPM-decay model object.
#' These objects have reference semantics.
#'
#' @seealso
#' \code{\link{new_ppm_simple}},
#' \code{\link{model_seq}}.
#'
#' @inheritParams new_ppm_simple
#'
#' @md
#'
#' @references
#' \insertAllCited{}
#'
#' @export
new_ppm_decay <- function(
alphabet_size,
order_bound = 10L,
ltm_weight = 1,
ltm_half_life = 10,
ltm_asymptote = 0,
noise = 0,
stm_weight = 1,
stm_duration = 0,
buffer_weight = 1,
buffer_length_time = 0,
buffer_length_items = 0L,
only_learn_from_buffer = FALSE,
only_predict_from_buffer = FALSE,
seed = sample.int(.Machine$integer.max, 1),
debug_smooth = FALSE,
debug_decay = FALSE,
alphabet_levels = character()
) {
if (missing(alphabet_size) && length(alphabet_levels) > 0) {
alphabet_size <- length(alphabet_levels)
}
if (length(alphabet_levels) > 0 && length(alphabet_levels) != alphabet_size) {
stop("length(alphabet_levels) must equal alphabet_size.")
}
checkmate::qassert(alphabet_size, "X1")
checkmate::qassert(order_bound, "X[0,)")
checkmate::qassert(ltm_weight, "N1[0,)")
checkmate::qassert(ltm_half_life, "N1(0,)")
checkmate::qassert(ltm_asymptote, "N1[0,)")
checkmate::qassert(noise, "N1[0,)")
checkmate::qassert(stm_weight, "N1[0,)")
checkmate::qassert(stm_duration, "N1[0,)")
checkmate::qassert(buffer_length_time, "N1[0,)")
checkmate::qassert(buffer_length_items, "X1[0,)")
checkmate::qassert(buffer_weight, "N1[0,)")
checkmate::qassert(only_learn_from_buffer, "B1")
checkmate::qassert(only_predict_from_buffer, "B1")
checkmate::qassert(debug_smooth, "B1")
checkmate::qassert(debug_decay, "B1")
checkmate::qassert(alphabet_levels, "S")
decay_par = list(
ltm_weight = as.numeric(ltm_weight),
ltm_half_life = as.numeric(ltm_half_life),
ltm_asymptote = as.numeric(ltm_asymptote),
noise = as.numeric(noise),
stm_weight = as.numeric(stm_weight),
stm_duration = as.numeric(stm_duration),
buffer_weight = as.numeric(buffer_weight),
buffer_length_time = as.numeric(buffer_length_time),
buffer_length_items = as.integer(buffer_length_items),
only_learn_from_buffer = as.logical(only_learn_from_buffer),
only_predict_from_buffer = as.logical(only_predict_from_buffer)
)
mod <- new(
ppm_decay,
alphabet_size = as.integer(alphabet_size),
order_bound = as.integer(order_bound),
decay_par = decay_par,
seed = as.integer(seed),
alphabet_levels = alphabet_levels
)
mod$debug_decay <- debug_decay
mod$debug_smooth <- debug_smooth
mod
}
#' Is 'x' a 'ppm' object?
#'
#' Tests for objects of class "ppm".
#'
#' @param x Object to test.
#'
#' @return TRUE if the object is of class "ppm", FALSE otherwise.
#' @export
is_ppm <- function(x) {
is_ppm_simple(x) || is_ppm_decay(x)
}
#' Is 'x' a 'ppm_simple' object?
#'
#' Tests for objects of class "ppm_simple".
#'
#' @param x Object to test.
#'
#' @return TRUE if the object is of class "ppm_simple", FALSE otherwise.
#' @export
is_ppm_simple <- function(x) {
is(x, "Rcpp_ppm_simple")
}
#' Is 'x' a 'ppm_decay' object?
#'
#' Tests for objects of class "ppm_decay".
#'
#' @param x Object to test.
#'
#' @return TRUE if the object is of class "ppm_decay", FALSE otherwise.
#' @export
is_ppm_decay <- function(x) {
is(x, "Rcpp_ppm_decay")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.