Nothing
#' Test if the object is a vlmc model
#'
#' This function returns `TRUE` for VLMC models and `FALSE` for other objects.
#'
#' @param x an R object.
#' @returns `TRUE` for VLMC models.
#' @export
#' @examples
#' pc <- powerconsumption[powerconsumption$week == 5, ]
#' dts <- cut(pc$active_power, breaks = c(0, quantile(pc$active_power, probs = c(0.25, 0.5, 0.75, 1))))
#' model <- vlmc(dts)
#' # should be true
#' is_ctx_tree(model)
#' # should be true
#' is_vlmc(model)
#' # should be false
#' is_covlmc(model)
is_vlmc <- function(x) {
inherits(x, "vlmc")
}
assertthat::on_failure(is_vlmc) <- function(call, env) {
paste0(deparse(call$x), " is not a vlmc")
}
kl_div <- function(p, q) {
pratio <- p / q
pratio <- ifelse(p == q, 1, pratio) ## handles p=q=0
sum(p * ifelse(pratio > 0, log(pratio), 0))
}
#' Cut off values for pruning the context tree of a VLMC
#'
#' This function returns a collection of cut off values that are guaranteed to
#' induce all valid pruned trees of the context tree of a VLMC. Pruning is
#' implemented by the [prune()] function.
#'
#' By default, the function returns values that can be used directly to induce
#' pruning in the context tree. This is done by computing the log likelihood
#' ratios used by the context algorithm on the reference VLMC and by keeping the
#' relevant ones. From them the function selects intermediate values that are
#' guaranteed to generate via pruning all the VLMC models that could be
#' generated by using larger values of the `cutoff` parameter that was used to
#' build the reference model (or smaller values of the `alpha` parameter in
#' "quantile" scale).
#'
#' Setting the `raw` parameter to `TRUE` removes this operation on the values
#' and asks the function to return the relevant log likelihood ratios.
#'
#' For large VLMC, some log likelihood ratios can be almost identical, with a
#' difference of the order of the machine epsilon value. The `tolerance`
#' parameter is used to keep only values that are different enough. This is done
#' in the native scale, before transformations implemented when `raw` is
#' `FALSE`.
#'
#' As automated model selection is provided by [tune_vlmc()], the direct use of
#' `cutoff` should be reserved to advanced exploration of the set of trees that
#' can be obtained from a complex one, e.g. to implement model selection
#' techniques that are not provided by [tune_vlmc()].
#'
#' @param model a fitted VLMC model.
#' @param scale specify whether the results should be "native" log likelihood
#' ratio values or expressed in a "quantile" scale of a chi-squared
#' distribution (defaults to "quantile").
#' @param raw specify whether the returned values should be limit values
#' computed in the model or modified values that guarantee pruning (see
#' details)
#' @param tolerance specify the minimum separation between two consecutive
#' values of the cut off in native mode (before any transformation). See
#' details.
#' @param ... additional arguments for the cutoff function.
#' @returns a vector of cut off values.
#' @examples
#' pc <- powerconsumption[powerconsumption$week == 5, ]
#' dts <- cut(pc$active_power, breaks = c(0, quantile(pc$active_power, probs = c(0.25, 0.5, 0.75, 1))))
#' model <- vlmc(dts)
#' draw(model)
#' model_cuts <- cutoff(model)
#' model_2 <- prune(model, model_cuts[2])
#' draw(model_2)
#' @seealso [prune()] and [tune_vlmc()]
#' @export
cutoff.vlmc <- function(model, scale = c("quantile", "native"), raw = FALSE,
tolerance = .Machine$double.eps^0.5, ...) {
scale <- match.arg(scale)
recurse_kl_cutoff <- function(model, p_probs) {
c_probs <- model$f_by / sum(model$f_by)
local_kl <- kl_div(c_probs, p_probs) * sum(model$f_by)
if (!is.null(model$children)) {
child_kl <- c()
for (v in seq_along(model$children)) {
if (length(model$children[[v]]) > 0) {
child_kl <- c(child_kl, recurse_kl_cutoff(model$children[[v]], c_probs))
}
}
if (local_kl > max(child_kl)) {
c(local_kl, child_kl)
} else {
child_kl
}
} else {
local_kl
}
}
pre_result <- relaxed_unique(
sort(recurse_kl_cutoff(model, model$f_by / sum(model$f_by))),
tolerance
)
guaranteed_pruning(pre_result, length(model$vals), scale, raw)
}
prune_ctx_tree <- function(tree, alpha = 0.05, cutoff = NULL, verbose = FALSE) {
recurse_prune_kl_ctx_tree <- function(tree, p_probs, ctx, K) {
c_probs <- tree$f_by / sum(tree$f_by)
if (!is.null(tree$children)) {
subtrees <- vector(mode = "list", length(tree$children))
nb_pruned <- 0L
for (v in seq_along(tree$children)) {
subtrees[[v]] <- recurse_prune_kl_ctx_tree(tree$children[[v]], c_probs, c(ctx, v - 1), K)
if (!is.null(subtrees[[v]][["kl"]])) {
if (subtrees[[v]]$kl < K) {
## let's prune it
if (verbose) { # nocov start
cat(paste("pruning", paste(c(ctx, v - 1), sep = "", collapse = ""), subtrees[[v]]$kl), "\n")
} # nocov end
subtrees[[v]] <- list()
nb_pruned <- nb_pruned + 1L
} else {
subtrees[[v]] <- subtrees[[v]]$tree
}
}
}
if (nb_pruned < length(tree$children)) {
tree$children <- subtrees
tree
} else {
tree$children <- NULL
kl <- kl_div(c_probs, p_probs) * sum(tree$f_by)
list(kl = kl, tree = tree)
}
} else {
kl <- kl_div(c_probs, p_probs) * sum(tree$f_by)
list(kl = kl, tree = tree)
}
}
if (is.null(cutoff)) {
K <- to_native(alpha, length(tree$vals))
} else {
K <- cutoff
}
pre_res <- recurse_prune_kl_ctx_tree(tree, tree$f_by / sum(tree$f_by), c(), K)
if (!is.null(pre_res$kl)) {
# empty result
pre_res <- new_ctx_tree(tree$vals, list(f_by = tree$f_by), class = "vlmc")
} else {
## compute stats
pre_res <- new_ctx_tree(pre_res$vals, pre_res, class = "vlmc")
}
## preserve construction information
pre_res$max_depth <- tree$max_depth
pre_res$keep_match <- tree$keep_match
pre_res$ix <- tree$ix
pre_res$data_size <- tree$data_size
pre_res
}
#' Prune a Variable Length Markov Chain (VLMC)
#'
#' This function prunes a VLMC.
#'
#' In general, pruning a VLMC is more efficient than constructing two VLMC (the
#' base one and pruned one). Up to numerical instabilities, building a VLMC with
#' a `a` cut off and then pruning it with a `b` cut off (with `a>b`) should
#' produce the same VLMC than building directly the VLMC with a `b` cut off.
#' Interesting cut off values can be extracted from a VLMC using the [cutoff()]
#' function.
#'
#' As automated model selection is provided by [tune_vlmc()], the direct use of `cutoff`
#' should be reserved to advanced exploration of the set of trees that can be
#' obtained from a complex one, e.g. to implement model selection techniques that
#' are not provided by [tune_vlmc()].
#'
#' @param vlmc a fitted VLMC model.
#' @param alpha number in (0,1] (default: 0.05) cut off value in quantile scale
#' for pruning.
#' @param cutoff positive number: cut off value in native (log likelihood ratio)
#' scale for pruning. Defaults to the value obtained from `alpha`. Takes
#' precedence over `alpha` if specified.
#' @param ... additional arguments for the prune function.
#'
#' @returns a pruned VLMC
#' @seealso [cutoff()] and [tune_vlmc()]
#' @export
#' @examples
#' pc <- powerconsumption[powerconsumption$week == 5, ]
#' dts <- cut(pc$active_power, breaks = c(0, quantile(pc$active_power, probs = c(0.25, 0.5, 0.75, 1))))
#' base_model <- vlmc(dts, alpha = 0.1)
#' model_cuts <- cutoff(base_model)
#' pruned_model <- prune(base_model, model_cuts[3])
#' draw(pruned_model)
#' direct_simple <- vlmc(dts, alpha = model_cuts[3])
#' draw(direct_simple)
#' # pruned_model and direct_simple should be identical
#' all.equal(pruned_model, direct_simple)
prune <- function(vlmc, alpha = 0.05, cutoff = NULL, ...) {
UseMethod("prune")
}
#' @rdname prune
#' @export
prune.vlmc <- function(vlmc, alpha = 0.05, cutoff = NULL, ...) {
if (is.null(cutoff)) {
if (is.null(alpha) || !is.numeric(alpha) || alpha <= 0 || alpha > 1) {
stop("the alpha parameter must be in (0, 1]")
}
}
result <- prune_ctx_tree(vlmc, alpha = alpha, cutoff = cutoff)
if (is.null(cutoff)) {
cutoff <- to_native(alpha, length(vlmc$vals))
} else {
## cutoff takes precedence
alpha <- to_quantile(cutoff, length(vlmc$vals))
}
result$alpha <- alpha
result$cutoff <- cutoff
## recompute the extended_ll
if (depth(result) > 0) {
result$ix <- result$ix[1:min(depth(result), length(result$ix))]
ivlmc <- match_ctx(result, result$ix)
result$extended_ll <- rec_loglikelihood_vlmc(ivlmc, TRUE)
} else {
result$extended_ll <- 0
result$ix <- NULL
}
## preserve match information
result$keep_match <- vlmc$keep_match
result$data_size <- vlmc$data_size
if (result$keep_match) {
## handle the case where the root is context
if (!is_full_node(result)) {
result$match <- 0:(result$data_size - 1)
}
}
## preserve the construction information
result$max_depth <- vlmc$max_depth
result$pruned <- TRUE
result
}
#' Fit a Variable Length Markov Chain (VLMC)
#'
#' This function fits a Variable Length Markov Chain (VLMC) to a discrete time
#' series.
#'
#' The VLMC is built using Bühlmann and Wyner's algorithm which consists in
#' fitting a context tree (see [ctx_tree()]) to a time series and then pruning
#' it in such as way that the conditional distribution of the next state of the
#' time series given the context is significantly different from the
#' distribution given a truncated version of the context.
#'
#' The construction of the context tree is controlled by `min_size` and
#' `max_depth`, exactly as in [ctx_tree()]. Significativity is measured using a
#' likelihood ratio test (threshold can be specified in terms of the ratio
#' itself with `cutoff`) or in quantile scale with `alpha`.
#'
#' Pruning can be postponed by setting `prune=FALSE`. Using a combination of
#' [cutoff()] and [prune()], the complexity of the VLMC can then be adjusted.
#' Any VLMC model can be pruned after construction, `prune=FALSE` is a
#' convenience parameter to avoid setting `alpha=1` (which essentially prevents
#' any pruning). Automated model selection is provided by [tune_vlmc()].
#'
#' @param x a discrete time series; can be numeric, character, factor or
#' logical.
#' @param alpha number in (0,1] (default: 0.05) cut off value in quantile scale
#' in the pruning phase.
#' @param cutoff non negative number: cut off value in native (likelihood ratio)
#' scale in the pruning phase. Defaults to the value obtained from `alpha`.
#' Takes precedence over `alpha` is specified.
#' @param min_size integer >= 1 (default: 2). Minimum number of observations for
#' a context in the growing phase of the context tree.
#' @param max_depth integer >= 1 (default: 100). Longest context considered in
#' growing phase of the context tree.
#' @param prune logical: specify whether the context tree should be pruned
#' (default behaviour).
#' @param keep_match logical: specify whether to keep the context matches
#' (default to FALSE)
#' @param backend "R" or "C++" (default: as specified by the "mixvlmc.backend"
#' option). Specifies the implementation used to represent the context tree
#' and to built it. See details.
#' @inheritSection ctx_tree Back ends
#'
#' @returns a fitted vlmc model.
#' @examples
#' pc <- powerconsumption[powerconsumption$week == 5, ]
#' dts <- cut(pc$active_power,
#' breaks = c(0, quantile(pc$active_power, probs = c(0.25, 0.5, 0.75, 1)))
#' )
#' model <- vlmc(dts)
#' draw(model)
#' depth(model)
#' ## reduce the detph of the model
#' shallow_model <- vlmc(dts, max_depth = 3)
#' draw(shallow_model, prob = FALSE)
#' ## improve probability estimates
#' robust_model <- vlmc(dts, min_size = 25)
#' draw(robust_model, prob = FALSE) ## show the frequencies
#' draw(robust_model)
#' @export
#' @seealso [cutoff()], [prune()] and [tune_vlmc()]
#' @references Bühlmann, P. and Wyner, A. J. (1999), "Variable length Markov
#' chains. Ann. Statist." 27 (2) 480-513 \doi{10.1214/aos/1018031204}
vlmc <- function(x, alpha = 0.05, cutoff = NULL, min_size = 2L, max_depth = 100L,
prune = TRUE, keep_match = FALSE,
backend = getOption("mixvlmc.backend", "R")) {
backend <- match.arg(backend, c("R", "C++"))
# data conversion
nx <- to_dts(x)
ix <- nx$ix
vals <- nx$vals
if (is.null(cutoff)) {
if (is.null(alpha) || !is.numeric(alpha) || alpha <= 0 || alpha > 1) {
stop("the alpha parameter must be in (0, 1]")
}
cutoff <- to_native(alpha, length(vals))
} else {
## cutoff takes precedence
if (!is.numeric(cutoff) || cutoff < 0) {
stop("the cutoff parameter must be a non negative number")
}
alpha <- to_quantile(cutoff, length(vals))
}
if (length(vals) > max(10, 0.05 * length(x))) {
warning(paste0("x as numerous unique values (", length(vals), ")"))
}
if (backend == "R") {
ctx_tree <- grow_ctx_tree(ix, vals, min_size = min_size, max_depth = max_depth, compute_stats = !prune, keep_match = keep_match)
result <- ctx_tree
if (prune) {
result <- prune_ctx_tree(ctx_tree, alpha = alpha, cutoff = cutoff)
} else {
result <- new_ctx_tree(result$vals, result, class = "vlmc")
}
} else {
cpp_tree <- build_suffix_tree(rev(ix)[-1], length(nx$vals))
cpp_tree$compute_counts(ix[length(ix)], keep_match)
if (prune) {
cpp_tree$prune_context(min_size, max_depth, cutoff)
} else {
cpp_tree$prune(min_size, max_depth)
}
result <- new_ctx_tree_cpp(vals, cpp_tree, class = c("vlmc_cpp", "vlmc"))
result$root$make_explicit()
result$root$compute_reverse()
## with the C++ backend, max_depth is only used during pruning
result$max_depth <- FALSE
result$restoration <- cpp_tree$restoration_info()
}
## prepare for loglikelihood calculation
result$alpha <- alpha
result$cutoff <- cutoff
if (depth(result) > 0) {
result$ix <- ix[1:min(depth(result), length(x))]
if (backend == "R") {
ivlmc <- match_ctx(result, result$ix)
result$extended_ll <- rec_loglikelihood_vlmc(ivlmc, TRUE)
} else {
result$extended_ll <- result$root$loglikelihood(result$ix, 0, TRUE, FALSE)
}
} else {
result$extended_ll <- 0
}
result$keep_match <- keep_match
result$data_size <- length(x)
if (backend == "R" && keep_match) {
## handle the case where the root is context
if (!is_full_node(result)) {
result$match <- 0:(length(x) - 1)
}
}
result$pruned <- prune
result
}
#' @export
print.vlmc <- function(x, ...) {
cat(paste(
"VLMC context tree on",
paste(x$vals, collapse = ", ")
), "\n")
cat(paste(" cutoff: ", signif(x$cutoff, 4), " (quantile: ", signif(x$alpha, 4), ")\n", sep = ""))
if (!is.null(x$nb_ctx)) {
cat(paste(" Number of contexts:", x$nb_ctx, "\n"))
}
if (!is.null(x$depth)) {
cat(paste(" Maximum context length:", x$depth, "\n"))
}
invisible(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.