R/class-mod_cpt.R

Defines functions diagnose.mod_cpt regions.mod_cpt print.mod_cpt plot.mod_cpt autoregress_errors glance.mod_cpt tidy.mod_cpt augment.mod_cpt changepoints.mod_cpt model_name.mod_cpt coef.mod_cpt model_variance residuals.mod_cpt fitted.mod_cpt as.logLik logLik.mod_cpt nobs.mod_cpt as.ts.mod_cpt mod_cpt validate_mod_cpt new_mod_cpt

Documented in as.ts.mod_cpt augment.mod_cpt changepoints.mod_cpt coef.mod_cpt diagnose.mod_cpt fitted.mod_cpt glance.mod_cpt logLik.mod_cpt mod_cpt model_name.mod_cpt model_variance new_mod_cpt nobs.mod_cpt plot.mod_cpt print.mod_cpt regions.mod_cpt residuals.mod_cpt tidy.mod_cpt validate_mod_cpt

globalVariables(c("bmdl", "nhpp", "cpt_length", "value", ".fitted", ".resid"
                  , "xintercept"))

#' @rdname mod_cpt
#' @export
new_mod_cpt <- function(x = numeric(), 
                       tau = integer(),
                       region_params = tibble::tibble(),
                       model_params = double(),
                       fitted_values = double(), 
                       model_name = character(), ...) {
  stopifnot(is.numeric(x))
  structure(
    list(
      data = stats::as.ts(x),
      tau = tau,
      region_params = region_params,
      model_params = model_params,
      fitted_values = fitted_values,
      model_name = model_name
    ), 
    class = "mod_cpt"
  )
}

#' @rdname mod_cpt
#' @export
validate_mod_cpt <- function(x) {
  if (!stats::is.ts(as.ts(x))) {
    stop("data attribute is not coercible into a ts object.")
  }
  if (!is_valid_tau(x$tau, nobs(x))) {
#    message("Removing 1 and/or n from tau...")
    x$tau <- validate_tau(x$tau, nobs(x))
  }
  x
}

#' Base class for changepoint models
#' @description
#' Create changepoint detection model objects
#' @details
#' Changepoint detection models know how they were created, on what data set,
#' about the optimal changepoint set found, and the parameters that were fit
#' to the model. 
#' Methods for various generic reporting functions are provided. 
#' 
#' All changepoint detection models inherit from [mod_cpt]: the 
#' base class for changepoint detection models. 
#' These models are created by one of the `fit_*()` functions, or by 
#' [as.model()]. 
#' 
#' @export
#' @param x a numeric vector coercible into a `ts` object
#' @param tau indices of the changepoint set
#' @param region_params A [tibble::tibble()] with one row for each region 
#' defined by the changepoint set `tau`. Each variable represents a parameter
#' estimated in that region. 
#' @param model_params A numeric vector of parameters estimated by the model
#' across the entire data set (not just in each region). 
#' @param fitted_values Fitted values returned by the model on the original
#' data set. 
#' @param model_name A `character` vector giving the model's name. 
#' @param ... currently ignored
#' @returns A [mod_cpt] object
#' @seealso [as.model()]
#' @examples
#' cpt <- mod_cpt(CET)
#' str(cpt)
#' as.ts(cpt)
#' changepoints(cpt)
mod_cpt <- function(x, ...) {
  obj <- new_mod_cpt(x, ...)
  validate_mod_cpt(obj)
}

#' @rdname reexports
#' @export
as.ts.mod_cpt <- function(x, ...) {
  as.ts(x$data)
}

#' @rdname reexports
#' @export
nobs.mod_cpt <- function(object, ...) {
  length(as.ts(object))
}

#' @rdname reexports
#' @export
logLik.mod_cpt <- function(object, ...) {
  sigma_hatsq <- model_variance(object)
  if ("durbin_watson" %in% names(object)) {
    N <- nobs(object) - 1
  } else {
    N <- nobs(object)
  }
  ll <- -N * (log(sigma_hatsq) + 1 + log(2 * pi)) / 2
  as.logLik(object, ll)
}

as.logLik <- function(object, ll = 0) {
  m <- length(object$tau)
  num_params_per_region <- object |>
    coef() |>
    dplyr::select(dplyr::contains("param_")) |>
    ncol()
  num_model_params <- length(object$model_params)
  
  if (!is.numeric(ll)) {
    warning("Invalid log-likelihood value...returning 0")
    ll <- 0
  }
  attr(ll, "num_params_per_region") <- num_params_per_region
  attr(ll, "num_model_params") <- num_model_params
  attr(ll, "df") <- m + num_params_per_region * (m + 1) + num_model_params
  attr(ll, "nobs") <- nobs(object)
  attr(ll, "tau") <- object$tau
  class(ll) <- "logLik"
  return(ll)
}

#' @rdname reexports
#' @export
fitted.mod_cpt <- function(object, ...) {
  object$fitted_values
}

#' @rdname reexports
#' @export
residuals.mod_cpt <- function(object, ...) {
  object$data - fitted(object)
}

#' Compute model variance
#' @param object A model object implementing [residuals()] and [nobs()]
#' @param ... currently ignored
#' @details
#' Using the generic functions [residuals()] and [nobs()], this function 
#' computes the variance of the residuals. 
#' 
#' Note that unlike [stats::var()], it does not use \eqn{n-1} as the denominator.
#' @returns A `double` vector of length 1
#' @export
model_variance <- function(object, ...) {
  sum(residuals(object)^2) / nobs(object)
}

#' @rdname reexports
#' @export
coef.mod_cpt <- function(object, ...) {
  object$region_params
}

#' @rdname model_name
#' @export
model_name.mod_cpt <- function(object, ...) {
  object$model_name
}


#' @rdname changepoints
#' @export
changepoints.mod_cpt <- function(x, ...) {
  x$tau |>
    as.integer()
}

#' @rdname reexports
#' @export
augment.mod_cpt <- function(x, ...) {
  tau <- changepoints(x)
  tibble::enframe(as.ts(x), name = "index", value = "y") |>
    tsibble::as_tsibble(index = index) |>
    dplyr::mutate(
      region = cut_by_tau(index, pad_tau(tau, nobs(x))),
      .fitted = fitted(x),
      .resid = residuals(x)
    ) |>
    dplyr::group_by(region)
}

#' @rdname reexports
#' @export
tidy.mod_cpt <- function(x, ...) {
  tau <- changepoints(x)
  n <- nobs(x)
  tau_padded <- pad_tau(tau, n)
  
  augment(x) |>
    dplyr::ungroup() |>
    # why is this necessary????
    as.data.frame() |>
    dplyr::group_by(region) |>
    dplyr::summarize(
      num_obs = dplyr::n(),
#      begin = min(y),
#      end = max(y),
      min = min(y, na.rm = TRUE),
      max = max(y, na.rm = TRUE),
      mean = mean(y, na.rm = TRUE),
      sd = stats::sd(y, na.rm = TRUE),
#      ... = ...
    ) |>
    dplyr::mutate(
      begin = utils::head(tau_padded, -1),
      end = utils::tail(tau_padded, -1)
    ) |>
    dplyr::inner_join(coef(x), by = "region")
}

#' @rdname reexports
#' @export
glance.mod_cpt <- function(x, ...) {
  tibble::tibble(
    pkg = "tidychangepoint",
    version = package_version(utils::packageVersion("tidychangepoint")),
    algorithm = x$model_name,
    params = list(x$model_params),
    num_cpts = length(changepoints(x)),
    rmse = sqrt(model_variance(x)),
    logLik = as.double(logLik(x)),
    AIC = AIC(x),
    BIC = BIC(x),
    MBIC = MBIC(x),
    MDL = MDL(x)
  )
}

autoregress_errors <- function(mod, ...) {
  n <- nobs(mod)
  resid <- residuals(mod)
  
  y <- as.ts(mod)
  
  phi_hat <- sum(resid[-1] * resid[-n] ) / sum(resid^2)
  d <- sum((resid[-n] - resid[-1])^2) / sum(resid^2)
  y_hat <- fitted(mod) + c(0, phi_hat * resid[-n])
  sigma_hatsq <- sum((y - y_hat)^2) / n
  
  out <- mod
  out$fitted_values <- y_hat
  out$model_params[["sigma_hatsq"]] <- sigma_hatsq
  out$model_params[["phi_hat"]] <- phi_hat
  out$durbin_watson <- d
  out$model_name <- paste0(out$model_name, "_ar1")
  return(out)
}


#' @rdname reexports
#' @export
#' @examples
#' # Plot a meanshift model fit
#' plot(fit_meanshift_norm(CET, tau = 330))
#' 
#' #' # Plot a trendshift model fit
#' plot(fit_trendshift(CET, tau = 330))
#' 
#' #' # Plot a quadratic polynomial model fit
#' plot(fit_lmshift(CET, tau = 330, deg_poly = 2))
#' 
#' #' # Plot a 4th degree polynomial model fit
#' plot(fit_lmshift(CET, tau = 330, deg_poly = 10))
#' 
plot.mod_cpt <- function(x, ...) {
  regions <- tidy(x)
  m <- length(changepoints(x))
  breaks_default <- scales::extended_breaks(3)(1:nobs(x))
  b <- c(1, changepoints(x), nobs(x))
  if (m == 0) {
    b2 <- c(1, breaks_default, nobs(x)) |>
      sort()
    b <- b2[b2 > 0]
  }
  ggplot2::ggplot(
    data = augment(x), 
    ggplot2::aes(x = index, y = y)
  ) +
    #    ggplot2::geom_rect(
    #      data = regions,
    #      ggplot2::aes(xmin = begin, xmax = end, ymin = 0, ymax = Inf, x = NULL, y = NULL),
    #      fill = "grey90"
    #    ) +
    ggplot2::geom_vline(
      data = tibble::tibble(xintercept = unique(c(1, changepoints(x), nobs(x)))),
      ggplot2::aes(xintercept = xintercept), 
      linetype = 2
    ) +
    ggplot2::geom_hline(yintercept = mean(as.ts(x)), linetype = 3) +
    ggplot2::geom_rug(sides = "l") +
    ggplot2::geom_line() + 
    ggplot2::geom_line(ggplot2::aes(y = .fitted), color = "red") + 
#    ggplot2::geom_segment(
#      data = regions,
#      ggplot2::aes(x = begin, y = mean, xend = end, yend = mean),
#      color = "red"
#    ) +
    ggplot2::geom_segment(
      data = regions,
      ggplot2::aes(x = begin, y = mean + 1.96 * sd, xend = end, yend = mean + 1.96 * sd),
      color = "red",
      linetype = 3
    ) +
    ggplot2::geom_segment(
      data = regions,
      ggplot2::aes(x = begin, y = mean - 1.96 * sd, xend = end, yend = mean - 1.96 * sd),
      color = "red",
      linetype = 3
    ) + 
    ggplot2::scale_x_continuous("Time Index (t)", breaks = b) +
    ggplot2::labs(
#      title = "Original time series",
      subtitle = paste("Global mean value is", round(mean(as.ts(x), na.rm = TRUE), 2))
    )
}


#' @rdname reexports
#' @export
print.mod_cpt <- function(x, ...) {
  utils::str(x)
}

#' @rdname regions
#' @export
#' @examples
#' 
#' cpt <- fit_meanshift_norm(CET, tau = 330)
#' regions(cpt)
#' 
regions.mod_cpt <- function(x, ...) {
  regions_tau(changepoints(x), nobs(x))
}

#' @rdname diagnose
#' @export
#' @examples
#' # For meanshift models, show the distribution of the residuals by region
#' fit_meanshift_norm(CET, tau = 330) |>
#'   diagnose()
diagnose.mod_cpt <- function(x, ...) {
  ggplot2::ggplot(
    data = augment(x), 
    ggplot2::aes(x = region, y = .resid)
  ) +
    ggplot2::geom_hline(yintercept = 0, linetype = 3) +
    ggplot2::geom_violin(alpha = 0.5, ggplot2::aes(fill = region)) +
    ggplot2::geom_rug(sides = "l") +
    ggplot2::scale_x_discrete("Region defined by changepoint set") +
    ggplot2::scale_y_continuous("Residual") + 
    ggplot2::labs(
      title = "Distribution of residuals by region"
    )
}

Try the tidychangepoint package in your browser

Any scripts or data that you put into this service are public.

tidychangepoint documentation built on April 4, 2025, 4:31 a.m.