R/utils.R

Defines functions implements_all_methods get_all_methods is_segmenter is_model vec_cast.logLik.logLik vec_ptype2.logLik.logLik as_year deg_free test_set split_by_tau cut_by_tau time2tau tau2time tau2binary binary2tau validate_tau regions_tau is_valid_tau unpad_tau pad_tau

Documented in as_year binary2tau cut_by_tau deg_free is_model is_segmenter is_valid_tau pad_tau regions_tau split_by_tau tau2binary tau2time test_set time2tau unpad_tau validate_tau vec_cast.logLik.logLik vec_ptype2.logLik.logLik

#' Pad and unpad changepoint sets with boundary points
#' @param tau a numeric vector of changepoint indices
#' @param n the length of the original time series
#' @details
#' If a time series contains \eqn{n} observations, we label them from 1 
#' to \eqn{n}. 
#' Neither the 1st point nor the \eqn{n}th point can be a changepoint, since the
#' regions they create on one side would be empty. 
#' However, for dividing the time series into non-empty segments, we start with 
#' 1, add \eqn{n+1}, and then divide the half-open interval \eqn{[1, n+1)} into
#' half-open subintervals that define the regions. 
#' 
#' [pad_tau()] ensures that 1 and \eqn{n+1} are included. 
#' 
#' [unpad_tau()] removes 1 and \eqn{n+1}, should they exist.
#' 
#' [is_valid_tau()] checks to see if the supplied set of changepoints is valid
#' 
#' [validate_tau()] removes duplicates and boundary values.
#' 
#' @returns 
#'   - [pad_tau()]: an `integer` vector that starts with 0 and ends in \eqn{n}.
#' @export
pad_tau <- function(tau, n) {
  if (!is_valid_tau(tau, n)) {
    tau <- tau[tau >= 1 & tau <= n]
  }
  unique(c(1, tau, n + 1))
}

#' @rdname pad_tau
#' @param padded_tau Output from [pad_tau()]
#' @returns 
#'   - [unpad_tau()]: an `integer` vector stripped of its first and last entries.
#' @export
unpad_tau <- function(padded_tau) {
  padded_tau |>
    utils::head(-1) |>
    utils::tail(-1)
}

#' @rdname pad_tau
#' @export
#' @returns 
#'   - [is_valid_tau()]: a `logical` if all of the entries are between 2 and 
#'   \eqn{n-1}.
#' @examples
#' # Anything less than 2 is not allowed
#' is_valid_tau(0, length(DataCPSim))
#' is_valid_tau(1, length(DataCPSim))
#' 
#' # Duplicates are allowed
#' is_valid_tau(c(42, 42), length(DataCPSim))
#' is_valid_tau(826, length(DataCPSim))
#' 
#' # Anything greater than \eqn{n} (in this case 1096) is not allowed
#' is_valid_tau(1096, length(DataCPSim))
#' is_valid_tau(1097, length(DataCPSim))
#' 
is_valid_tau <- function(tau, n) {
  # the first and last points cannot be changepoints!
  all(tau %in% 2:(n-1))
}

#' @rdname pad_tau
#' @export
#' @returns 
#'   - [regions_tau()]: A [base::factor()]
#' @examples
#' # Always return a factor with half-open intervals on the right
#' regions_tau(c(42, 330), 1096)
regions_tau <- function(tau, n) {
  paste0("[", c(1, tau), ",", c(tau, n + 1), ")") |>
    factor()
}

#' @rdname pad_tau
#' @export
#' @returns 
#'   - [validate_tau()]: an `integer` vector with only the [base::unique()] 
#'   entries between 2 and \eqn{n-1}, inclusive.  
#' @examples
#' # Anything less than 2 is not allowed
#' validate_tau(0, length(DataCPSim))
#' validate_tau(1, length(DataCPSim))
#' validate_tau(826, length(DataCPSim))
#' 
#' # Duplicates are removed
#' validate_tau(c(826, 826), length(DataCPSim))
#' 
#' # Anything greater than \eqn{n} (in this case 1096) is not allowed
#' validate_tau(1096, length(DataCPSim))
#' validate_tau(1097, length(DataCPSim))
#' 
#' # Fix many problems
#' validate_tau(c(-4, 0, 1, 4, 5, 5, 824, 1096, 1097, 182384), length(DataCPSim))
#' 
validate_tau <- function(tau, n) {
  # the first and last points cannot be changepoints!
  tau[tau %in% 2:(n-1)] |>
    unique()
}


#' Convert changepoint sets to binary strings
#' @param x A binary string that encodes a changepoint set. See 
#' [GA::gabin_Population()].
#' @details
#' 
#' In order to use [GA::ga()] in a genetic algorithm, we need to encoude a 
#' changepoint set as a binary string. 
#' 
#' [binary2tau()] takes a binary string representation of a changepoint set and
#' converts it into a set of changepoint indices. 
#' 
#' @returns
#'   - [binary2tau()]: an `integer` vector
#'   
#' @export
#' @examples
#' # Recover changepoint set indices from binary strings
#' binary2tau(c(0, 0, 1, 0, 1))
#' binary2tau(round(runif(10)))
#' 
binary2tau <- function(x) {
  # tau.vec <- loc.ind * (1:N) # convert binary to CPT location
  which(x == 1)
}

#' @rdname binary2tau
#' @inheritParams pad_tau
#' @details
#' 
#' [tau2binary()] takes a set of changepoint indices the number of observations
#' in the time series and converts them into a binary string representation of
#' that changepoint set. 
#' 
#' @returns
#'   - [tau2binary()]: an `integer` vector of length `n`
#' 
#' @export
#' @examples
#' # Recover binary strings from changepoint set indices
#' tau2binary(c(7, 17), n = 24)
#' tau2binary(binary2tau(c(0, 0, 1, 1, 0, 1)), n = 6)
#' 
tau2binary <- function(tau, n) {
  out <- rep(0, times = n)
  out[tau] <- 1
  out
}

#' Convert changepoint sets to time indices
#' @inheritParams tau2binary
#' @param index Index of times, typically returned by [stats::time()]
#' @seealso [stats::time()], [as_year()]
#' @export
#' @returns
#'   - [tau2time()]: a `character` of time labels
#' @examples
#' # Recover the years from a set of changepoint indices
#' tau2time(c(42, 81, 330), index = as_year(time(CET)))
#' 
tau2time <- function(tau, index) {
  index[tau]
}

#' @rdname tau2time
#' @param cpts Time series observation labels to be converted to indices
#' @export
#' @returns
#'   - [time2tau()]: an `integer` vector of changepoint indices
#' @examples
#' # Recover the changepoint set indices from the years
#' time2tau(c(1700, 1739, 1988), index = as_year(time(CET)))
#' 
time2tau <- function(cpts, index) {
  match(cpts, index)
}

#' Use a changepoint set to break a time series into regions
#' @param x A numeric vector
#' @inheritParams pad_tau
#' @details
#' A changepoint set `tau` of length \eqn{k} breaks a time series of length 
#' \eqn{n} into \eqn{k+1} non-empty regions.
#' These non-empty regions can be defined by half-open intervals, starting with
#' 1 and ending with \eqn{n+1}. 
#' 
#' [cut_by_tau()] splits a set of indices into a [base::factor()] of 
#' half-open intervals
#' 
#' @returns 
#'   - [cut_by_tau()] a [base::factor()] of half-open intervals
#' 
#' @export
#' @seealso [base::cut()]
#' @examples
#' n <- length(CET)
#' 
#' # Return a factor of intervals
#' cut_by_tau(1:n, tau = pad_tau(c(42, 81, 330), n))
#' 
cut_by_tau <- function(x, tau) {
  cut(x, breaks = tau, include.lowest = FALSE, right = FALSE)
}

#' @rdname cut_by_tau
#' @details
#' [split_by_tau()] splits a time series into a named [base::list()] of numeric
#' vectors
#' @returns 
#'   - [split_by_tau()] a named [base::list()] of numeric
#' vectors
#' 
#' @seealso [base::split()]
#' @export
#' @examples
#' # Return a list of observations
#' split_by_tau(DataCPSim, c(365, 826))
#' 
split_by_tau <- function(x, tau) {
  tau <- validate_tau(tau, n = length(x))
  idx <- cut_by_tau(1:length(x), pad_tau(tau, length(x)))
  split(x, idx)
}

#' Simulate time series with known changepoint sets
#' @param n Number of true changepoints in set
#' @param sd Standard deviation passed to [stats::rnorm()]
#' @param seed Value passed to [base::set.seed()]
#' @export
#' @returns A [stats::ts()] object
#' @seealso [DataCPSim]
#' @examples
#' x <- test_set()
#' plot(x)
#' changepoints(x)
test_set <- function(n = 1, sd = 1, seed = NULL) {
  if (!is.null(seed)) {
    set.seed(seed)
  }
  num_obs <- 1000
  tau <- sample.int(n = num_obs, size = n) |>
    sort()
  means <- sample.int(n = 100, size = n + 1)
  
  region_lengths <- tau |>
    pad_tau(num_obs) |>
    diff()
  
  out <- purrr::map2(region_lengths, means, ~rnorm(.x, mean = .y, sd = sd)) |>
    c(recursive = TRUE) |>
    stats::as.ts()
  attr(out, "cpt_true") <- tau
  return(out)
}

#' Retrieve the degrees of freedom from a `logLik` object
#' @param x An object that implements a method for [stats::logLik()].
#' @returns The `df` attribute of the [stats::logLik()] of the given object.
#' @export
#' @examples
#' # Retrieve the degrees of freedom model a changepoint model
#' DataCPSim |>
#'   segment() |>
#'   as.model() |>
#'   deg_free()
#'   
deg_free <- function(x) {
  attr(logLik(x), "df")
}

#' Convert a date into a year
#' @param x an object coercible into a [base::Date]. See [base::as.Date()].
#' @export
#' @returns A `character` vector representing the years of the input
#' @examples
#' # Retrieve only the year
#' as_year("1988-01-01")
#' 

as_year <- function(x) {
  x |> 
    as.Date() |>
    format("%Y")
}

#' Vectors implementation for logLik
#' 
#' @export
#' @inheritParams vctrs::vec_ptype2
#' @seealso [stats::logLik()]
#' @keywords internal
#' @returns A [stats::logLik()] vector.
#' @examples
#' a <- logLik(lm(mpg ~ disp, data = mtcars))
#' b <- logLik(lm(mpg ~ am, data = mtcars))
#' vec_ptype2(a, b)
#' c(a, b)
#' vec_cast(a, b)
vec_ptype2.logLik.logLik <- function(x, y, ...) {
  x
}

#' @rdname vec_ptype2.logLik.logLik
#' @inheritParams vctrs::vec_cast
#' @export
vec_cast.logLik.logLik <- function(x, to, ...) {
  x
}

#' @rdname as.model
#' @param x An object, typically returned by `fit_*()`
#' @export
#' @details
#' [is_model()] checks to see if a model object implements all of the 
#' S3 methods necessary to be considered a model. 
#' @return 
#'   - [is_model()] a `logical` vector of length 1
#' @examples
#' 
#' # Fit a model directly, without using [segment()]
#' x <- fit_nhpp(CET, tau = 330)
#' is_model(x)
is_model <- function(x, ...) {
  req <- c(common, mods_only)
  implements_all_methods(x, req)
}

#' @rdname as.segmenter
#' @export
#' @details
#' [is_segmenter()] checks to see if a segmenter object implements all of the 
#' S3 methods necessary to be considered a segmenter. 
#' @return 
#'   - [is_segmenter()] a `logical` vector of length 1
#' @examples
#' # Segment a time series using PELT
#' x <- segment(CET, method = "pelt")
#' 
#' # Return the segmenter component
#' x |>
#'   as.segmenter()
#'   
#' # Note the class of this object could be anything
#' x |>
#'   as.segmenter() |>
#'   class()
#'   
#' # Convert the segmenter into the standardized seg_cpt class
#' x |>
#'   as.segmenter() |>
#'   as.seg_cpt()
#' 
#' # Is the segmenter valid?
#' x |>
#'   as.segmenter() |>
#'   is_segmenter()
is_segmenter <- function(object, ...) {
  req <- c(common, segs_only)
  implements_all_methods(object, req)
}

get_all_methods <- function(object) {
  if (isS4(object)) {
    classes <- object |>
      class() |>
      methods::extends()
  } else {
    classes <- object |>
      class() 
  }
  classes |>
    purrr::map(~methods(class = .x)) |>
    purrr::map(attr, "info") |>
    purrr::list_rbind() |>
    dplyr::filter(!isS4) |>
    dplyr::pull("generic") |>
    unique()
}

implements_all_methods <- function(object, required_methods, ...) {
  available <- object |>
    get_all_methods()
  
  missing <- setdiff(required_methods, available)
  
  if (length(missing) > 0) {
    message(paste("No methods for:"), missing)
    return(FALSE)
  } else {
    return(TRUE)
  }
}

common <- c("as.ts", "changepoints", "model_name", "nobs")
segs_only <- c("fitness", "model_args", "seg_params")
mods_only <- c("augment", "coef", "fitted", "glance", "logLik", "plot", "residuals", "tidy")

Try the tidychangepoint package in your browser

Any scripts or data that you put into this service are public.

tidychangepoint documentation built on April 4, 2025, 4:31 a.m.