R/format.timestamps.R

Defines functions RegularizeTimestamps.POSIXt RegularizeTimestamps.Date MATCH.NumericTimestamps RegularizeTimestamps.numeric RegularizeTimestamps.default RegularizeTimestamps IsRegular NoGaps HasDuplicateTimestamps NoDuplicates TimestampInfo

Documented in HasDuplicateTimestamps IsRegular MATCH.NumericTimestamps NoDuplicates NoGaps RegularizeTimestamps RegularizeTimestamps.Date RegularizeTimestamps.default RegularizeTimestamps.numeric RegularizeTimestamps.POSIXt

# Copyright 2019 Steven L. Scott.  All rights reserved.
#
# Copyright 2018 Google LLC. All Rights Reserved.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

TimestampInfo <- function(response, data = NULL, timestamps = NULL) {
  ## Args:
  ##   response: A vector or matrix.  If the response is a zoo object with
  ##     timestamps then the timestamps will be processed.
  ##   data: an optional data frame, list or environment (or object coercible by
  ##     ‘as.data.frame’ to a data frame) containing the variables in the bsts
  ##     model.  This is only used as source of timestamps if they are not
  ##     provided otherwise.
  ##   timestamps: A vector of user-supplied timestamps (of the same length as
  ##     'response'), or NULL.
  ##
  ## Returns:
  ##   An object of class TimestampInfo, which is a list containing the
  ##   following elements:
  ##
  ##   * timestamps.are.trivial: Logical.  TRUE indicates that 'timestamps' are
  ##     either NULL, or that there are no duplicates and no skipped values.  If
  ##     timestamps are trivial then the response is a simple uninterrupted
  ##     sequence (like you'd expect at time series to be.
  ##
  ##   * number.of.time.points: The number of unique time points contained in
  ##       the data, including any skipped time points containing no
  ##       observations.
  ## 
  ##   * timestamps: the time stamps taken from the original response or zoo
  ##       data frame containing the data.  This might be NULL.  It might also
  ##       contain duplicate values.
  ##
  ##   * regular.timestamps: A regular grid of timestamps containing the
  ##       smallest and largest and largest entries in 'timestamps', with an
  ##       increment equal to the smallest nonzero increment in 'timestamps'.
  ##       This will be NULL if 'timestamps' is NULL.
  ##
  ##   * timestamp.mapping: This is only present if timestamps.are.trivial is
  ##       FALSE.  It is a numeric vector giving the index of the entry in
  ##       'regular.timestamps' to which each observation in 'response' is
  ##       associated.
  if (is.null(timestamps)) {
    if (is.zoo(response)) {
      timestamps <- index(response)
    } else if (!is.null(data) && is.zoo(data)) {
      timestamps <- index(data)
    }
  }
  number.of.observations <-
      if (is.matrix(response)) nrow(response) else length(response)

  if (is.null(timestamps)) {
    ## Handle the trivial case when no timestamps are passed.
    number.of.time.points <- number.of.observations
    stopifnot(number.of.time.points > 0)
    ans <- list(timestamps.are.trivial = TRUE,
                number.of.time.points = as.integer(number.of.time.points),
                timestamps = 1:number.of.observations,
                regular.timestamps = 1:number.of.time.points)
  } else {
    ## If timestamps were passed then process them.
    stopifnot(number.of.observations == length(timestamps))
    regular.timestamps <- RegularizeTimestamps(timestamps)
    ans <- list(timestamps.are.trivial = IsRegular(timestamps),
      number.of.time.points = as.integer(length(regular.timestamps)),
      timestamps = timestamps,
      regular.timestamps = regular.timestamps)
    if (!ans$timestamps.are.trivial) {
      ## A hack to handle numeric timestamps appropriately.
      class(timestamps) <- class(regular.timestamps)
      ans$timestamp.mapping <- as.integer(
        zoo::MATCH(timestamps, regular.timestamps))
    }
    if (length(ans$regular.timestamps) > 2 * length(ans$timestamps)) {
      warning("Expanding the time series to a regular interval resulted ",
        "in very large amounts of missing data.")
    }
  }
  class(ans) <- "TimestampInfo"
  return(ans)
}

NoDuplicates <- function(timestamps) {
  ## Returns TRUE iff the vector of timestamps contains no duplicate values.
  return(length(timestamps) == length(unique(timestamps)))
}

HasDuplicateTimestamps <- function(bsts.object) {
  # Returns TRUE iff the object has nontrivial timestamps and at least one time
  # stamp is associated with more than one observation.
  if (bsts.object$timestamp.info$timestamps.are.trivial) {
    return(FALSE)
  }
  return(!NoDuplicates(bsts.object$timestamps))
}

NoGaps <- function(timestamps) {
  ## Returns TRUE iff there are no gaps in the series of time stamps.  The
  ## deltas between time points do not need to be equally spaced.  A distance
  ## between observations counts as a 'gap' if it is at least twice as large as
  ## the smallest duration between two time points.  If we take "twice"
  ## literally then we are vulnerable to floating point junk, so take a factor
  ## of 1.8 instead of 2.
  unique.timestamps <- unique(sort(timestamps))
  dt <- diff(unique.timestamps)
  min.dt <- min(dt)
  return(all(dt < 1.8 * min.dt))
}

IsRegular <- function(timestamps) {
  ## A sequence of time stamps is regular if it contains no duplicates and no
  ## gaps.
  return(NoDuplicates(timestamps) && NoGaps(timestamps))
}

RegularizeTimestamps <- function(timestamps) {
  ## Args:
  ##   timestamps: A set of timestamps.  This can be NULL, a numeric object
  ##     (presumably time 1, 2, 3), a numeric set of timestamps that came from a
  ##     ts object (like 1945.08333, 1945.127, ...), a Date object, or a POSIXt
  ##     object.
  ##
  ## Returns:
  ##   A vector of unique, regularly spaced time stamps containing the
  ##   (deduplicated) entries in the argument, and maybe more, if there were
  ##   some time steps passed over by the supplied time stamps.
  ##
  ##   Note that 'regularly spaced' time steps do not always have the same time
  ##   gap between them.  Monthly data is the most obvious example, but daily
  ##   POSIXt data passing over a DST boundary is another.
  if (is.null(timestamps) || IsRegular(timestamps)) {
    return(timestamps)
  }
  UseMethod("RegularizeTimestamps")
}

RegularizeTimestamps.default <- function(timestamps) {
  ## Args:
  ##   timestamps: A set of timestamps.  This can be NULL, a numeric object
  ##     (presumably time 1, 2, 3), a Date object, or a POSIXt object.
  ##
  ## Returns:
  ##   A vector of unique, regularly spaced time stamps containing the
  ##   (deduplicated) entries in the argument, and maybe more, if there were
  ##   some time steps passed over by the supplied time stamps.
  unique.timestamps <- sort(unique(timestamps))
  dt <- diff(unique.timestamps)
  dt <- min(dt)
  regular.timestamps <- seq(from = unique.timestamps[1],
                            to = tail(unique.timestamps, 1),
                            by = dt)
  if (!all(timestamps %in% regular.timestamps)) {
    stop("Time stamps must occur at regular intervals.")
  }
  return(regular.timestamps)
}

RegularizeTimestamps.numeric <- function(timestamps) {
  ## If the timestamps are numeric then we need to guard against floating point
  ## garbage.
  unique.timestamps <- sort(unique(timestamps))
  dt <- min(diff(unique.timestamps))
  regular.timestamps <- seq(from = unique.timestamps[1],
                            to = tail(unique.timestamps, 1),
                            by = dt)
  if (!all(signif(timestamps, digits = 8)  %in%
           signif(regular.timestamps, digits = 8))) {
    stop("Time stamps must occur at regular intervals.")
  }
  class(regular.timestamps) <- "NumericTimestamps"
  return(regular.timestamps)
}

MATCH.NumericTimestamps <- function(x, table, nomatch = NA, ...) {
  ## An S3 generic for the MATCH function provided by the zoo package.  Numeric
  ## timestamps match if they agree on 8 significant digits.
  match(signif(x, digits = 8),
        signif(table, digits = 8),
        nomatch = nomatch,
        ...)
}

RegularizeTimestamps.Date <- function(timestamps) {
  unique.timestamps <- sort(unique(timestamps))
  dt <- diff(unique.timestamps)
  min.dt <- min(dt)
  dt.table <- table(dt)
  if (min.dt == 1 || min.dt == 7) {
    ## Daily or weekly data is regular.
    return(RegularizeTimestamps.default(timestamps))
  } else if (min.dt %in% 28:31 &&
             any(dt.table[c("30", "31")] > 0)) {
    ## Monthly data
    regular.timestamps <- seq(from = unique.timestamps[1],
                              to = tail(unique.timestamps, 1),
                              by = "month")
  } else if (min.dt %in% 89:93) {
    regular.timestamps <- seq(from = unique.timestamps[1],
                              to = tail(unique.timestamps, 1),
                              by = "quarter")
  } else if (min.dt == 365) {
    regular.timestamps <- seq(from = unique.timestamps[1],
                              to = tail(unique.timestamps, 1),
                              by = "year")
  } else {
    stop("Could not determine an appropriate time scale.")
  }
  if (!all(timestamps %in% regular.timestamps)) {
    stop("Date time stamps must occur at regular intervals.")
  }
  return(regular.timestamps)
}

RegularizeTimestamps.POSIXt <- function(timestamps) {
  dt <- diff(sort(unique(timestamps)))
  if (attributes(dt)$units == "days") {
    days <- as.POSIXct(RegularizeTimestamps(as.Date(timestamps)))
    offset <- min(timestamps) - min(days)
    times <- as.POSIXlt(days + offset)
    dst <- as.logical(times$isdst)
    if (dst[1]) {
      ## If the first time period was in daylight savings time then all the time
      ## points not in daylight savings time need to be shifted back by an hour.
      times[!dst] <- times[!dst] + as.difftime(1, units = "hours")
    } else {
      ## If the first time period was in standard time then all the time
      ## points in daylight savings time need to be shifted forward by an hour.
      times[dst] <- times[dst] - as.difftime(1, units = "hours")
    }
    return(as.POSIXct(times))
  } else if(attributes(dt)$units == "hours") {
    ## If the difference in times is on the hourly scale, it might be because
    ## we've got daily data with DST effects yielding differences of 23, 24, and
    ## 25 hours.
    dt.table <- table(dt)
    min.dt <- min(dt)
    mode.dt <- as.numeric(names(dt.table))[which.max(dt.table)]
    if (min.dt == 23 && mode.dt == 24) {
      return(seq(from = min(timestamps), to = max(timestamps), by = "DSTday"))
    } else {
      return(RegularizeTimestamps.default(timestamps))
    }
  } else {
    return(RegularizeTimestamps.default(timestamps))
  }
}

Try the bsts package in your browser

Any scripts or data that you put into this service are public.

bsts documentation built on Nov. 10, 2022, 5:53 p.m.