Nothing
#' @title CFTime class
#'
#' @description This class manages the "time" dimension of netCDF files that
#' follow the CF Metadata Conventions, and its productive use in R.
#'
#' The class has a field `cal` which holds a specific calendar from the
#' allowed types (all named calendars are supported). The calendar is
#' also implemented as a (hidden) class which converts netCDF file encodings to
#' timestamps as character strings, and vice-versa. Bounds information (the
#' period of time over which a timestamp is valid) is used when defined in the
#' netCDF file.
#'
#' Additionally, this class has functions to ease use of the netCDF "time"
#' information when processing data from netCDF files. Filtering and indexing of
#' time values is supported, as is the generation of factors.
#'
#' @export
#' @references
#' https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#time-coordinate
#' @docType class
CFTime <- R6::R6Class("CFTime",
private = list(
# The boundary values of the time instance.
bnds = NULL
),
public = list(
#' @field cal The calendar of this `CFTime` instance, a descendant of the
#' [CFCalendar] class.
cal = NULL,
#' @field offsets A numeric vector of offsets from the origin of the
#' calendar.
offsets = numeric(),
#' @field resolution The average number of time units between offsets.
resolution = NA_real_,
#' @description Create a new instance of this class.
#' @param definition Character string of the units and origin of the
#' calendar.
#' @param calendar Character string of the calendar to use. Must be one of
#' the values permitted by the CF Metadata Conventions. If `NULL`, the
#' "standard" calendar will be used.
#' @param offsets Numeric or character vector, optional. When numeric, a
#' vector of offsets from the origin in the time series. When a character
#' vector of length 2 or more, timestamps in ISO8601 or UDUNITS format.
initialize = function(definition, calendar = "standard", offsets = NULL) {
if (is.null(calendar)) calendar <- "standard" # This may occur when "calendar" attribute is not defined in the NC file
calendar <- tolower(calendar)
self$cal <- switch(calendar,
"standard" = CFCalendarStandard$new(calendar, definition),
"gregorian" = CFCalendarStandard$new(calendar, definition),
"proleptic_gregorian" = CFCalendarProleptic$new(calendar, definition),
"tai" = CFCalendarTAI$new(calendar, definition),
"utc" = CFCalendarUTC$new(calendar, definition),
"julian" = CFCalendarJulian$new(calendar, definition),
"360_day" = CFCalendar360$new(calendar, definition),
"365_day" = CFCalendar365$new(calendar, definition),
"noleap" = CFCalendar365$new(calendar, definition),
"366_day" = CFCalendar366$new(calendar, definition),
"all_leap" = CFCalendar366$new(calendar, definition),
"none" = CFCalendarNone$new(calendar, definition),
stop("Invalid calendar specification", call. = FALSE)
)
if (is.null(offsets)) return()
if (is.numeric(offsets)) {
dim(offsets) <- NULL
stopifnot(.validOffsets(offsets))
if (length(offsets) > 1L) {
self$resolution <- (max(offsets) - min(offsets)) / (length(offsets) - 1L)
if (any(diff(offsets) <= 0))
warning("Offsets not monotonically increasing.", call. = FALSE)
} else {
self$resolution <- NA_real_
}
self$offsets <- as.numeric(offsets)
} else if (is.character(offsets)) {
time <- self$cal$parse(offsets)
if (anyNA(time$year)) stop("Argument `offsets` contains invalid timestamps", call. = FALSE) # nocov
self$offsets <- time$offset
if (length(offsets) > 1L)
self$resolution <- (max(self$offsets) - min(self$offsets)) / (length(self$offsets) - 1L)
else
self$resolution <- NA_real_
if (any(diff(self$offsets) <= 0))
warning("Offsets not monotonically increasing.", call. = FALSE)
} else if (!is.null(offsets)) stop("Invalid offsets for CFTime object", call. = FALSE)
},
#' @description Print a summary of the `CFTime` object to the console.
#' @param ... Ignored.
#' @return `self` invisibly.
print = function(...) {
noff <- length(self$offsets)
if (noff == 0L) {
el <- " Elements: (no elements)\n"
b <- " Bounds : (not set)\n"
} else {
d <- self$range()
if (inherits(self$cal, "CFCalendarNone")) {
el <- if (noff > 1L) {
sprintf(" Elements: Series of %d elements at %s\n", noff, d[1L])
} else
paste(" Element : One element at", d[1L], "\n")
} else {
el <- if (noff > 1L) {
sprintf(" Elements: [%s .. %s] (average of %f %s between %d elements)\n",
d[1L], d[2L], self$resolution, CFt$units$name[self$cal$unit], noff)
} else
paste(" Element :", d[1L], "\n")
}
b <- if (is.null(private$bnds)) " Bounds : not set\n"
else " Bounds : set\n"
}
cal <- capture.output(self$cal$print())
cat(paste(cal, collapse = "\n"), "\nTime series:\n", el, b, sep = "")
invisible(self)
},
#' @description This method returns the first and last timestamp of the time
#' series as a vector. Note that the offsets do not have to be sorted.
#'
#' @param format Value of "date" or "timestamp". Optionally, a
#' character string that specifies an alternate format.
#' @param bounds Logical to indicate if the extremes from the bounds should
#' be used, if set. Defaults to `FALSE`.
#'
#' @return Vector of two character strings that represent the starting and
#' ending timestamps in the time series. If a `format` is supplied, that
#' format will be used. Otherwise, if all of the timestamps in the time
#' series have a time component of `00:00:00` the date of the timestamp is
#' returned, otherwise the full timestamp (without any time zone
#' information).
range = function(format = "", bounds = FALSE) {
if (length(self$offsets) == 0L) return(c(NA_character_, NA_character_))
if (!missing(format) && ((!is.character(format)) || length(format) != 1L))
stop("`format` argument, when present, must be a character string with formatting specifiers", call. = FALSE) # nocov
if (!is.logical(bounds) || length(bounds) != 1L)
stop("`bounds` argument, when present, must be a single logical value", call. = FALSE) # nocov
if (bounds) {
bnds <- private$bnds
if (is.null(bnds)) time <- self$cal$offsets2time(base::range(self$offsets))
else time <- self$cal$offsets2time(c(bnds[1L, 1L], bnds[2L, length(self$offsets)]))
} else time <- self$cal$offsets2time(base::range(self$offsets))
.format_format(time, self$cal$timezone, format)
},
#' @description This method generates a vector of character strings or
#' `POSIXct`s that represent the date and time in a selectable combination
#' for each offset.
#'
#' The character strings use the format `YYYY-MM-DDThh:mm:ss±hhmm`,
#' depending on the `format` specifier. The date in the string is not
#' necessarily compatible with `POSIXt` - in the `360_day` calendar
#' `2017-02-30` is valid and `2017-03-31` is not.
#'
#' For the "proleptic_gregorian" calendar the output can also be generated
#' as a vector of `POSIXct` values by specifying `asPOSIX = TRUE`. The
#' same is possible for the "standard" and "gregorian" calendars but only
#' if all timestamps fall on or after 1582-10-15. If `asPOSIX = TRUE` is
#' specified while the calendar does not support it, an error will be
#' generated.
#'
#' @param format character. A character string with either of the values
#' "date" or "timestamp". If the argument is not specified, the format
#' used is "timestamp" if there is time information, "date" otherwise.
#' @param asPOSIX logical. If `TRUE`, for "standard", "gregorian" and
#' "proleptic_gregorian" calendars the output is a vector of `POSIXct` -
#' for other calendars an error will be thrown. Default value is `FALSE`.
#'
#' @return A character vector where each element represents a moment in
#' time according to the `format` specifier.
as_timestamp = function(format = NULL, asPOSIX = FALSE) {
if (asPOSIX && !self$cal$POSIX_compatible(self$offsets))
stop("Cannot make a POSIX timestamp with this calendar.", call. = FALSE)
if (length(self$offsets) == 0L) return()
time <- self$cal$offsets2time(self$offsets)
if (is.null(format))
format <- ifelse(self$cal$unit < 4L || .has_time(time), "timestamp", "date")
else if (!(format %in% c("date", "timestamp")))
stop("Format specifier not recognized", call. = FALSE) # nocov
if (asPOSIX) {
if (format == "date") ISOdate(time$year, time$month, time$day, 0L)
else ISOdatetime(time$year, time$month, time$day, time$hour, time$minute, time$second, "UTC")
} else .format_format(time, self$cal$timezone, format)
},
#' @description Format timestamps using a specific format string, using the
#' specifiers defined for the [base::strptime()] function, with
#' limitations. The only supported specifiers are `bBdeFhHImMpRSTYz%`.
#' Modifiers `E` and `O` are silently ignored. Other specifiers, including
#' their percent sign, are copied to the output as if they were adorning
#' text.
#'
#' The formatting is largely oblivious to locale. The reason for this is
#' that certain dates in certain calendars are not POSIX-compliant and the
#' system functions necessary for locale information thus do not work
#' consistently. The main exception to this is the (abbreviated) names of
#' months (`bB`), which could be useful for pretty printing in the local
#' language. For separators and other locale-specific adornments, use
#' local knowledge instead of depending on system locale settings; e.g.
#' specify `%m/%d/%Y` instead of `%D`.
#'
#' Week information, including weekday names, is not supported at all as a
#' "week" is not defined for non-standard CF calendars and not generally
#' useful for climate projection data. If you are working with observed
#' data and want to get pretty week formats, use the [as_timestamp()]
#' method to generate `POSIXct` timestamps (observed data generally uses a
#' "standard" calendar) and then use the [base::format()] function which
#' supports the full set of specifiers.
#'
#' @param format A character string with `strptime` format specifiers. If
#' omitted, the most economical format will be used: a full timestamp when
#' time information is available, a date otherwise.
#'
#' @return A vector of character strings with a properly formatted
#' timestamp. Any format specifiers not recognized or supported will be
#' returned verbatim.
format = function(format) {
if (length(self$offsets) == 0L) return(character(0L))
if (!requireNamespace("stringr", quietly = TRUE))
stop("package `stringr` is required - please install it first", call. = FALSE) # nocov
if (missing(format)) format <- ""
else if (!is.character(format) || length(format) != 1L)
stop("`format` argument must be a character string with formatting specifiers", call. = FALSE)
ts <- self$cal$offsets2time(self$offsets)
.format_format(ts, self$cal$timezone, format)
},
#' @description Find the index in the time series for each timestamp given
#' in argument `x`. Values of `x` that are before the earliest value in
#' the time series will be returned as `0`; values of `x` that are after
#' the latest values in the time series will be returned as
#' `.Machine$integer.max`. Alternatively, when `x` is a numeric vector of
#' index values, return the valid indices of the same vector, with the
#' side effect being the attribute "CFTime" associated with the result.
#'
#' Matching also returns index values for timestamps that fall between two
#' elements of the time series - this can lead to surprising results when
#' time series elements are positioned in the middle of an interval (as
#' the CF Metadata Conventions instruct us to "reasonably assume"): a time
#' series of days in January would be encoded in a netCDF file as
#' `c("2024-01-01 12:00:00", "2024-01-02 12:00:00", "2024-01-03 12:00:00", ...)`
#' so `x <- c("2024-01-01", "2024-01-02", "2024-01-03")` would
#' result in `(NA, 1, 2)` (or `(NA, 1.5, 2.5)` with `method = "linear"`)
#' because the date values in `x` are at midnight. This situation is
#' easily avoided by ensuring that this `CFTime` instance has bounds set
#' (use `bounds(y) <- TRUE` as a proximate solution if bounds are not
#' stored in the netCDF file). See the Examples.
#'
#' If bounds are set, the indices are taken from those bounds. Returned
#' indices may fall in between bounds if the latter are not contiguous,
#' with the exception of the extreme values in `x`.
#'
#' Values of `x` that are not valid timestamps according to the calendar
#' of this `CFTime` instance will be returned as `NA`.
#'
#' `x` can also be a numeric vector of index values, in which case the
#' valid values in `x` are returned. If negative values are passed, the
#' positive counterparts will be excluded and then the remainder returned.
#' Positive and negative values may not be mixed. Using a numeric vector
#' has the side effect that the result has the attribute "CFTime"
#' describing the temporal dimension of the slice. If index values outside
#' of the range of `self` are provided, an error will be thrown.
#'
#' @param x Vector of character, POSIXt or Date values to find indices for,
#' or a numeric vector.
#' @param method Single value of "constant" or "linear". If `"constant"` or
#' when bounds are set on `self`, return the index value for each
#' match. If `"linear"`, return the index value with any fractional value.
#'
#' @return A numeric vector giving indices into the "time" dimension of the
#' dataset associated with `self` for the values of `x`. If there is at
#' least 1 valid index, then attribute "CFTime" contains an instance of
#' `CFTime` that describes the dimension of filtering the dataset
#' associated with `self` with the result of this function, excluding any
#' `NA`, `0` and `.Machine$integer.max` values.
indexOf = function(x, method = "constant") {
stopifnot(inherits(x, c("character", "POSIXt", "Date")) || is.numeric(x),
method %in% c("constant", "linear"))
if (is.numeric(x)) {
if (!(all(x < 0, na.rm = TRUE) || all(x > 0, na.rm = TRUE)))
stop("Cannot mix positive and negative index values", call. = FALSE)
intv <- (1:length(self$offsets))[x]
xoff <- self$offsets[x]
} else {
if (self$cal$unit > 4L)
stop("Parsing of timestamps on a 'month' or 'year' time unit is not supported.", call. = FALSE)
xoff <- self$cal$parse(as.character(x))$offset
vals <- self$get_bounds()
vals <- if (is.null(vals)) self$offsets
else c(vals[1L, 1L], vals[2L, ])
intv <- stats::approx(vals, 1L:length(vals), xoff, method = method,
yleft = 0, yright = .Machine$integer.max)$y
intv[which(intv == length(vals))] <- .Machine$integer.max
}
valid <- which(!is.na(intv) & intv > 0 & intv < .Machine$integer.max)
if (any(valid)) {
t <- CFTime$new(self$cal$definition, self$cal$name, xoff[valid])
bnds <- private$bnds
if (!is.null(bnds))
t$set_bounds(bnds[, intv[valid], drop = FALSE])
attr(intv, "CFTime") <- t
}
intv
},
#' @description Return bounds.
#'
#' @param format A string specifying a format for output, optional.
#' @return An array with dims(2, length(offsets)) with values for the
#' bounds. `NULL` if the bounds have not been set.
get_bounds = function(format) {
bnds <- private$bnds
if (is.null(bnds) || missing(format)) return(bnds)
ts <- self$cal$offsets2time(as.vector(bnds))
b <- .format_format(ts, self$cal$timezone, format)
dim(b) <- dim(bnds)
b
},
#' @description Set or delete the bounds of the `CFTime` instance.
#'
#' @param value The bounds to set, in units of the offsets. A matrix `(2,
#' length(self$offsets))`. If `NULL`, the bounds are deleted. If `TRUE`,
#' make regular, consecutive bounds.
#' @return `self` invisibly.
set_bounds = function(value) {
if (is.null(value) || isFALSE(value)) private$bnds <- NULL
else if (isTRUE(value)) {
len <- length(self$offsets)
b <- seq(from = self$offsets[1L] - self$resolution * 0.5,
by = self$resolution,
length.out = len + 1L)
private$bnds <- rbind(b[1L:len], b[2L:(len+1L)])
} else {
off <- self$offsets
len <- length(off)
if (len == 0L)
stop("Cannot set bounds when there are no offsets", call. = FALSE)
if (is.matrix(value) && is.numeric(value)) {
if (!all(dim(value) == c(2L, len)))
stop("Replacement value has incorrect dimensions", call. = FALSE)
} else stop("Replacement value must be a numeric matrix or a single logical value", call. = FALSE)
if (!(all(value[2L,] >= off) && all(off >= value[1L,])))
stop("Values of the replacement value must surround the offset values", call. = FALSE)
private$bnds <- value
}
invisible(self)
},
#' This method returns `TRUE` if the time series has uniformly distributed
#' time steps between the extreme values, `FALSE` otherwise. First test
#' without sorting; this should work for most data sets. If not, only then
#' offsets are sorted. For most data sets that will work but for implied
#' resolutions of month, season, year, etc based on a "days" or finer
#' calendar unit this will fail due to the fact that those coarser units
#' have a variable number of days per time step, in all calendars except for
#' `360_day`. For now, an approximate solution is used that should work in
#' all but the most non-conformal exotic arrangements.
#'
#' @return `TRUE` if all time steps are equidistant, `FALSE` otherwise, or
#' `NA` if no offsets have been set.
equidistant = function() {
if (length(self$offsets) == 0L) return(NA)
out <- all(diff(self$offsets) == self$resolution)
if (!out) {
doff <- diff(sort(self$offsets))
out <- all(doff == self$resolution)
if (!out) {
# Don't try to make sense of totally non-standard arrangements such as
# calendar units "years" or "months" describing sub-daily time steps.
# Also, 360_day calendar should be well-behaved so we don't want to get here.
if (self$cal$unit > 4L || inherits(self$cal, "CFCalendar360")) return(FALSE)
# Check if we have monthly or yearly data on a finer-scale calendar
# This is all rather approximate but should be fine in most cases
# This accommodates middle-of-the-time-period offsets as per the
# CF Metadata Conventions
# Please report problems at https://github.com/pvanlaake/CFtime/issues
ddays <- range(doff) * CFt$units$per_day[self$cal$unit]
return((ddays[1] >= 28 && ddays[2] <= 31) || # months
(ddays[1] >= 8 && ddays[2] <= 11) || # dekads
(ddays[1] >= 90 && ddays[2] <= 92) || # seasons, quarters
(ddays[1] >= 365 && ddays[2] <= 366)) # years
}
}
out
},
#' @description Given a vector of character timestamps, return a logical
#' vector of a length equal to the number of time steps in the time series
#' with values `TRUE` for those time steps that fall between the two
#' extreme values of the vector values, `FALSE` otherwise.
#'
#' @param extremes Character vector of timestamps that represent the
#' time period of interest. The extreme values are selected. Badly
#' formatted timestamps are silently dropped.
#' @param closed Is the right side closed, i.e. included in the result?
#' Default is `FALSE`. A specification of `c("2022-01-01", "2023-01-01)`
#' will thus include all time steps that fall in the year 2022 when
#' `closed = FALSE` but include `2023-01-01` if that exact value is
#' present in the time series.
#' @return A logical vector with a length equal to the number of time steps
#' in `self` with values `TRUE` for those time steps that fall between the
#' extreme values, `FALSE` otherwise.
#'
#' An attribute 'CFTime' will have the same definition as `self` but with
#' offsets corresponding to the time steps falling between the two
#' extremes. If there are no values between the extremes, the attribute is
#' not set.
slice = function(extremes, closed = FALSE) {
if (!is.character(extremes) || length(extremes) < 1L)
stop("Second argument must be a character vector of at least one timestamp.", call. = FALSE) # nocov
off <- self$offsets
roff <- range(off)
ext <- range(self$cal$parse(extremes)$offset, na.rm = TRUE)
if (all(is.na(ext)) || ext[1L] > roff[2L] || ext[2L] < roff[1L])
out <- rep(FALSE, length(off))
else {
if (ext[1L] == ext[2L]) closed <- TRUE
out <- if (closed) off >= ext[1L] & off <= ext[2L]
else off >= ext[1L] & off < ext[2L]
if (any(out)) {
t <- CFTime$new(self$cal$definition, self$cal$name, off[out])
bnds <- private$bnds
if (!is.null(bnds))
t$set_bounds(bnds[, out, drop = FALSE])
attr(out, "CFTime") <- t
} else
out <- rep(FALSE, length(off)) # nocov
}
out
},
#' @description Can the time series be converted to POSIXt?
#' @return `TRUE` if the calendar support coversion to POSIXt, `FALSE`
#' otherwise.
POSIX_compatible = function() {
self$cal$POSIX_compatible(self$offsets)
},
#' @description Create a factor for a `CFTime` instance.
#'
#' When argument `breaks` is one of `"year", "season", "quarter", "month",
#' "dekad", "day"`, a factor is generated like by [CFfactor()]. When
#' `breaks` is a vector of character timestamps a factor is produced with
#' a level for every interval between timestamps. The last timestamp,
#' therefore, is only used to close the interval started by the
#' pen-ultimate timestamp - use a distant timestamp (e.g. `range(x)[2]`)
#' to ensure that all offsets to the end of the CFTime time series are
#' included, if so desired. The last timestamp will become the upper bound
#' in the `CFTime` instance that is returned as an attribute to this
#' function so a sensible value for the last timestamp is advisable.
#'
#' This method works similar to [base::cut.POSIXt()] but there are some
#' differences in the arguments: for `breaks` the set of options is
#' different and no preceding integer is allowed, `labels` are always
#' assigned using values of `breaks`, and the interval is always
#' left-closed.
#'
#' @param breaks A character string of a factor period (see [CFfactor()] for
#' a description), or a character vector of timestamps that conform to the
#' calendar of `x`, with a length of at least 2. Timestamps must be given
#' in ISO8601 format, e.g. "2024-04-10 21:31:43".
#'
#' @return A factor with levels according to the `breaks` argument, with
#' attributes 'period', 'era' and 'CFTime'. When `breaks` is a factor
#' period, attribute 'period' has that value, otherwise it is '"day"'.
#' When `breaks` is a character vector of timestamps, attribute 'CFTime'
#' holds an instance of `CFTime` that has the same definition as `x`, but
#' with (ordered) offsets generated from the `breaks`. Attribute 'era'
#' is always -1.
cut = function(breaks) {
if (missing(breaks) || !is.character(breaks) || (len <- length(breaks)) < 1L)
stop("Argument 'breaks' must be a character vector with at least 1 value", call. = FALSE) # nocov
if(len == 1L) {
breaks <- sub("s$", "", tolower(breaks))
if (breaks %in% CFt$factor_periods)
return(self$factor(breaks))
else stop("Invalid specification of 'breaks'", call. = FALSE) # nocov
}
# breaks is a character vector of multiple timestamps
if (self$cal$unit > 4L) stop("Factorizing on a 'month' or 'year' time unit is not supported", call. = FALSE) # nocov
time <- self$cal$parse(breaks)
if (anyNA(time$year))
stop("Invalid specification of 'breaks'", call. = FALSE) # nocov
sorted <- order(time$offset)
ooff <- time$offset[sorted]
intv <- findInterval(self$offsets, ooff)
intv[which(intv %in% c(0L, len))] <- NA
f <- factor(intv, labels = breaks[sorted][1L:(len-1L)])
# Attributes
bnds <- rbind(ooff[1L:(len-1L)], ooff[2L:len])
off <- bnds[1L, ] + (bnds[2L, ] - bnds[1L, ]) * 0.5
t <- CFTime$new(self$cal$definition, self$cal$name, off)
t$set_bounds(bnds)
attr(f, "period") <- "day"
attr(f, "era") <- -1L
attr(f, "CFTime") <- t
f
},
#' @description Generate a factor for the offsets, or a part thereof. This
#' is specifically interesting for creating factors from the date part of
#' the time series that aggregate the time series into longer time periods
#' (such as month) that can then be used to process daily CF data sets
#' using, for instance, `tapply()`.
#'
#' The factor will respect the calendar that the time series is built on.
#'
#' The factor will be generated in the order of the offsets. While typical
#' CF-compliant data sources use ordered time series there is, however, no
#' guarantee that the factor is ordered. For most processing with a factor
#' the ordering is of no concern.
#'
#' If the `era` parameter is specified, either as a vector of years to
#' include in the factor, or as a list of such vectors, the factor will
#' only consider those values in the time series that fall within the list
#' of years, inclusive of boundary values. Other values in the factor will
#' be set to `NA`.
#'
#' The following periods are supported by this method:
#'
#' \itemize{
#' \item `year`, the year of each offset is returned as "YYYY".
#' \item `season`, the meteorological season of each offset is returned as
#' "Sx", with x being 1-4, preceeded by "YYYY" if no `era` is
#' specified. Note that December dates are labeled as belonging to the
#' subsequent year, so the date "2020-12-01" yields "2021S1". This implies
#' that for standard CMIP files having one or more full years of data the
#' first season will have data for the first two months (January and
#' February), while the final season will have only a single month of data
#' (December).
#' \item `quarter`, the calendar quarter of each offset is returned as "Qx",
#' with x being 1-4, preceeded by "YYYY" if no `era` is specified.
#' \item `month`, the month of each offset is returned as "01" to
#' "12", preceeded by "YYYY-" if no `era` is specified. This is the default
#' period.
#' \item `dekad`, ten-day periods are returned as
#' "Dxx", where xx runs from "01" to "36", preceeded by "YYYY" if no `era`
#' is specified. Each month is subdivided in dekads as follows: 1- days 01 -
#' 10; 2- days 11 - 20; 3- remainder of the month.
#' \item `day`, the month and day of each offset are returned as "MM-DD",
#' preceeded by "YYYY-" if no `era` is specified.
#' }
#'
#' It is not possible to create a factor for a period that is shorter than
#' the temporal resolution of the calendar. As an example, if the calendar
#' has a monthly unit, a dekad or day factor cannot be created.
#'
#' Creating factors for other periods is not supported by this method.
#' Factors based on the timestamp information and not dependent on the
#' calendar can trivially be constructed from the output of the
#' [as_timestamp()] function.
#'
#' Attribute 'CFTime' of the result contains a `CFTime` instance that is
#' valid for the result of applying the factor to a resource that this
#' instance is associated with. In other words, if `CFTime` instance 'At'
#' describes the temporal dimension of resource 'A' and a factor 'Af' is
#' generated from `Af <- At$factor()`, then `Bt <- attr(Af, "CFTime")`
#' describes the temporal dimension of the result of, say, `B <- apply(A,
#' 1:2, tapply, Af, FUN)`. The 'CFTime' attribute contains a
#' [CFClimatology] instance for era factors.
#'
#' @param period character. A character string with one of the values
#' "year", "season", "quarter", "month" (the default), "dekad" or "day".
#' @param era numeric or list, optional. Vector of years for which to
#' construct the factor, or a list whose elements are each a vector of
#' years. The extreme values of the supplied vector will be used. Note
#' that if a single year is specified that the result is valid, but not a
#' climatological statistic. If `era` is not specified, the factor will
#' use the entire time series for the factor.
#' @return If `era` is a single vector or `NULL`, a factor with a length
#' equal to the number of offsets in this instance. If `era` is a list, a
#' list with the same number of elements and names as `era`, each
#' containing a factor. Elements in the factor will be set to `NA` for
#' time series values outside of the range of specified years.
#'
#' The factor, or factors in the list, have attributes 'period', 'era' and
#' 'CFTime'. Attribute 'period' holds the value of the `period` argument.
#' Attribute 'era' indicates the number of years that are included in the
#' era, or -1 if no `era` is provided. Attribute 'CFTime' holds an
#' instance of `CFTime` or `CFClimatology` that has the same definition as
#' this instance, but with offsets corresponding to the mid-point of the
#' factor levels.
factor = function(period = "month", era = NULL) {
if (length(self$offsets) < 10L) stop("Cannot create a factor for very short time series", call. = FALSE) # nocov
period <- tolower(period)
if (!((length(period) == 1L) && (period %in% CFt$factor_periods)))
stop("Period specifier must be a single value of a supported period", call. = FALSE) # nocov
# No fine-grained period factors for coarse source data
timestep <- CFt$units$seconds[self$cal$unit] * self$resolution;
if ((period == "year") && (timestep > 86400 * 366) ||
(period %in% c("season", "quarter")) && (timestep > 86400 * 90) || # Somewhat arbitrary
(period == "month") && (timestep > 86400 * 31) ||
(period == "dekad") && (timestep > 86400) || # Must be constructed from daily or finer data
(period == "day") && (timestep > 86400)) # Must be no longer than a day
stop("Cannot produce a short period factor from source data with long time interval", call. = FALSE) # nocov
time <- self$cal$offsets2time(self$offsets)
months <- c("01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12")
if (is.null(era)) {
# Create the factor for the specified period as well as bounds dates for a
# new CFtime instance for the factor. Lower bounds for the factor level is
# easy, upper bound of last level takes effort.
switch(period,
"year" = {
out <- as.factor(sprintf("%04d", time$year))
l <- levels(out)
dt <- c(paste0(l, "-01-01"), sprintf("%04d-01-01", as.integer(l[nlevels(out)]) + 1L))
},
"season" = {
if (!requireNamespace("stringr"))
stop("Must install package `stringr` to use this functionality.", call. = FALSE) # nocov
out <- as.factor(
ifelse(time$month == 12L, sprintf("%04dS1", time$year + 1L),
sprintf("%04dS%d", time$year, time$month %/% 3L + 1L)))
l <- levels(out)
dt <- ifelse(substr(l, 6L, 6L) == "1", paste0(as.integer(substr(l, 1L, 4L)) - 1L, "-12-01"),
stringr::str_replace_all(l, c("S2" = "-03-01", "S3" = "-06-01", "S4" = "-09-01")))
ll <- l[nlevels(out)]
lp <- as.integer(substr(ll, 6L, 6L))
if (lp == 1L)
dt <- c(dt, sprintf("%04d-03-01", as.integer(substr(ll, 1L, 4L)) + 1L))
else dt <- c(dt, sprintf("%s-%02d-01", substr(ll, 1L, 4L), lp * 3L))
},
"quarter" = {
if (!requireNamespace("stringr"))
stop("Must install package `stringr` to use this functionality.", call. = FALSE) # nocov
out <- as.factor(sprintf("%04dQ%d", time$year, (time$month - 1L) %/% 3L + 1L))
l <- levels(out)
dt <- stringr::str_replace_all(l, c("Q1" = "-01-01", "Q2" = "-04-01", "Q3" = "-07-01", "Q4" = "-10-01"))
ll <- l[nlevels(out)]
lp <- as.integer(substr(ll, 6L, 6L))
if (lp == 4L)
dt <- c(dt, sprintf("%04d-01-01", as.integer(substr(ll, 1L, 4L)) + 1L))
else dt <- c(dt, sprintf("%s-%02d-01", substr(ll, 1L, 4L), lp * 3L + 1L))
},
"month" = {
out <- as.factor(sprintf("%04d-%s", time$year, months[time$month]))
l <- levels(out)
dt <- paste0(l, "-01")
ll <- l[nlevels(out)]
lp <- as.integer(substr(ll, 6L, 7L))
if (lp == 12L)
dt <- c(dt, sprintf("%04d-01-01", as.integer(substr(ll, 1L, 4L)) + 1L))
else dt <- c(dt, sprintf("%s-%02d-01", substr(ll, 1L, 4L), lp + 1L))
},
"dekad" = {
out <- as.factor(sprintf("%04dD%02d", time$year, (time$month - 1L) * 3L + pmin.int((time$day - 1L) %/% 10L + 1L, 3L)))
l <- levels(out)
dk <- as.integer(substr(l, 6L, 7L)) - 1L
dt <- sprintf("%s-%02d-%s", substr(l, 1L, 4L), dk %/% 3L + 1L, c("01", "11", "21")[dk %% 3L + 1L])
ll <- l[nlevels(out)]
lp <- as.integer(substr(ll, 6L, 7L))
yr <- as.integer(substr(ll, 1L, 4L))
if (lp == 36L)
dt <- c(dt, sprintf("%04d-01-01", yr + 1L))
else dt <- c(dt, sprintf("%04d-%02d-%s", yr, lp %/% 3L + 1L, c("01", "11", "21")[lp %% 3L + 1L]))
},
"day" = {
out <- as.factor(sprintf("%04d-%02d-%02d", time$year, time$month, time$day))
final <- levels(out)[nlevels(out)]
yr <- as.integer(substring(final, 1, 4))
mon <- as.integer(substring(final, 6, 7))
day <- as.integer(substring(final, 9, 10))
post <- self$cal$add_day(data.frame(year = yr, month = mon, day = day))
post <- sprintf("%04d-%02d-%02d", post$year, post$month, post$day)
dt <- c(levels(out), post)
}
)
# Convert bounds dates to an array of offsets, find mid-points, create new CFTime instance
off <- self$cal$parse(dt)$offset
off[is.na(off)] <- 0 # This can happen only when the time series starts at or close to the origin, for seasons
noff <- length(off)
bnds <- rbind(off[1L:(noff - 1L)], off[2L:noff])
off <- bnds[1L,] + (bnds[2L,] - bnds[1L,]) * 0.5
new_time <- CFTime$new(self$cal$definition, self$cal$name, off)
new_time$set_bounds(bnds)
# Bind attributes to the factor
attr(out, "era") <- -1L
attr(out, "period") <- period
attr(out, "CFTime") <- new_time
return(out)
}
# Era factor
if (is.numeric(era)) ep <- list(era)
else if ((is.list(era) && all(unlist(lapply(era, is.numeric))))) ep <- era
else stop("When specified, the `era` parameter must be a numeric vector or a list thereof", call. = FALSE)
out <- lapply(ep, function(years) {
years <- as.integer(years)
yrs <- range(years)
mid_yr <- as.integer(yrs[1L] + (yrs[2L] - yrs[1L]) * 0.5)
switch(period,
"year" = {
f <- as.factor(ifelse(time$year %in% years, sprintf("%04d", time$year), NA_character_))
n <- nlevels(f)
if (n > 0L) {
off <- self$cal$parse(paste0(levels(f), "-07-01"))$offset
last <- as.integer(levels(f)[n]) + 1L
bnds <- c(rep(paste0(levels(f)[1L], "-01-01"), n), rep(paste0(last, "-01-01"), n))
}
},
"season" = {
f <- as.factor(ifelse((time$month == 12L) & ((time$year + 1L) %in% years), "S1",
ifelse((time$month < 12L) & (time$year %in% years), sprintf("S%d", time$month %/% 3L + 1L), NA_character_)))
if (nlevels(f) > 0L) {
s <- as.integer(substr(levels(f), 2, 2))
mid_s <- c("-01-16T12", "-04-16", "-07-16T12", "-10-16T12")
off <- self$cal$parse(paste0(mid_yr, mid_s[s]))$offset
bnds <- c(ifelse(s == 1L, sprintf("%d-12-01", yrs[1L] - 1L),
sprintf("%d-%02d-01", yrs[1L], (s - 1L) * 3L)),
sprintf("%d-%02d-01", yrs[2L], s * 3L))
}
},
"quarter" = {
f <- as.factor(ifelse(time$year %in% years, sprintf("Q%d", (time$month - 1L) %/% 3L + 1L), NA_character_))
if (nlevels(f) > 0L) {
q <- as.integer(substr(levels(f), 2, 2))
mid_q <- c("-02-15", "-05-16T12", "-08-16T12", "-11-16")
off <- self$cal$parse(paste0(mid_yr, mid_q[q]))$offset
bnds <- c(sprintf("%d-%02d-01", yrs[1L], (q - 1L) * 3L + 1L),
ifelse(q == 4L, sprintf("%d-01-01", yrs[2L] + 1L),
sprintf("%d-%02d-01", yrs[2L], q * 3L + 1L)))
}
},
"month" = {
f <- as.factor(ifelse(time$year %in% years, months[time$month], NA_character_))
if (nlevels(f) > 0L) {
mons <- as.numeric(levels(f))
mid_mon <- c("-16T12", "-15", "-16T12", "-16", "-16T12", "-16", "-16T12", "-16T12", "-16", "-16T12", "-16", "-16T12")
off <- self$cal$parse(paste0(mid_yr, "-", levels(f), mid_mon[mons]))$offset
bnds <- c(paste0(yrs[1L], "-", levels(f), "-01"),
ifelse(mons == 12L, sprintf("%d-01-01", yrs[2L] + 1L),
sprintf("%d-%02d-01", yrs[2L], mons + 1L)))
}
},
"dekad" = {
f <- as.factor(ifelse(time$year %in% years, sprintf("D%02d", (time$month - 1L) * 3L + pmin.int((time$day - 1L) %/% 10L + 1L, 3L)), NA_character_))
if (nlevels(f) > 0L) {
dek <- as.integer(substr(levels(f), 2, 3))
mon <- (dek - 1L) %/% 3L + 1L
mid_dek <- c("06", "16", "26T12", "06", "16", "25", "06", "16", "26T12", "06", "16", "26",
"06", "16", "26T12", "06", "16", "26", "06", "16", "26T12", "06", "16", "26T12",
"06", "16", "26", "06", "16", "26T12", "06", "16", "26", "06", "16", "26T12")
off <- self$cal$parse(sprintf("%d-%02d-%s", mid_yr, mon, mid_dek[dek]))$offset
day <- c("01", "11", "21")
y2 <- rep(yrs[2L], length(dek))
m2 <- mon
ndx <- which(!(dek %% 3L))
m2[ndx] <- m2[ndx] + 1L
ndx <- which(dek == 36L)
y2[ndx] <- yrs[2L] + 1L
m2[ndx] <- 1L
bnds <- c(sprintf("%d-%02d-%s", yrs[1L], mon, day[(dek - 1L) %% 3L + 1L]),
sprintf("%d-%02d-%s", y2, m2, day[dek %% 3L + 1L]))
}
},
"day" = {
f <- as.factor(ifelse(time$year %in% years, sprintf("%s-%02d", months[time$month], time$day), NA_character_))
if (nlevels(f) > 0L) {
mons <- as.integer(substr(levels(f), 1, 2))
days <- as.integer(substr(levels(f), 4, 5))
off <- self$cal$parse(sprintf("%d-%s-%02dT12", mid_yr, months[mons], days))$offset
# Add a day to the calendar for the final year
y2off1 <- self$cal$parse(paste0(yrs[2L], "-", levels(f)))$offset + CFt$units$per_day[self$cal$unit]
y2off1 <- self$cal$offsets2time(y2off1)
bnds <- c(sprintf("%d-%s-%02d", yrs[1L], months[mons], days),
sprintf("%d-%02d-%02d", y2off1$year, y2off1$month, y2off1$day))
}
}
)
attr(f, "era") <- length(years)
attr(f, "period") <- period
if (nlevels(f) > 0L) {
bnds <- matrix(self$cal$parse(bnds)$offset, nrow = 2L, byrow = TRUE)
attr(f, "CFTime") <- CFClimatology$new(self$cal$definition, self$cal$name, off, bnds)
}
f
})
if (is.numeric(era)) out <- out[[1L]]
else names(out) <- names(era)
out
},
#' @description Given a factor as produced by `CFTime$factor()`, this method
#' will return a numeric vector with the number of time units in each
#' level of the factor.
#'
#' The result of this method is useful to convert between absolute and
#' relative values. Climate change anomalies, for instance, are usually
#' computed by differencing average values between a future period and a
#' baseline period. Going from average values back to absolute values for
#' an aggregate period (which is typical for temperature and
#' precipitation, among other variables) is easily done with the result of
#' this method, without having to consider the specifics of the calendar
#' of the data set.
#'
#' If the factor `f` is for an era (e.g. spanning multiple years and the
#' levels do not indicate the specific year), then the result will
#' indicate the number of time units of the period in a regular single
#' year. In other words, for an era of 2041-2060 and a monthly factor on a
#' standard calendar with a `days` unit, the result will be
#' `c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)`. Leap days are thus
#' only considered for the `366_day` and `all_leap` calendars.
#'
#' Note that this function gives the number of time units in each level of
#' the factor - the actual number of data points in the time series per
#' factor level may be different. Use [CFfactor_coverage()] to determine
#' the actual number of data points or the coverage of data points
#' relative to the factor level.
#'
#' @param f A factor or a list of factors derived from the method
#' `CFTime$factor()`.
#' @return If `f` is a factor, a numeric vector with a length equal to the
#' number of levels in the factor, indicating the number of time units in
#' each level of the factor. If `f` is a list of factors, a list with each
#' element a numeric vector as above.
factor_units = function(f) {
if (is.list(f)) factors <- f else factors <- list(f)
if (!(all(unlist(lapply(factors, function(x) is.factor(x) && is.numeric(attr(x, "era")) &&
attr(x, "period") %in% CFt$factor_periods)))))
stop("Argument `f` must be a factor generated by the method `CFTime$factor()`", call. = FALSE) # nocov
out <- lapply(factors, function(fac) .factor_units(fac, self$cal, CFt$units$per_day[self$cal$unit]))
if (is.factor(f)) out <- out[[1L]]
out
},
#' @description Calculate the number of time elements, or the relative
#' coverage, in each level of a factor generated by `CFTime$factor()`.
#'
#' @param f A factor or a list of factors derived from the method
#' `CFTime$factor()`.
#' @param coverage "absolute" or "relative".
#' @return If `f` is a factor, a numeric vector with a length equal to the
#' number of levels in the factor, indicating the number of units from the
#' time series contained in each level of the factor when
#' `coverage = "absolute"` or the proportion of units present relative to the
#' maximum number when `coverage = "relative"`. If `f` is a list of factors, a
#' list with each element a numeric vector as above.
factor_coverage = function(f, coverage = "absolute") {
if (is.list(f)) factors <- f else factors <- list(f)
if (!(all(unlist(lapply(factors, function(x) is.factor(x) && is.numeric(attr(x, "era")) &&
attr(x, "period") %in% CFt$factor_periods)))))
stop("Argument `f` must be a factor generated by the method `CFTime$factor()`", call. = FALSE) # nocov
if (!(is.character(coverage) && coverage %in% c("absolute", "relative")))
stop("Argument `coverage` must be a character string with a value of 'absolute' or 'relative'", call. = FALSE) # nocov
if (coverage == "relative") {
out <- lapply(factors, function(fac) {
res <- tabulate(fac) / .factor_units(fac, self$cal, CFt$units$per_day[self$cal$unit])
yrs <- attr(fac, "era")
if (yrs > 0) res <- res / yrs
return(res)
})
} else {
out <- lapply(factors, tabulate)
}
if (is.factor(f)) out <- out[[1L]]
out
}
),
active = list(
#' @field unit (read-only) The unit string of the calendar and time series.
unit = function(value) {
if (missing(value))
CFt$units$name[self$cal$unit]
},
#' @field bounds Retrieve or set the bounds for the offsets. On setting, a
#' `matrix` with columns for `offsets` and low values in the first row,
#' high values in the second row. This may be simply `TRUE` to set regular
#' and consecutive bounds.
bounds = function(value) {
if (missing(value))
private$bnds
else
self$set_bounds(value)
},
#' @field friendlyClassName Character string with class name for display
#' purposes.
friendlyClassName = function(value) {
if (missing(value))
"CFTime"
}
)
)
# ==============================================================================
# S3 functions
#' Compact display of a CFTime instance
#' @param object A `CFTime` instance.
#' @param ... Ignored.
#' @export
str.CFTime <- function(object, ...) {
cat(object$friendlyClassName, " with origin [", object$cal$definition,
"] using calendar [", object$cal$name,
"] having ", length(object$offsets), " offset values", sep = "")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.