R/regularSeries.R

Defines functions regularSeries

Documented in regularSeries

#'Regular Series
#'
#'Some time-series analyses require data that are uniformly spaced in time.
#'This function will construct a regular series from randomly spaced data
#'using any of several user-definable methods.
#'
#'For \code{regularSeries}, if there is no observation during a period, then that
#'value is set to \code{NA}. If there is one observation, then the value is set
#'to the value of that single observation. The value of \code{which} controls how
#'periods with multiple observations are handled. Three character strings are
#'recognized for selecting a single value: "earliest" selects the earliest
#'observation in the period, "middle" selects the observation closest to the
#'middle of the period, and "latest" selects the latest observation in the
#'period. If \code{which} is not one of these, then it should be the name of a
#'function such as mean or median.\cr
#'
#' @param x a vector of observations that represents a series.
#' @param times a date-like vector corresponding to data.
#' @param period character string that is valid input to the POSIXct method
#'for the function \code{seq} is acceptable, specifying the spacing between 
#'successive periods. For example "year," "month," or "day."
#' @param which a character string indicating the method to use, or the name of
#'a function. See \bold{Details} for options.
#' @param begin the beginning date as POSIXt or as character.
#' @param end the end date as POSIXt or as character.
#' @param k.period the number of units of \code{period} in each period of the
#'output series.
#' @return The function \code{regularSeries} returns a data frame with the
#'following columns:
#'\item{Season}{the season number.}
#'\item{SeasonStartDate}{the starting date of the corresponding season
#'number---the season includes dates greater than or equal to this date.}
#'\item{SeasonEndDate}{the end date of the corresponding season number---the
#'season includes dates strictly less than this date.}
#'\item{Value}{the value from \code{x} for the corresponding season number.}
#'\item{ValueDate}{the date from \code{times} for the corresponding season
#'number if \code{which} was one of "earliest," "middle," or "latest";
#'otherwise missing.}
#' @keywords manip
#' @examples
#'\dontrun{
#'library(smwrData)
#'data(QW05078470)
#'with(QW05078470, regularSeries(P00665, DATES))
#'# there should be no values for season numbers 2, 5, or 10
#'}
#' @export
regularSeries <- function(x, times, period="month", which = "middle",
                          begin, end, k.period=1) {
  ## 
  ## Force times to POSIXlt format
  times <- as.POSIXlt(times)
  tzone <- attr(times, "tzone")[1L] # protect against length 3 in POSIXlt
  if(missing(begin)) {
    begin <- floor_date(min(times, na.rm=TRUE), unit=period)
  } else
    begin <- as.POSIXlt(begin, tz=tzone)
  if(missing(end)) {
    end <- floor_date(max(times, na.rm=TRUE), unit=period)
    ## This construct is needed because there is no way to represent a difftime in units
    ## of a month or a year (differing number of seconds in those periods).
    end <- seq(end, by=period, length=k.period+1)[k.period+1]
  } else
    end <- as.POSIXlt(end, tz=tzone)
  ## Remove the missing values in x from consideration
  drop <- is.na(x)
  if(any(drop)) {
    x <- x[!drop]
    times <- times[!drop]
  }
  ## Create the sequence of 'seasons' and assign each x to one
  cutdates <- seq(begin, end, by = period)
  if(k.period > 1) # trim those not equal to the k.period multiple
    cutdates <- cutdates[(seq(0, by=1, length=length(cutdates)) %% k.period) == 0L]
  seasons <- cut(times, cutdates, start.on.monday = FALSE, labels=FALSE)
  ## Assign a value or NA to each element in the regular series
  nobs <- length(cutdates) - 1L
  ## Some seasons may have more than a single observation, so process as 
  ## instructed by which
  ## Modify the seasons vector so that there is a unique observation in
  ## each season
  if(is.character(which)) {
    ## Select which observation to use
    seasons.dupes <- unique(seasons[duplicated(seasons, incomparables=NA)])
    if(length(seasons.dupes) > 0L) {
      for(i in seasons.dupes) {
        test.obs <- which(seasons == i)
        check.data <- as.double(times[test.obs])
        test.julian <- switch(which,
                              earliest = as.double(cutdates[i]),
                              middle = (as.double(cutdates[i]) + as.double(cutdates[i + 1]))/2. + 0.01,
                              latest = as.double(cutdates[i + 1]))
        ## Here's an easy way to get the minimum of the vector
        obs.midrange <- mahalanobis(as.matrix(check.data), as.matrix(test.julian), as.matrix(1))
        obs.midrange <- obs.midrange == min(obs.midrange)
        ## OK, replace the data
        seasons[test.obs] <- as.double(obs.midrange) * i
      }
    }
    ## Replace NAs with 0
    seasons[is.na(seasons)] <- 0
    ret.data <- x[match(1:nobs, seasons)]
    ret.times <- times[match(1:nobs, seasons)]
  }
  else {
    ## Must be a function like mean
    temp <- tapply(x, seasons, FUN = which)
    ## Protect against NAs in seasons
    temp.ndx <- as.numeric(names(temp))
    temp.sel <- !is.na(temp.ndx)
    ret.data <- rep(NA, nobs)
    ret.data[temp.ndx[temp.sel]] <- temp[temp.sel]
    ret.times <- rep(as.POSIXlt(NA), nobs)
  }
  return(data.frame(Season=seq(along=ret.data), SeasonStartDate=cutdates[-(nobs+1)],
                    SeasonEndDate=cutdates[-1], Value=ret.data, ValueDate=ret.times))
}
USGS-R/smwrBase documentation built on Oct. 18, 2022, 9:55 a.m.