R/dvStat.R

Defines functions dvStat

Documented in dvStat

#'Daily Value Statistics
#'
#'Compute summary statistics for daily values, possibly grouped by
#'years or seasons.
#'
#'@param x the daily value data to be summarized. Missing values are permitted, but
#'some summary statistics could be biased if missing data are present.
#'@param Dates the date for each \code{x}, should be of class "Date." Missing values
#'are not permitted.
#'@param Start the start date for the analysis, can be either a character string or
#'class "Date."
#'@param End the end date for the analysis, can be either a character string or
#'class "Date."
#'@param by the grouping variable, generally the output from some date grouping function
#'like \code{waterYear}, \code{climateYear}, or \code{seasonYear}. The default is to construct
#'a group of the full range of data in calendar years.
#'@param pre any preprocessing function like \code{movingAve}. The function must return
#'a vector as long as the input vector.
#'@param stat the statistical function. Must accept the \code{na.rm} argument and return
#'a single value.
#'@param na.rm remove missiing values option for the function \code{stat}.
#'@param STAID the station identifier for the data.
#'@param \dots any additional arguments for the function \code{pre}.
#'@return A data.frame containing the \code{STAID}, groups generated by
#'\code{by}, the number of non-missing values in the grouop, and the statistic 
#'and first date on which that statistic occurred.
#'@examples
#'
#'\dontrun{
#'# For these examples, print to console
#'.pager <- options("pager")
#'options(pager="console")
#'# See the demo for examples of how to use the dvStat function.
#'demo(package="DVstats")
#'options(.pager)
#'}
#'@export
dvStat <- function(x, Dates, Start=NULL, End=NULL, by=NULL, pre=NULL,
                   stat=min, na.rm=TRUE, STAID="Unknown", ...) {
  ## Coding history:
  ##    2013Apr11 DLLorenz Initial coding.
  ##
  stat.name <- deparse(substitute(stat))
  call <- match.call()
  call <- capture.output(dput(call))
  Dates <- as.Date(Dates) # Force the issue
  ## Limit data if needed
  STAID <- as.character(STAID[1L])
  if(!is.null(Start)) {
    Start <- as.Date(Start)
    sel <- Dates >= Start
    Dates <- Dates[sel]
    x <- x[sel]
    if(!is.null(by))
      by <- by[sel]
  }
  if(!is.null(End)) {
    End <- as.Date(End)
    sel <- Dates <= End
    Dates <- Dates[sel]
    x <- x[sel]
    if(!is.null(by))
      by <- by[sel]
  }
  if(is.null(by)) # Make group
    by <- rep(paste(year(range(Dates)), collapse="-"), length.out=length(x))
  ## Create lists of data and dates
  x.l <- split(x, by)
  d.l <- split(Dates, by)
  x.n <- sapply(x.l, function(x) sum(!is.na(x)))
  ## Preprocess if requested
  if(!is.null(pre))
    x.l <- lapply(x.l, pre, ...)

  ## Compute the stat and dummy vector for date
  ## Supress warnings from min and max (and possibly others)
  ow <- options("warn")
  options(warn=-1)
  x.s <- sapply(x.l, stat, na.rm=na.rm)
  options(ow)
  d.s <- as.Date(rep(NA_real_, length(x.s)), origin="1970-01-01")
  names(d.s) <- names(x.s)
  for(i in names(x.s)) {
    dd <- which(x.s[i] == x.l[[i]])
    if(length(dd))
      d.s[i] <- d.l[[i]][dd[1]]
  }
  retval <- data.frame(STAID=STAID, Group=names(x.l), Nobs=x.n, Stat=x.s, Date=d.s, 
    row.names=as.character(seq(along=x.s)))
  names(retval)[4L] <- stat.name
  comment(retval) <- c("Call:",call)
  retval
}
USGS-R/DVstats documentation built on Oct. 11, 2022, 6:03 a.m.