R/analyzeTS.R
In CoSMoS: Complete Stochastic Modelling Solution

Documented in analyzeTS reportTS simulateTS

#' The Functions analyzeTS, reportTS, and simulateTS
#'
#' Provide a complete set of tools to make time series analysis a piece of cake -
#' \code{analyzeTS} automatically performs seasonal analysis, fits distributions
#' and correlation structures, \code{reportTS} provides visualizations of the fitted
#' distributions and correlation structures, and a table with the values of the fitted
#' parameters and basic descriptive statistics, \code{simulateTS} automatically takes
#' the results of \code{analyzeTS} and generates synthetic ones.
#'
#' In practice, we usually want to simulate a natural process using some sampled time series.
#' To generate a synthetic time series with similar characteristics to the observed values,
#' we have to determine marginal distribution, autocorrelation structure and probability zero
#' for each individual month. This can is done by fitting distributions and autocorrelation
#' structures with \code{analyzeTS}. Result can be checked with \code{reportTS}.
#' Syynthetic time series with the same statistical properties can be produced with
#' \code{simulateTS}.
#'
#' Recomended distributions for variables:
#'  * _precipitation_: ggamma (Generalized Gamma), burr### (Burr type)
#'  * _streamflow_: ggamma (Generalized Gamma), burr### (Burr type)
#'  * _relative humidity_: beta
#'  * _temperature_: norm (Normal distribution)
#'
#' @param TS time series in format - date, value
#' @param season name of the season (e.g. month, week)
#' @param acsID ID of the autocorrelation structure to be fitted
#' @param lag.max max lag for the empirical autocorrelation structure
#' @param aTS analyzed timeseries
#' @param method report method - \code{dist} for distribution fits, \code{acs} for ACS fits and \code{stat} for basic statistical report
#' @param from starting date/time of the simulation
#' @param to end date/time of the simulation
#' @inheritParams N
#' @inheritParams fitDist
#'
#' @rdname analyzeTS
#' @export
#' @import ggplot2 data.table
#'
#' @examples
#'
#' library(CoSMoS)
#'
#' ## Load data included in the package
#' ## (to find out more about the data use ?precip)
#' data('precip')
#' \donttest{
#' ## Fit seasonal ACSs and distributions to the data
#' a <- analyzeTS(precip)
#'
#' reportTS(a, 'dist') ## show seasonal distribution fit
#' reportTS(a, 'acs') ## show seasonal ACS fit
#' reportTS(a, 'stat') ## display basic descriptive statisctics
#'
#' ######################################
#' ## 'duplicate' analyzed time series ##
#' sim <- simulateTS(a)
#'
#' ## plot the result
#' precip[, id := 'observed']
#' sim[, id := 'simulated']
#'
#' dta <- rbind(precip, sim)
#'
#' ggplot(dta) +
#'   geom_line(aes(x = date, y = value)) +
#'   facet_wrap(~id, ncol = 1) +
#'   theme_classic()
#'
#' ################################################
#' ## or simulate timeseries of different length ##
#' sim <- simulateTS(a,
#'                   from = as.POSIXct('1978-12-01 00:00:00'),
#'                   to = as.POSIXct('2008-12-01 00:00:00'))
#'
#' ## and plot the result
#' precip[, id := 'observed']
#' sim[, id := 'simulated']
#'
#' dta <- rbind(precip, sim)
#'
#' ggplot(dta) +
#'   geom_line(aes(x = date, y = value)) +
#'   facet_wrap(~id, ncol = 1) +
#'   theme_classic()
#'}
#' \dontshow{
#' ## test for one month to make it fast
#' precip <- precip[between(date, as.POSIXct('1990-1-01', format('%Y-%m-%d'), tz = 'America/Regina'), as.POSIXct('1990-1-5', format('%Y-%m-%d'), tz = 'America/Regina'))]
#' a <- analyzeTS(precip, opts = list('algorithm' = 'NLOPT_LN_NELDERMEAD',
#'                                    'maxeval' = 10))
#'}
#'
analyzeTS <- function(TS, season = 'month', dist = 'ggamma', acsID = 'weibull', norm = 'N1', n.points = 30, lag.max = 30, constrain = FALSE, opts = NULL){

  if (is.null(opts)) {

    opts <- formals(fitDist)$opts
  }

  ea <- seasonalACF(TS, ## calculate seasonal empirical ACS
                    season = season,
                    lag.max = lag.max)

  a <- lapply(ea, function(x) fitACS(x, ## fit empirical ACS by ID
                                     acsID))

  x <- stratifySeasonData(TS, season) ## split data to seasons

  f <- lapply(x[[2]], function(x) fitDist(x$value, ## fit the seasonal data
                                          dist,
                                          norm = norm,
                                          n.points = n.points,
                                          constrain = constrain,
                                          opts = opts))

  structure(.Data = list(data = x, ## send the result out
                         dfits = f,
                         afits = a),
            season = season,
            dist = dist,
            acsID = acsID,
            date = TS[, 'date'])
}

#' @rdname analyzeTS
#' @export
reportTS <- function(aTS, method = 'dist') {

  dist <- attr(aTS, 'dist')
  acsID <- attr(aTS, 'acsID')
  season <- attr(aTS, 'season')

  if((method == 'stat')) { # | (method == 'all')) {

    nz <- aTS$data[[2]]

    dp <- as.data.frame(round(t(sapply(aTS$dfits, function(x) do.call(rbind, x))), 3))
    names(dp) <- getDistArg(dist)

    ap <- as.data.frame(round(t(sapply(aTS$afits, function(x) do.call(rbind, x))), 3))
    names(ap) <- getACSArg(acsID)

    laux <- t(sapply(nz, function(x) {

      lmom(x$value)
    }))

    l <- round(data.frame(l.var = laux[, 1]/laux[, 2],
                          l.skew = laux[, 3],
                          l.kurt = laux[, 4]), 2)

    s <- t(round(sapply(nz, function(x) {

      c(mean = mean(x$value, na.rm = T),
        sd = sd(x$value, na.rm = T),
        min = min(x$value, na.rm = T),
        q = quantile(x$value, na.rm = T, probs = .05),
        q = quantile(x$value, na.rm = T, probs = .25),
        q = quantile(x$value, na.rm = T, probs = .5),
        q = quantile(x$value, na.rm = T, probs = .75),
        q = quantile(x$value, na.rm = T, probs = .95),
        max = max(x$value, na.rm = T),
        skew = sample.moments(x$value,
                              raw = F,
                              central = F,
                              coef = T)$coefficients[2])
    }), 2))

    err <- round(sapply(aTS$dfits, function(x) attr(x, 'nfo')$objective), 4)

    p0 <- 1 - round(sapply(nz, dim)[1,]/sapply(aTS$data[[1]], dim)[1,], 2)

    a <- round(as.data.frame(t(sapply(aTS$afits, function(x) {attr(x, 'eACS')[2:5]}))), 2)
    names(a) <- paste0('acs.l.', 2:5)

    stat <- cbind(dist = dist, dp,
                  error = err,
                  acsID = acsID, ap,
                  s,
                  l,
                  p0,
                  a)

    rownames(stat) <- paste(season, 1:dim(stat)[1], sep = '_')

    out <- stat
  }

  if((method == 'dist')) { # | (method == 'all')) {

    f <- aTS$dfits

    CDF <- lapply(f, function(x) {

      edf <- attr(x, 'edf')
      dist <- attr(x, 'dist')

      mm <- do.call(paste0('p', dist),
                    c(list(q = range(edf$value)),
                      x))
      p <- exp(seq(ifelse(!is.finite(log(mm[1])),
                          .00001,
                          log(mm[1])),
                   log(mm[2]),
                   length.out = 10000))
      cdf <- data.frame(p = p,
                        value = do.call(paste0('q', dist),
                                        c(list(p = p), x)))
      cdf
    })

    names(CDF) <- paste(season,
                        seq_along(CDF),
                        sep = '_')

    sea <- NULL
    cdf <- rbindlist(CDF,
                     idcol = 'sea')
    cdf[, sea := factor(sea,
                        levels = paste(season,
                                       seq_along(CDF),
                                       sep = '_'))]

    EDF <- lapply(f, function(x) {

      attr(x, 'edf')
    })

    names(EDF) <- paste(season,
                        seq_along(EDF),
                        sep = '_')

    edf <- rbindlist(EDF,
                     idcol = 'sea')
    edf[, sea := factor(sea,
                        levels = paste(season,
                                       seq_along(EDF),
                                       sep = '_'))]

    df <- ggplot() +
      geom_point(data = edf,
                 aes(x = edf$value,
                     y = log(1 - edf$p),
                     shape = factor(2)),
                 colour = 'royalblue4',
                 alpha = .5) +
      scale_shape_manual(values = 19,
                         label = 'Empirical',
                         name = NULL) +
      geom_line(data = cdf,
                aes(x = cdf$value,
                    y = log(1 - cdf$p),
                    linetype = factor(1)),
                colour = 'red4',
                lwd = .5,
                alpha = .75) +
      scale_linetype_manual(values = 1,
                            label = 'Fitted',
                            name = NULL) +
      scale_y_continuous(breaks = seq(-10,
                                      0,
                                      length.out = 5),
                         labels = format(exp(seq(log(.0001),
                                                 log(1),
                                                 length.out = 5)),
                                         scientific = TRUE)) +
      labs(x = 'Nonzero values',
           y = 'Exceedence probability',
           title = 'Probability distribution fit') +
      theme_gray() +
      facet_wrap(~sea,
                 scales = 'free',
                 nrow = 4) +
      theme(legend.position = 'bottom',
            strip.background = element_rect(fill = 'grey5'),
            strip.text = element_text(colour = 'grey95'))

    out <- df
  }

  if((method == 'acs')) { # | (method == 'all')) {

    a <- aTS$afits

    ACS <- lapply(a, function(x) {

      eACS <- attr(x, 'eACS')
      lag <- 0:(length(eACS) - 1)
      id <- attr(x, 'ID')

      ACS <-   ACS <- do.call(acs,
                              c(list(id = id,
                                     t = lag), x))
      ac <- data.frame(lag = lag,
                       ACS = ACS,
                       eACS = eACS)
      ac
    })

    names(ACS) <- paste(season,
                        seq_along(ACS),
                        sep = '_')

    sea <- NULL
    acs <- rbindlist(ACS,
                     idcol = 'sea')
    acs[, sea := factor(sea,
                        levels = paste(season,
                                       seq_along(ACS),
                                       sep = '_'))]

    ac <- ggplot(acs) +
      geom_point(aes(x = as.factor(acs$lag),
                     y = acs$eACS,
                     shape = factor(2)),
                 colour = 'royalblue4',
                 alpha = .5) +
      scale_shape_manual(values = 19,
                         label = 'Empirical',
                         name = NULL) +
      geom_line(aes(x = acs$lag + 1,

                    y = acs$ACS,
                    linetype = factor(1)),
                colour = 'red4',
                lwd = .5,
                alpha = .75) +
      scale_linetype_manual(values = 1,
                            label = 'Fitted',
                            name = NULL) +
      labs(x = bquote(lag ~ tau),
           y = 'Autocorrelation',
           title = 'Autocorrelation structure fit') +
      theme_grey() +
      facet_wrap(~sea,
                 scales = 'free',
                 nrow = 4) +
      theme(legend.position = 'bottom',
            strip.background = element_rect(fill = 'grey5'),
            strip.text = element_text(colour = 'grey95'))

    out <- ac
  }

  return(out)
}

#' @rdname analyzeTS
#' @export
simulateTS <- function(aTS, from = NULL, to = NULL) {

  dist <- attr(aTS, 'dist') ## get necesary info from attributes
  acsID <- attr(aTS, 'acsID')
  season <- attr(aTS, 'season')
  date <- data.table(attr(aTS, 'date'))

  x <- aTS$data

  r <- reportTS(aTS, method = 'stat')

  f <- aTS$dfits
  a <- aTS$afits

  ACS <- list()

  #################
  if (dist == 'beta') {

    distbounds = c(0, 1)
  } else {

    distbounds = c(-Inf, Inf)
  }
  ################

  for (i in seq_along(x[[1]])) {

    p <- actpnts(margdist = attr(f[[i]], 'dist'), ## caculate acti points for each season
                 margarg = f[[i]],
                 p0 = r[i, 'p0'],
                 distbounds = distbounds)
    fp <- fitactf(p) ## fit acti points
    # plot(fp)

    lag <- 0:(length(attr(a[[i]], 'eACS')) - 1) ## get correct lag
    id <- attr(a[[i]], 'ID')

    as <- do.call(acs, c(list(id = id, t = lag), a[[i]])) ## get ACS
    ACS[[i]] <- actf(as, fp$actfcoef[1], fp$actfcoef[2]) ## transform ACS
  }

  names(ACS) <- names(a)

  p0 <- uval <- gauss <- value <- . <- season_id <- rn <- NULL ## global variable check

  if (is.null(from)) {

    from <- date[1, date]
  }

  if (is.null(to)) {

    to <- date[.N, date]
  }

  # by <- (strsplit(format(difftime(date[2, date], ## time seq step
  #                                 date[1, date])), ' ')[[1]][2])
  #
  by <- difftime(date[2, date], ## time seq step
                 date[1, date])

  gausian <- seasonalAR(x = seq(from = from, ## generate seasonal gaussian process
                                to = to,
                                by = by),
                        ACS = ACS)

  setkey(x = gausian,
         season)

  para <- as.data.table(

    x = t(sapply(f, function(x) {
      as.matrix(x = do.call(what = cbind, x))
    }))
  ) ## get distribution pars
  names(para) <- names(f[[1]])
  para[, season := as.numeric(gsub('data_nz_', '', rownames(para)))]
  para[, p0 := r[, 'p0']]
  setkey(para, season)

  aux <- merge(gausian, para, all.x = T) ## merge gaussian process with parameters
  aux <- aux[order(date)]
  aux[, uval := (pnorm(q = gauss) - p0)/(1 - p0)] ## calculate intermitent process
  aux[uval < 0, uval := 0]

  d <- getDistArg(dist)

  # l <- as.data.table(x = r[, d],
  #                    keep.rownames = TRUE) ## get dist parameters for each season
  #
  # l[, 'season_id' := as.numeric(
  #   x = sapply(X = strsplit(x = rn,
  #                           split = '_'),
  #              FUN = '[[', 2)
  # )] ## make season_id variable


  for(i in para[, season]) {

    trans.para <- para[season == i,
                       !c('p0', 'season')] ## select correct pars

    aux[season == i, value :=  do.call(what = paste0('q', dist),
                                       args = c(list(p = uval),
                                                as.list(trans.para)))] ## transform correct season gausian process
  }

  out <- aux[, .(date, value)] ## select date amd value

  return(out) ## send it out
}