#' The Functions analyzeTS, reportTS, and simulateTS
#'
#' Provide a complete set of tools to make time series analysis a piece of cake - analyzeTS() automatically performs seasonal analysis, fits distributions and correlation structures, reportTS provides visualizations of the fitted distributions and correlation structures, and a table with the values of the fitted parameters and basic descriptive statistics, simulateTS automatically takes the results of the analyseTS and generates syntetic ones.
#'
#' In practice, we usually want to simulate a natural process using some sampled time series. To generate a synthetic time series with similar characteristics to the observed values, we have to determine marginal distribution, autocorrelation structure and probability zero for each individual month. This can is done by fitting distributions and autocorrelation structures with analyzeTS(). Result can be checked with reportTS(). Syynthetic time series with the same statistical properties can be produced with simulateTS().
#'
#' Recomended distributions for variables:
#' * _precipitation_: ggamma (Generalized Gamma), burr### (Burr type)
#' * _streamflow_: ggamma (Generalized Gamma), burr### (Burr type)
#' * _relative humidity_: beta
#' * _temperature_: norm (Normal distribution)
#'
#' @param TS time series in format - date, value
#' @param season name of the season (e.g. month, week)
#' @param acsID ID of the autocorrelation structure to be fitted
#' @param lag.max max lag for the empirical autocorrelation structure
#' @param aTS analyzed timeseries
#' @param method report method - 'dist' for distribution fits, 'acs' for ACS fits and 'stat' for basic statistical report
#' @param from starting date/time of the simulation
#' @param to end date/time of the simulation
#' @inheritParams N
#' @inheritParams fitDist
#'
#' @rdname analyzeTS
#' @export
#' @import ggplot2 data.table
#'
#' @examples
#'
#' library(CoSMoS)
#'
#' ## Load data included in the package
#' ## (to find out more about the data use ?precip)
#' data('precip')
#' \donttest{
#' ## Fit seasonal ACSs and distributions to the data
#' a <- analyzeTS(precip)
#'
#' reportTS(a, 'dist') ## show seasonal distribution fit
#' reportTS(a, 'acs') ## show seasonal ACS fit
#' reportTS(a, 'stat') ## display basic descriptive statisctics
#'
#' ######################################
#' ## 'duplicate' analyzed time series ##
#' sim <- simulateTS(a)
#'
#' ## plot the result
#' precip[, id := 'observed']
#' sim[, id := 'simulated']
#'
#' dta <- rbind(precip, sim)
#'
#' ggplot(dta) +
#' geom_line(aes(x = date, y = value)) +
#' facet_wrap(~id, ncol = 1) +
#' theme_classic()
#'
#' ################################################
#' ## or simulate timeseries of different length ##
#' sim <- simulateTS(a,
#' from = as.POSIXct('1978-12-01 00:00:00'),
#' to = as.POSIXct('2008-12-01 00:00:00'))
#'
#' ## and plot the result
#' precip[, id := 'observed']
#' sim[, id := 'simulated']
#'
#' dta <- rbind(precip, sim)
#'
#' ggplot(dta) +
#' geom_line(aes(x = date, y = value)) +
#' facet_wrap(~id, ncol = 1) +
#' theme_classic()
#'}
#' \dontshow{
#' ## test for one month to make it fast
#' precip <- precip[between(date, as.POSIXct('1990-1-01', format('%Y-%m-%d'), tz = 'America/Regina'), as.POSIXct('1990-1-5', format('%Y-%m-%d'), tz = 'America/Regina'))]
#' a <- analyzeTS(precip, opts = list('algorithm' = 'NLOPT_LN_NELDERMEAD',
#' 'maxeval' = 10))
#'}
#'
analyzeTS <- function(TS, season = 'month', dist = 'ggamma', acsID = 'weibull', norm = 'N1', n.points = 30, lag.max = 30, constrain = FALSE, opts = NULL){
if (is.null(opts)) {
opts <- formals(fitDist)$opts
}
ea <- seasonalACF(TS, ## calculate seasonal empirical ACS
season = season,
lag.max = lag.max)
a <- lapply(ea, function(x) fitACS(x, ## fit empirical ACS by ID
acsID))
x <- stratifySeasonData(TS, season) ## split data to seasons
f <- lapply(x[[2]], function(x) fitDist(x$value, ## fit the seasonal data
dist,
norm = norm,
n.points = n.points,
constrain = constrain,
opts = opts))
structure(.Data = list(data = x, ## send the result out
dfits = f,
afits = a),
season = season,
dist = dist,
acsID = acsID,
date = TS[, 'date'])
}
#' @rdname analyzeTS
#' @export
reportTS <- function(aTS, method = 'dist') {
dist <- attr(aTS, 'dist')
acsID <- attr(aTS, 'acsID')
season <- attr(aTS, 'season')
if((method == 'stat')) { # | (method == 'all')) {
nz <- aTS$data[[2]]
dp <- as.data.frame(round(t(sapply(aTS$dfits, function(x) do.call(rbind, x))), 3))
names(dp) <- getDistArg(dist)
ap <- as.data.frame(round(t(sapply(aTS$afits, function(x) do.call(rbind, x))), 3))
names(ap) <- getACSArg(acsID)
laux <- t(sapply(nz, function(x) {
lmom(x$value)
}))
l <- round(data.frame(l.var = laux[, 1]/laux[, 2],
l.skew = laux[, 3],
l.kurt = laux[, 4]), 2)
s <- t(round(sapply(nz, function(x) {
c(mean = mean(x$value, na.rm = T),
sd = sd(x$value, na.rm = T),
min = min(x$value, na.rm = T),
q = quantile(x$value, na.rm = T, probs = .05),
q = quantile(x$value, na.rm = T, probs = .25),
q = quantile(x$value, na.rm = T, probs = .5),
q = quantile(x$value, na.rm = T, probs = .75),
q = quantile(x$value, na.rm = T, probs = .95),
max = max(x$value, na.rm = T),
skew = sample.moments(x$value,
raw = F,
central = F,
coef = T)$coefficients[2])
}), 2))
err <- round(sapply(aTS$dfits, function(x) attr(x, 'nfo')$objective), 4)
p0 <- 1 - round(sapply(nz, dim)[1,]/sapply(aTS$data[[1]], dim)[1,], 2)
a <- round(as.data.frame(t(sapply(aTS$afits, function(x) {attr(x, 'eACS')[2:5]}))), 2)
names(a) <- paste0('acs.l.', 2:5)
stat <- cbind(dist = dist, dp,
error = err,
acsID = acsID, ap,
s,
l,
p0,
a)
rownames(stat) <- paste(season, 1:dim(stat)[1], sep = '_')
out <- stat
}
if((method == 'dist')) { # | (method == 'all')) {
f <- aTS$dfits
CDF <- lapply(f, function(x) {
edf <- attr(x, 'edf')
dist <- attr(x, 'dist')
mm <- do.call(paste0('p', dist),
c(list(q = range(edf$value)),
x))
p <- exp(seq(ifelse(!is.finite(log(mm[1])),
.00001,
log(mm[1])),
log(mm[2]),
length.out = 10000))
cdf <- data.frame(p = p,
value = do.call(paste0('q', dist),
c(list(p = p), x)))
cdf
})
names(CDF) <- paste(season,
seq_along(CDF),
sep = '_')
sea <- NULL
cdf <- rbindlist(CDF,
idcol = 'sea')
cdf[, sea := factor(sea,
levels = paste(season,
seq_along(CDF),
sep = '_'))]
EDF <- lapply(f, function(x) {
attr(x, 'edf')
})
names(EDF) <- paste(season,
seq_along(EDF),
sep = '_')
edf <- rbindlist(EDF,
idcol = 'sea')
edf[, sea := factor(sea,
levels = paste(season,
seq_along(EDF),
sep = '_'))]
df <- ggplot() +
geom_point(data = edf,
aes(x = edf$value,
y = log(1 - edf$p),
shape = factor(2)),
colour = 'royalblue4',
alpha = .5) +
scale_shape_manual(values = 19,
label = 'Empirical',
name = NULL) +
geom_line(data = cdf,
aes(x = cdf$value,
y = log(1 - cdf$p),
linetype = factor(1)),
colour = 'red4',
lwd = .5,
alpha = .75) +
scale_linetype_manual(values = 1,
label = 'Fitted',
name = NULL) +
scale_y_continuous(breaks = seq(-10,
0,
length.out = 5),
labels = format(exp(seq(log(.0001),
log(1),
length.out = 5)),
scientific = TRUE)) +
labs(x = 'Nonzero values',
y = 'Exceedence probability',
title = 'Probability distribution fit') +
theme_gray() +
facet_wrap(~sea,
scales = 'free',
nrow = 4) +
theme(legend.position = 'bottom',
strip.background = element_rect(fill = 'grey5'),
strip.text = element_text(colour = 'grey95'))
out <- df
}
if((method == 'acs')) { # | (method == 'all')) {
a <- aTS$afits
ACS <- lapply(a, function(x) {
eACS <- attr(x, 'eACS')
lag <- 0:(length(eACS) - 1)
id <- attr(x, 'ID')
ACS <- ACS <- do.call(acs,
c(list(id = id,
t = lag), x))
ac <- data.frame(lag = lag,
ACS = ACS,
eACS = eACS)
ac
})
names(ACS) <- paste(season,
seq_along(ACS),
sep = '_')
sea <- NULL
acs <- rbindlist(ACS,
idcol = 'sea')
acs[, sea := factor(sea,
levels = paste(season,
seq_along(ACS),
sep = '_'))]
ac <- ggplot(acs) +
geom_point(aes(x = as.factor(acs$lag),
y = acs$eACS,
shape = factor(2)),
colour = 'royalblue4',
alpha = .5) +
scale_shape_manual(values = 19,
label = 'Empirical',
name = NULL) +
geom_line(aes(x = acs$lag + 1,
y = acs$ACS,
linetype = factor(1)),
colour = 'red4',
lwd = .5,
alpha = .75) +
scale_linetype_manual(values = 1,
label = 'Fitted',
name = NULL) +
labs(x = bquote(lag ~ tau),
y = 'Autocorrelation',
title = 'Autocorrelation structure fit') +
theme_grey() +
facet_wrap(~sea,
scales = 'free',
nrow = 4) +
theme(legend.position = 'bottom',
strip.background = element_rect(fill = 'grey5'),
strip.text = element_text(colour = 'grey95'))
out <- ac
}
return(out)
}
#' @rdname analyzeTS
#' @export
simulateTS <- function(aTS, from = NULL, to = NULL) {
dist <- attr(aTS, 'dist') ## get necesary info from attributes
acsID <- attr(aTS, 'acsID')
season <- attr(aTS, 'season')
date <- attr(aTS, 'date')
x <- aTS$data
r <- reportTS(aTS, method = 'stat')
f <- aTS$dfits
a <- aTS$afits
ACS <- list()
#################
if (dist == 'beta') {
distbounds = c(0, 1)
} else {
distbounds = c(-Inf, Inf)
}
################
for (i in seq_along(x[[1]])) {
p <- actpnts(margdist = attr(f[[i]], 'dist'), ## caculate acti points for each season
margarg = f[[i]],
p0 = r[i, 'p0'],
distbounds = distbounds)
fp <- fitactf(p) ## fit acti points
# plot(fp)
lag <- 0:(length(attr(a[[i]], 'eACS')) - 1) ## get correct lag
id <- attr(a[[i]], 'ID')
as <- do.call(acs, c(list(id = id, t = lag), a[[i]])) ## get ACS
ACS[[i]] <- actf(as, fp$actfcoef[1], fp$actfcoef[2]) ## transform ACS
}
names(ACS) <- names(a)
p0 <- uval <- gauss <- value <- . <- season_id <- rn <- NULL ## global variable check
if (is.null(from)) {
from <- date[1, date]
}
if (is.null(to)) {
to <- date[.N, date]
}
# by <- (strsplit(format(difftime(date[2, date], ## time seq step
# date[1, date])), ' ')[[1]][2])
#
by <- difftime(date[2, date], ## time seq step
date[1, date])
gausian <- seasonalAR(x = seq(from = from, ## generate seasonal gaussian process
to = to,
by = by),
ACS = ACS)
setkey(x = gausian,
season)
para <- as.data.table(
x = t(sapply(f, function(x) {
as.matrix(x = do.call(what = cbind, x))
}))
) ## get distribution pars
names(para) <- names(f[[1]])
para[, season := as.numeric(gsub('data_nz_', '', rownames(para)))]
para[, p0 := r[, 'p0']]
setkey(para, season)
aux <- merge(gausian, para, all.x = T) ## merge gaussian process with parameters
aux <- aux[order(date)]
aux[, uval := (pnorm(q = gauss) - p0)/(1 - p0)] ## calculate intermitent process
aux[uval < 0, uval := 0]
d <- getDistArg(dist)
l <- as.data.table(x = r[, d],
keep.rownames = TRUE) ## get dist parameters for each season
l[, 'season_id' := as.numeric(
x = sapply(X = strsplit(x = rn,
split = '_'),
FUN = '[[', 2)
)] ## make season_id variable
for(i in l[, season_id]) {
trans.para <- l[season_id == i,
!c('rn', 'season_id')] ## select correct pars
aux[season == i, value := do.call(what = paste0('q', dist),
args = c(list(p = uval),
as.list(trans.para)))] ## transform correct season gausian process
}
out <- aux[, .(date, value)] ## select date amd value
return(out) ## send it out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.