Nothing
#' Simulate a Time Series List
#'
#' @description
#' Generates simulated time series lists for testing and learning.
#'
#' This function supports progress bars generated by the `progressr` package, and accepts a parallelization setup via [future::plan()] (see examples).
#'
#' @param n (optional, integer) Number of time series to simulate. Default: 2.
#' @param cols (optional, integer) Number of columns of each time series. Default: 5
#' @param rows (optional, integer) Length of each time series. Minimum is 10, but maximum is not limited. Very large numbers might crash the R session. Default: 100
#' @param time_range (optional character or numeric vector) Time interval of the time series. Either a character vector with dates in format YYYY-MM-DD or or a numeric vector. If there is a mismatch between `time_range` and `rows` (for example, the number of days in `time_range` is smaller than `rows`), the upper value in `time_range` is adapted to `rows`. Default: c("2010-01-01", "2020-01-01")
#' @param data_range (optional, numeric vector of length 2) Extremes of the time series values. Default: c(0, 1)
#' @param seasons (optional, integer) Number of seasons in the resulting time series. The maximum number of seasons is computed as `floor(rows/3)`. Default: 0
#' @param na_fraction (optional, numeric) Value between 0 and 0.5 indicating the approximate fraction of NA data in the simulated time series. Default: 0.
#' @param independent (optional, logical) If TRUE, each new column in a simulated time series is averaged with the previous column to generate dependency across columns, and each new simulated time series is weighted-averaged with a time series template to generate dependency across time series. Irrelevant when `cols < 2` or `n < 2`, and hard to perceive in the output when `seasons > 0`. Default: FALSE
#' @param irregular (optional, logical) If TRUE, the time intervals between consecutive samples and the number of rows are irregular. Default: TRUE
#' @param seed (optional, integer) Random seed used to simulate the zoo object. If NULL (default), a seed is selected at random. Default: NULL
#'
#' @return time series list
#' @export
#' @autoglobal
#' @examples
#'
#' # generates a different time series list on each iteration when seed = NULL
#' tsl <- tsl_simulate(
#' n = 2,
#' seasons = 4
#' )
#'
#' if(interactive()){
#' tsl_plot(
#' tsl = tsl
#' )
#' }
#'
#' # generate 3 independent time series
#' tsl_independent <- tsl_simulate(
#' n = 3,
#' cols = 3,
#' independent = TRUE
#' )
#'
#' if(interactive()){
#' tsl_plot(
#' tsl = tsl_independent
#' )
#' }
#'
#' # generate 3 independent time series
#' tsl_dependent <- tsl_simulate(
#' n = 3,
#' cols = 3,
#' independent = FALSE
#' )
#'
#' if(interactive()){
#' tsl_plot(
#' tsl = tsl_dependent
#' )
#' }
#'
#' # with seasons
#' tsl_seasons <- tsl_simulate(
#' n = 3,
#' cols = 3,
#' seasons = 4,
#' independent = FALSE
#' )
#'
#' if(interactive()){
#' tsl_plot(
#' tsl = tsl_seasons
#' )
#' }
#'
#' @family simulate_time_series
tsl_simulate <- function(
n = 2,
cols = 5,
rows = 100,
time_range = c("2010-01-01", "2020-01-01"),
data_range = c(0, 1),
seasons = 0,
na_fraction = 0,
independent = FALSE,
irregular = TRUE,
seed = NULL
){
# n ----
n <- as.integer(abs(n))[1]
if(n <= 0){
n <- 1
}
# na_fraction ----
na_fraction <- as.numeric(na_fraction[1])
if(na_fraction < 0){
na_fraction <- 0
}
if(na_fraction > 0.5){
na_fraction <- 0.5
}
# seed ----
if(is.null(seed)){
seed <- sample.int(
n = .Machine$integer.max,
size = 1
)
}
set.seed(seed)
# if irregular, increase row count
if(!is.logical(irregular)){
irregular <- FALSE
}
# time range ----
if(length(time_range) != 2){
stop("distantia::tsl_simulate(): argument 'time_range' must be a vector of length 2.")
}
time_range <- utils_as_time(
x = time_range
) |>
range()
# generate template
if(independent == FALSE){
template <- zoo_simulate(
cols = cols,
rows = rows,
time_range = time_range,
data_range = data_range,
seasons = seasons,
na_fraction = 0,
independent = independent,
irregular = FALSE,
seed = seed
)
template_weight <- seq(
from = 0.9,
to = 0,
length.out = n + 1
)
#loop args
arg_irregular <- FALSE
arg_na_fraction <- 0
} else {
arg_irregular <- irregular
arg_na_fraction <- na_fraction
}
#progress bar
p <- progressr::progressor(along = seq_len(n))
#generate tsl
tsl <- foreach::foreach(
i = seq_len(n),
.errorhandling = "pass",
.options.future = list(seed = TRUE)
) %dofuture% {
#call to progress bar
p()
#regular zoo with no NA
zoo.i <- zoo_simulate(
cols = cols,
rows = rows,
time_range = time_range,
data_range = data_range,
seasons = seasons,
na_fraction = arg_na_fraction,
independent = independent,
irregular = arg_irregular,
seed = NULL
)
#handle independence
if(independent == FALSE){
#add weighted template
zoo.i <-
zoo.i * (1 - template_weight[i]) +
template * template_weight[i]
}
#regular or irregular rows
if(irregular == TRUE){
if(rows > 10){
#compute rows to keep
rows.i <- sample(
x = rows,
size = rows - sample.int(n = rows/4, size = 1)
) |>
sort()
#subsetting
zoo.i <- zoo.i[rows.i]
}
} else {
rows.i <- seq_len(rows)
}
#regular or irregular time
zoo::index(x = zoo.i) <- seq(
from = min(time_range),
to = max(time_range),
length.out = length(rows.i) * ifelse(
test = irregular == TRUE,
yes = 4,
no = 1
)
) |>
sample(size = length(rows.i)) |>
sort()
#apply na_fraction
zoo.i[
sample.int(
n = length(zoo.i),
size = floor(length(zoo.i) * na_fraction)
)
] <- NA
zoo.i
}
# names ----
if(n > length(LETTERS)){
names_tsl <- c(
LETTERS,
as.vector(
outer(
X = LETTERS,
Y = LETTERS,
FUN = paste0
)
)
)[seq_len(n)]
} else {
names_tsl <- LETTERS[seq_len(n)]
}
names(tsl) <- names_tsl
tsl <- tsl_names_set(
tsl = tsl,
names = names_tsl
)
tsl
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.