R/tsl_simulate.R

Defines functions tsl_simulate

Documented in tsl_simulate

#' Simulate a Time Series List
#'
#' @description
#' Generates simulated time series lists for testing and learning.
#'
#' This function supports progress bars generated by the `progressr` package, and accepts a parallelization setup via [future::plan()] (see examples).
#'
#' @param n (optional, integer) Number of time series to simulate. Default: 2.
#' @param cols (optional, integer) Number of columns of each time series. Default: 5
#' @param rows (optional, integer) Length of each time series. Minimum is 10, but maximum is not limited. Very large numbers might crash the R session. Default: 100
#' @param time_range (optional character or numeric vector) Time interval of the time series. Either a character vector with dates in format YYYY-MM-DD or or a numeric vector. If there is a mismatch between `time_range` and `rows` (for example, the number of days in `time_range` is smaller than `rows`), the upper value in `time_range` is adapted to `rows`. Default: c("2010-01-01", "2020-01-01")
#' @param data_range (optional, numeric vector of length 2) Extremes of the time series values. Default: c(0, 1)
#' @param seasons (optional, integer) Number of seasons in the resulting time series. The maximum number of seasons is computed as `floor(rows/3)`. Default: 0
#' @param na_fraction (optional, numeric) Value between 0 and 0.5 indicating the approximate fraction of NA data in the simulated time series. Default: 0.
#' @param independent (optional, logical) If TRUE, each new column in a simulated time series is averaged with the previous column to generate dependency across columns, and each new simulated time series is weighted-averaged with a time series template to generate dependency across time series. Irrelevant when `cols < 2` or `n < 2`, and hard to perceive in the output when `seasons > 0`. Default: FALSE
#' @param irregular (optional, logical) If TRUE, the time intervals between consecutive samples and the number of rows are irregular. Default: TRUE
#' @param seed (optional, integer) Random seed used to simulate the zoo object. If NULL (default), a seed is selected at random. Default: NULL
#'
#' @return time series list
#' @export
#' @autoglobal
#' @examples
#'
#' # generates a different time series list on each iteration when seed = NULL
#' tsl <- tsl_simulate(
#'   n = 2,
#'   seasons = 4
#' )
#'
#' if(interactive()){
#'   tsl_plot(
#'     tsl = tsl
#'   )
#' }
#'
#' # generate 3 independent time series
#' tsl_independent <- tsl_simulate(
#'   n = 3,
#'   cols = 3,
#'   independent = TRUE
#' )
#'
#' if(interactive()){
#'   tsl_plot(
#'     tsl = tsl_independent
#'   )
#' }
#'
#' # generate 3 independent time series
#' tsl_dependent <- tsl_simulate(
#'   n = 3,
#'   cols = 3,
#'   independent = FALSE
#' )
#'
#' if(interactive()){
#'   tsl_plot(
#'     tsl = tsl_dependent
#'   )
#' }
#'
#' # with seasons
#' tsl_seasons <- tsl_simulate(
#'   n = 3,
#'   cols = 3,
#'   seasons = 4,
#'   independent = FALSE
#' )
#'
#' if(interactive()){
#'   tsl_plot(
#'     tsl = tsl_seasons
#'   )
#' }
#' 
#' @family simulate_time_series
tsl_simulate <- function(
    n = 2,
    cols = 5,
    rows = 100,
    time_range = c("2010-01-01", "2020-01-01"),
    data_range = c(0, 1),
    seasons = 0,
    na_fraction = 0,
    independent = FALSE,
    irregular = TRUE,
    seed = NULL
){

  # n ----
  n <- as.integer(abs(n))[1]
  if(n <= 0){
    n <- 1
  }

  # na_fraction ----
  na_fraction <- as.numeric(na_fraction[1])
  if(na_fraction < 0){
    na_fraction <- 0
  }
  if(na_fraction > 0.5){
    na_fraction <- 0.5
  }

  # seed ----
  if(is.null(seed)){
    seed <- sample.int(
      n = .Machine$integer.max,
      size = 1
      )
  }
  set.seed(seed)

  # if irregular, increase row count
  if(!is.logical(irregular)){
    irregular <- FALSE
  }

  # time range ----
  if(length(time_range) != 2){
    stop("distantia::tsl_simulate(): argument 'time_range' must be a vector of length 2.")
  }

  time_range <- utils_as_time(
    x = time_range
  ) |>
    range()

  # generate template
  if(independent == FALSE){

    template <- zoo_simulate(
      cols = cols,
      rows = rows,
      time_range = time_range,
      data_range = data_range,
      seasons = seasons,
      na_fraction = 0,
      independent = independent,
      irregular = FALSE,
      seed = seed
    )

    template_weight <- seq(
      from = 0.9,
      to = 0,
      length.out = n + 1
    )

    #loop args
    arg_irregular <- FALSE
    arg_na_fraction <- 0

  } else {

    arg_irregular <- irregular
    arg_na_fraction <- na_fraction

  }

  #progress bar
  p <- progressr::progressor(along = seq_len(n))



  #generate tsl
  tsl <- foreach::foreach(
    i = seq_len(n),
    .errorhandling = "pass",
    .options.future = list(seed = TRUE)
  ) %dofuture% {

    #call to progress bar
    p()

    #regular zoo with no NA
    zoo.i <- zoo_simulate(
      cols = cols,
      rows = rows,
      time_range = time_range,
      data_range = data_range,
      seasons = seasons,
      na_fraction = arg_na_fraction,
      independent = independent,
      irregular = arg_irregular,
      seed = NULL
    )

    #handle independence
    if(independent == FALSE){

      #add weighted template
      zoo.i <-
        zoo.i * (1 - template_weight[i]) +
        template * template_weight[i]

    }

    #regular or irregular rows
    if(irregular == TRUE){

      if(rows > 10){

        #compute rows to keep
        rows.i <- sample(
          x = rows,
          size = rows - sample.int(n = rows/4, size = 1)
        ) |>
          sort()

        #subsetting
        zoo.i <- zoo.i[rows.i]

      }


    } else {

      rows.i <- seq_len(rows)

    }

    #regular or irregular time
    zoo::index(x = zoo.i) <- seq(
      from = min(time_range),
      to = max(time_range),
      length.out = length(rows.i) * ifelse(
        test = irregular == TRUE,
        yes = 4,
        no = 1
      )
    ) |>
      sample(size = length(rows.i)) |>
      sort()

    #apply na_fraction
    zoo.i[
      sample.int(
        n = length(zoo.i),
        size = floor(length(zoo.i) * na_fraction)
      )
    ] <- NA

    zoo.i

  }

  # names ----
  if(n > length(LETTERS)){

    names_tsl <- c(
      LETTERS,
      as.vector(
        outer(
          X = LETTERS,
          Y = LETTERS,
          FUN = paste0
        )
      )
    )[seq_len(n)]

  } else {

    names_tsl <- LETTERS[seq_len(n)]

  }

  names(tsl) <- names_tsl

  tsl <- tsl_names_set(
    tsl = tsl,
    names = names_tsl
  )

  tsl

}

Try the distantia package in your browser

Any scripts or data that you put into this service are public.

distantia documentation built on April 4, 2025, 5:42 a.m.