R/simulateDriverS.R

Defines functions simulateDriverS

Documented in simulateDriverS

#' Generates drivers for \code{\link{simulatePopulation}}.
#'
#' @description Wrapper of \code{simulateDriver} to generate several drivers with different autocorrelation lengths, and return a long format dataframe to be used as input for \code{\link{simulatePopulation}}. It also produces a plot of the generated drivers. \strong{Important}: note that the variable \code{time} runs from left to right, with lower values representing older samples.
#'
#'@usage simulateDriverS(
#'  random.seeds=c(60, 120),
#'  time=1:10000,
#'  autocorrelation.lengths=c(200, 600),
#'  output.min=c(0,0),
#'  output.max=c(100, 100),
#'  driver.names=c("A", "B"),
#'  filename=NULL)
#'
#' @param random.seeds vector of integers, seeds to be used by \code{set.seed}.
#' @param time integer, or numeric vector of integers with constant intervals. If a single integer is provided, a time sequence is generated from 0 to the given integer as \emph{seq(0, time, by = 1)}. Default is 1:10000.
#' @param autocorrelation.lengths vector of integers, represents the lengths of the convolution filters to be used to impose a particular temporal structure on each driver. Default is 100, equivalent to a filter composed by a hundred of ones.
#' @param output.min numeric vector, minimum values of the output time series. Used as input for \strong{rescaleVector}. Default is 0.
#' @param output.max numeric vector, maximum values of the output time series. Used as input for \strong{rescaleVector}. Default is 100.
#' @param driver.names character vector, with labels to be used to identify the drivers.
#' @param filename character string, name of output pdf file. If NULL or empty, no pdf is produced.
#'
#' @details It is recommended to use \code{time} vectors with a time step of 1 between consecutive values when the output is to be used as input for \code{\link{simulatePopulation}}, which considers annual time-steps while simulating virtual pollen curves. Initial random sequence is generated by \code{rnorm}. Desired temporal autocorrelation are approximate, but deviation becomes higher if \code{autocorrelation.length} is larger than half the length of \code{time}. Consequently, the function limits \code{autocorrelation.length} to \code{length(time)/2}.
#'
#' @author Blas M. Benito  <blasbenito@gmail.com>
#'
#' @return A long format dataframe (see dataset \code{\link{drivers}}) with the following columns:
#'
#' \itemize{
#'   \item \emph{time}: integer.
#'   \item \emph{driver}: character, values are \code{A} and \code{B}
#'   \item \emph{autocorrelation.length}: numeric, default values are 200, 600, and 1800.
#'   \item \emph{value}: numeric, value of the driver for the given \emph{time}.
#' }
#'
#' @seealso \code{\link{drivers}}, \code{\link{simulateDriver}}, \code{\link{simulatePopulation}}, \code{\link{drivers}}
#'
#' @examples
#'
#'drivers <- simulateDriverS(
#'  random.seeds=c(60, 120),
#'  time=1:10000,
#'  autocorrelation.lengths=c(200, 600),
#'  output.min=c(0,0),
#'  output.max=c(100, 100),
#'  driver.names=c("A", "B"),
#'  filename=NULL
#')
#'
#'str(drivers)
#'
#' @export
simulateDriverS <- function(
    random.seeds = c(60, 120),
    time = 1:10000,
    autocorrelation.lengths = c(200, 600),
    output.min = c(0,0),
    output.max = c(100, 100),
    driver.names = c("A", "B"),
    filename = NULL
){

  scaled <- NULL

  #TESTING INPUT DATA
  #number of driver names
  n.names <- length(driver.names)

  #driver.names
  if(!is.character(driver.names)){stop("The argument driver.names should be a character vector.")}

  #random.seeds
  if(!is.numeric(random.seeds)){
    random.seeds  <-  1:n.names
  } else {
    random.seeds <- as.integer(random.seeds)
  }
  if(length(random.seeds) < n.names){
    random.seeds <- random.seeds[1]:(random.seeds[1]+n.names)
  }

  #output.min and output.max
  if(length(output.min) < n.names){
    output.min <- rep(output.min[1], n.names)
  }
  if(length(output.min) > n.names){
    output.min <- output.min[1:n.names]
  }
  if(length(output.max) < n.names){
    output.max <- rep(output.min[1], n.names)
  }
  if(length(output.max) > n.names){
    output.max <- output.max[1:n.names]
  }
  for(i in 1:n.names){
    if(output.max[i] < output.min[i]){
      temp.min <- output.max[i]
      output.max[i]  <-  output.min[i]
      output.min[i]  <-  temp.min
    }
  }

  #time
  if(!is.numeric(time)){stop("Argument 'time' should be a numeric vector, try 1:1000.")}
  if(length(time)==1){time <- 1:floor(time)}

  #autocorrelation.length
  if(!is.numeric(autocorrelation.lengths)){stop("Argument 'autocorrelation.length' should be a numeric vector.")}



  #data ranges and random seed for each drivers.2k
  data.ranges <- data.frame(driver=driver.names, output.min, output.max, random.seeds)

  #dataframes to store drivers
  drivers <- drivers.temp <- data.frame(
    time=numeric(),
    driver=character(),
    autocorrelation.length=numeric(),
    value=numeric()
    )

  autocorrelation <- data.frame(
    lag=numeric(),
    acf=numeric(),
    ci.max=numeric(),
    ci.min=numeric(),
    driver=character(),
    autocorrelation.length=numeric()
    )

  #looping through drivers and memory lengths
  for(driver in driver.names){
    for(autocorrelation.length in autocorrelation.lengths){

      #FILLING drivers.temp
      #---------------------------------------
      #fill drivers.temp with time and grouping
      drivers.temp[max(time), ]  <-  NA
      drivers.temp$time <- time

      #fill with parameter values
      drivers.temp$driver <- rep(driver, nrow(drivers.temp))
      drivers.temp$autocorrelation.length <- rep(autocorrelation.length, nrow(drivers.temp))

      #fill values of the driver
      simulated.driver <- simulateDriver(
        random.seed = data.ranges[data.ranges$driver==driver, "random.seeds"],
        time = time,
        autocorrelation.length = autocorrelation.length,
        output.min = data.ranges[data.ranges$driver==driver, "output.min"],
        output.max = data.ranges[data.ranges$driver==driver, "output.max"]
      )

      #getting the driver values
      drivers.temp$value <- rescaleVector(x = simulated.driver, new.max = 100, new.min = 0)

      #FILLING autocorrelation.temp
      #---------------------------------------
      #computing acf
      autocorrelation.temp <- acfToDf(
        x = simulated.driver,
        lag.max = max(max(autocorrelation.lengths)),
        length.out = 100
        )

      autocorrelation.temp$driver <- rep(driver, nrow(autocorrelation.temp))

      autocorrelation.temp$autocorrelation.length <- rep(autocorrelation.length, nrow(autocorrelation.temp))

      #merging with main dataframes
      drivers <- rbind(drivers, drivers.temp)
      autocorrelation <- rbind(autocorrelation, autocorrelation.temp)

    }
  }

  #PLOTTING OUTPUT
  p.drivers <- ggplot2::ggplot(
    data = drivers,
    ggplot2::aes(x = time, y = value, color = driver)
    ) +
    ggplot2::geom_line() +
    ggplot2::scale_color_viridis_d(begin = 0.2, end = 0.6) +
    ggplot2::facet_wrap(driver ~ autocorrelation.length, ncol = 1, scales = "free_y") +
    ggplot2::xlab("Time (years)") +
    ggplot2::ylab("") +
    ggplot2::ggtitle("Virtual drivers") +
    ggplot2::theme(
      plot.margin = ggplot2::unit(c(0.5, -1, 0.5, 0.5), "cm"),
      legend.position = "none",
      panel.background = ggplot2::element_blank()
      ) +
    cowplot::background_grid(major = "none", minor = "none")

  p.density <- ggplot2::ggplot(
    data = drivers,
    ggplot2::aes(value, fill = driver, colour = driver)
    ) +
    ggplot2::geom_density(ggplot2::aes(y = ggplot2::after_stat(scaled)), alpha = 0.5) +
    ggplot2::scale_color_viridis_d(begin = 0.2, end = 0.6) +
    ggplot2::scale_fill_viridis_d(begin = 0.2, end = 0.6) +
    ggplot2::facet_wrap(driver ~ autocorrelation.length, ncol = 1, scales = "free") +
    ggplot2::xlab("Years") +
    ggplot2::ylab("") +
    ggplot2::xlab("") +
    ggplot2::coord_flip() +
    ggplot2::theme(plot.margin = ggplot2::unit(c(0.5, 0.5, 0.5, 0), "cm"),
          axis.line = ggplot2::element_blank(),
          axis.text = ggplot2::element_blank(),
          axis.ticks = ggplot2::element_blank(),
          strip.background = ggplot2::element_blank(),
          strip.text.x = ggplot2::element_blank(),
          legend.position = "none",
          panel.background = ggplot2::element_blank()) +
    cowplot::background_grid(major = "none", minor = "none")

  p.acfs <- ggplot2::ggplot(
    data = autocorrelation,
    ggplot2::aes(x = lag, y = acf, color = driver)
    ) +
    ggplot2::scale_color_viridis_d(begin = 0.2, end = 0.6) +
    ggplot2::geom_hline(ggplot2::aes(yintercept = 0)) +
    ggplot2::geom_hline(ggplot2::aes(yintercept = ci.max), color = "red", linetype = "dashed") +
    ggplot2::geom_hline(ggplot2::aes(yintercept = ci.min), color = "red", linetype = "dashed") +
    ggplot2::geom_segment(mapping = ggplot2::aes(xend = lag, yend = 0)) +
    ggplot2::ylab("") +
    ggplot2::xlab("Lag (years)") +
    ggplot2::facet_wrap(driver ~ autocorrelation.length, ncol = 1, scales = "free_y") +
    ggplot2::ggtitle("Temporal autocorrelation") +
    ggplot2::theme(plot.margin = ggplot2::unit(c(0.5, 0.5, 0.5, -1), "cm"),
          legend.position = "none",
          panel.background = ggplot2::element_blank()) +
    cowplot::background_grid(major = "none", minor = "none")

  print(cowplot::plot_grid(p.drivers, NULL, p.density, NULL, p.acfs, align = "h", ncol = 5, rel_widths = c(1, 0, 0.3, 0, 1)))

  if(!is.null(filename) & is.character(filename)){
    ggplot2::ggsave(width = 12, height = (1.5*(length(driver.names) * length(autocorrelation.lengths))), filename = paste(filename, ".pdf", sep = ""))
  }


  return(drivers)

}

Try the virtualPollen package in your browser

Any scripts or data that you put into this service are public.

virtualPollen documentation built on Sept. 10, 2025, 10:37 a.m.