R/simulateDriverS.R

Defines functions simulateDriverS

Documented in simulateDriverS

#' Generates drivers for \code{\link{simulatePopulation}}.
#'
#' @description Wrapper of \code{simulateDriver} to generate several drivers with different autocorrelation lengths, and return a long format dataframe to be used as input for \code{\link{simulatePopulation}}. It also produces a plot of the generated drivers. \strong{Important}: note that the variable \code{time} runs from left to right, with lower values representing older samples.
#'
#'@usage simulateDriverS(
#'  random.seeds=c(60, 120),
#'  time=1:10000,
#'  autocorrelation.lengths=c(200, 600),
#'  output.min=c(0,0),
#'  output.max=c(100, 100),
#'  driver.names=c("A", "B"),
#'  filename=NULL)
#'
#' @param random.seeds vector of integers, seeds to be used by \code{set.seed}.
#' @param time integer, or numeric vector of integers with constant intervals. If a single integer is provided, a time sequence is generated from 0 to the given integer as \emph{seq(0, time, by = 1)}. Default is 1:10000.
#' @param autocorrelation.lengths vector of integers, represents the lengths of the convolution filters to be used to impose a particular temporal structure on each driver. Default is 100, equivalent to a filter composed by a hundred of ones.
#' @param output.min numeric vector, minimum values of the output time series. Used as input for \strong{rescaleVector}. Default is 0.
#' @param output.max numeric vector, maximum values of the output time series. Used as input for \strong{rescaleVector}. Default is 100.
#' @param driver.names character vector, with labels to be used to identify the drivers.
#' @param filename character string, name of output pdf file. If NULL or empty, no pdf is produced.
#'
#' @details It is recommended to use \code{time} vectors with a time step of 1 between consecutive values when the output is to be used as input for \code{\link{simulatePopulation}}, which considers annual time-steps while simulating virtual pollen curves. Initial random sequence is generated by \code{rnorm}. Desired temporal autocorrelation are approximate, but deviation becomes higher if \code{autocorrelation.length} is larger than half the length of \code{time}. Consequently, the function limits \code{autocorrelation.length} to \code{length(time)/2}.
#'
#' @author Blas M. Benito  <blasbenito@gmail.com>
#'
#' @return A long format dataframe (see dataset \code{\link{drivers}}) with the following columns:
#'
#' \itemize{
#'   \item \emph{time}: integer.
#'   \item \emph{driver}: character, values are \code{A} and \code{B}
#'   \item \emph{autocorrelation.length}: numeric, default values are 200, 600, and 1800.
#'   \item \emph{value}: numeric, value of the driver for the given \emph{time}.
#' }
#'
#' @seealso \code{\link{drivers}}, \code{\link{simulateDriver}}, \code{\link{simulatePopulation}}, \code{\link{drivers}}
#'
#' @examples
#'
#'drivers <- simulateDriverS(
#'  random.seeds=c(60, 120),
#'  time=1:10000,
#'  autocorrelation.lengths=c(200, 600),
#'  output.min=c(0,0),
#'  output.max=c(100, 100),
#'  driver.names=c("A", "B"),
#'  filename=NULL
#')
#'
#'str(drivers)
#'
#' @export
simulateDriverS <- function(random.seeds=c(60, 120),
                            time=1:10000,
                            autocorrelation.lengths=c(200, 600),
                            output.min=c(0,0),
                            output.max=c(100, 100),
                            driver.names=c("A", "B"),
                            filename=NULL){


#TESTING INPUT DATA
#number of driver names
n.names <- length(driver.names)

#driver.names
if(!is.character(driver.names)){stop("The argument driver.names should be a character vector.")}

#random.seeds
if(!is.numeric(random.seeds)){
  random.seeds  <-  1:n.names
} else {
  random.seeds <- as.integer(random.seeds)
}
if(length(random.seeds) < n.names){
  random.seeds <- random.seeds[1]:(random.seeds[1]+n.names)
}

#output.min and output.max
if(length(output.min) < n.names){
  output.min <- rep(output.min[1], n.names)
}
if(length(output.min) > n.names){
  output.min <- output.min[1:n.names]
}
if(length(output.max) < n.names){
  output.max <- rep(output.min[1], n.names)
}
if(length(output.max) > n.names){
  output.max <- output.max[1:n.names]
}
for(i in 1:n.names){
  if(output.max[i] < output.min[i]){
    temp.min <- output.max[i]
    output.max[i]  <-  output.min[i]
    output.min[i]  <-  temp.min
  }
}

#time
if(!is.numeric(time)){stop("time should be a numeric vector, try 1:1000.")}
if(length(time)==1){time <- 1:floor(time)}

#autocorrelation.length
if(!is.numeric(autocorrelation.lengths)){stop("autocorrelation.length should be a numeric vector.")}



#data ranges and random seed for each drivers.2k
data.ranges <- data.frame(driver=driver.names, output.min, output.max, random.seeds)

#dataframes to store drivers
drivers <- drivers.temp <- data.frame(time=numeric(), driver=character(), autocorrelation.length=numeric(), value=numeric())

autocorrelation <- data.frame(lag=numeric(), acf=numeric(), ci.max=numeric(), ci.min=numeric(), driver=character(), autocorrelation.length=numeric())

#looping through drivers and memory lengths
for(driver in driver.names){
  for(autocorrelation.length in autocorrelation.lengths){

    #FILLING drivers.temp
    #---------------------------------------
    #fill drivers.temp with time and grouping
    drivers.temp[max(time), ]  <-  NA
    drivers.temp$time <- time

    #fill with parameter values
    drivers.temp$driver <- rep(driver, nrow(drivers.temp))
    drivers.temp$autocorrelation.length <- rep(autocorrelation.length, nrow(drivers.temp))

    #fill values of the driver
    simulated.driver <- simulateDriver(
      random.seed = data.ranges[data.ranges$driver==driver, "random.seeds"],
      time = time,
      autocorrelation.length = autocorrelation.length,
      output.min = data.ranges[data.ranges$driver==driver, "output.min"],
      output.max = data.ranges[data.ranges$driver==driver, "output.max"]
    )

    #getting the driver values
    drivers.temp$value <- rescaleVector(x = simulated.driver, new.max = 100, new.min = 0)

    #FILLING autocorrelation.temp
    #---------------------------------------
    #computing acf
    autocorrelation.temp <- acfToDf(x = simulated.driver, lag.max = max(max(autocorrelation.lengths)), length.out = 100)
    autocorrelation.temp$driver <- rep(driver, nrow(autocorrelation.temp))
    autocorrelation.temp$autocorrelation.length <- rep(autocorrelation.length, nrow(autocorrelation.temp))

    #merging with main dataframes
    drivers <- rbind(drivers, drivers.temp)
    autocorrelation <- rbind(autocorrelation, autocorrelation.temp)

  }
}

#PLOTTING OUTPUT
p.drivers <- ggplot(data = drivers, aes(x = time, y = value, color = driver)) +
  geom_line() +
  scale_color_viridis(discrete = TRUE, begin = 0.2, end = 0.6) +
  facet_wrap(driver ~ autocorrelation.length, ncol = 1, scales = "free_y") +
  xlab("Time (years)") +
  ylab("") +
  ggtitle("Virtual drivers") +
  theme(plot.margin = unit(c(0.5, -1, 0.5, 0.5), "cm"),
        legend.position = "none",
        panel.background = element_blank()) +
  cowplot::background_grid(major = "none", minor = "none")

p.density <- ggplot(data = drivers, aes(value, fill = driver, colour = driver)) +
  geom_density(aes(y = ..scaled..), alpha = 0.5) +
  scale_color_viridis(discrete = TRUE, begin = 0.2, end = 0.6) +
  scale_fill_viridis(discrete = TRUE, begin = 0.2, end = 0.6) +
  facet_wrap(driver ~ autocorrelation.length, ncol = 1, scales = "free") +
  xlab("Years") +
  ylab("") +
  xlab("") +
  coord_flip() +
  theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0), "cm"),
        axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        legend.position = "none",
        panel.background = element_blank()) +
  cowplot::background_grid(major = "none", minor = "none")

p.acfs <- ggplot(data = autocorrelation, aes(x = lag, y = acf, color = driver)) +
  scale_color_viridis(discrete = TRUE, begin = 0.2, end = 0.6) +
  geom_hline(aes(yintercept = 0)) +
  geom_hline(aes(yintercept = ci.max), color = "red", linetype = "dashed") +
  geom_hline(aes(yintercept = ci.min), color = "red", linetype = "dashed") +
  geom_segment(mapping = aes(xend = lag, yend = 0)) +
  ylab("") +
  xlab("Lag (years)") +
  facet_wrap(driver ~ autocorrelation.length, ncol = 1, scales = "free_y") +
  ggtitle("Temporal autocorrelation") +
  theme(plot.margin = unit(c(0.5, 0.5, 0.5, -1), "cm"),
        legend.position = "none",
        panel.background = element_blank()) +
  cowplot::background_grid(major = "none", minor = "none")

print(cowplot::plot_grid(p.drivers, NULL, p.density, NULL, p.acfs, align = "h", ncol = 5, rel_widths = c(1, 0, 0.3, 0, 1)))

if(!is.null(filename) & is.character(filename)){
ggsave(width = 12, height = (1.5*(length(driver.names) * length(autocorrelation.lengths))), filename = paste(filename, ".pdf", sep = ""))
}


return(drivers)

}

Try the virtualPollen package in your browser

Any scripts or data that you put into this service are public.

virtualPollen documentation built on March 18, 2022, 6:16 p.m.