#' Cross-mapping from one variable to another variable over a range of lags.
#' Wrapper around OnewayLaggedCCM allowing multiple values for each input
#' argument. Useful when performing sensitivity tests. NOTE: automated
#' optimisation of embedding parameters is only available in this function;
#' lower-level functions do not implement them.
#'
#' @param data A data frame containing two columns - one for the presumed driver
#' and one for the response.
#' @param library.column Integer indicating which column to use as the library
#' column (presumed response) (1 for the first column and 2 for the second
#' column).
#' @param target.column Integer indicating which column to use as the target
#' column (presumed driver). Defaults to the opposite of 'library.column'.
#' @param lags A vector of lags to compute CCM for.
#' @param Es Vector of embedding dimensions. Defaults to NULL, which triggers
#' automated optimisation of the embedding dimension up to the dimension
#' specified by 'max.E'.
#' @param taus Embedding lags. Can either be several types:
#' 1. A vector of integer lags, in which case every lag provided is considered
#' 2. NULL, which sets the embedding lag to a default value of 1.
#' 3. "mi", which triggers automated optimisation of the embedding lag up to
#' the dimension specified by 'max.tau' using the first minima of the lagged
#' mutual information function.
#' 4. "acf", which triggers automated optimisation of the embedding lag up to
#' the dimension specified by 'max.tau' using the first minima of the lagged
#' autocorrelation function.
#' For densely sampled time series, the embedding lag should be set to the
#' first minima of the autocorrelation ("acf"), or mutual information ("mi")
#' functions. For sparsely sampled time series (for example geological time
#' series), set the embedding lag(s) to low values (defaults to 1 if NULL).
#'
#' @param min.E The minimum embedding dimension for which to optimise 'E'. Only
#' relevant if 'Es' is set to NULL.
#' @param max.E The maximum embedding dimension for which to optimise 'E'.
#' Only relevant if 'Es' is set to NULL.
#' @param max.tau The maximum embedding lag for which to optimise 'tau'. Only
#' relevant if 'taus' is set to "mi" or "acf".
#' @param optimise.FNNdim Optimise false nearest neighbours? Defaults to FALSE,
#' meaning that simplex projection only is used
#' for optimisation. If TRUE, simplex projection is performed with the minimum
#' embedding dimension outputted from
#' the FNN algorithm (and the box counting dimension, if acitivated). Only
#' relevant if either 'E' or tau is set to NULL.
#' @param optimise.boxcountdim Optimise box counting dimension? Defaults to
#' FALSE, meaning that simplex projection only is used for optimisation. If
#' TRUE, simplex projection is performed with the minimum embedding dimension
#' as the first integer larger than twice the box counting algorithm of the
#' attractor (also considering the minimum dimension indicated by the FNN
#' algorithm, if activated). Only relevant if either 'E' or tau is set to NULL.
#' @param which.optimdim.test Use the embedding dimension estimated by which
#' optimisation procedure? Defaults to "all", which uses the maximum of the
#' three methods. Other options are "FNN", "boxcount" or "simplex".
#' @param samples.original The number of random libraries to draw when
#' calculating cross map skill.
#' @param library.sizes The size of the random libraries drawn when calculating
#' cross map skill.
#' @param lib Indices of the original library time series to use as the library
#' (training) set.
#' @param pred Indices of the original target time series to use as prediction
#' set. If this overlaps with the training set, make sure to use leave-K-out
#' cross validation setting the 'exclusion.radius' parameters to a minimum of
#' E + 1.
#' @param exclusion.radius The number of temporal neighbours to exclude for the
#' leave-K-out cross validation. Must be at least E + 1. Can be one of the
#' following:
#' 1. An integer, in which case that exclusion radius is used.
#' 2. NULL, which sets the exclusion radius of E + 1.
#' 3. "mi", which triggers automated optimisation of the exclusion radius up
#' to using the first minima of the lagged mutual information function ran
#' over a maximum lag approximately equal to 5% of the time series length.
#' 3. "acf", which triggers automated optimisation of the exclusion radius up
#' to using the first minima of the lagged autocorrelation information
#' function ran over a maximum lag approximately equal to 5% of the time
#' series length.
#'
#' If the embedding lag is high, the exclusion radius can be set to E + 1.
#' If the embedding lag is low, the exclusion radius should be higher, or,
#' even better, estimated using either "mi" or "acf".
#' @param random.libs Whether or not to sample random library (training) sets.
#' Defaults to TRUE.
#' @param with.replacement Should samples be drawn with replacement? Defaults
#' to TRUE.
#' @param convergence.test Should a convergence test be performed? Analyses
#' where CCM does not convergence are nonsensical, so this option defaults to
#' TRUE.
#' @param n.libsizes.to.check How many library sizes to use for the convergence
#' check? Defaults to 30.
#' @param regression.convergence.plots Display regression plots for the
#' convergence test? Defaults to FALSE.
#' @param n.surrogates Should a surrogate test also be performed? If so,
#' 'n.surrogates' sets the number of surrogate time series to use. By default,
#' no surrogate test is performed (n.surrogates = 0).
#' @param samples.surrogates The number of surrogate series in each null
#' ensemble.
#' @param always.run.surrogates Should surrogate analyses be performed even if
#' the convergence test fails? Defaults to FALSE (there is, usually, no reason
#' to perform significance testing if the analysis is not causal to begin with).
#' @param surrogate.column Which column to use to generate surrogates. Defaults
#' to the value of 'target.column' (the presumed driver).
#' @param surrogate.methods Vector of types of surrogate time series to
#' generate. Will vary depending on what null hypothesis is being tested.
#' Defaults to AAFT surrogates.
#' @param num.neighbours The number of nearest neighbours to use in predictions.
#' Defaults to NULL, in which case it is set to E + 1 for each analysis (this
#' number will vary depending on the particular set of embedding parameters
#' used for that analysis).
#' @param RNGseed For reproducivility. Seed to use for the random number
#' generator.
#' @param parallel Activate parallellisation? Defaults to true. Currently, this
#' only works decently on Mac and Linux systems.
#' @param epsilon Exlude neighbours if the are within a distance of 'epsilon'
#' from the predictee.
#' @param silent Suppress warnings?
#' @param parallelize.on.each.lag Should parallellisation be done on the outer
#' lag loop? Defaults to TRUE. Otherwise, parallellisation is done over the
#' surrogate analyses.
#' @param num.cores The number of CPU cores to use for parallelisation. Defaults
#' to one core less than what is available.
#' @param time.series.length.threshold Display a warning if the time series
#' length drops below this threshold.
#' @param time.unit The time unit of the raw time series.
#' @param time.bin.size The temporal resolution of the raw time series (given in
#' the units indicated by 'time.unit').
#' @param print.to.console Display progress?
#' @param time.run Time the run?
#' @param plot.simplex.projection Plot simplex projection output?
#' @param ... Additional arguments to pass to lower-level functions.
#'
#' @export
ccm_lagged <- function(data,
lags = 0,
Es = NULL,
taus = NULL,
library.sizes = c(as.integer(nrow(data) / 2)),
lib = c(1, dim(data)[1]),
pred = lib,
samples.original = 100,
samples.surrogates = 50,
n.surrogates = 0,
surrogate.methods = c("aaft"),
time.unit = NULL,
time.bin.size = NULL,
num.neighbours = NULL,
random.libs = TRUE,
with.replacement = TRUE,
exclusion.radius = NULL,
epsilon = NULL,
RNGseed = 1111,
silent = TRUE,
time.run = F,
print.to.console = T,
time.series.length.threshold = 100,
library.column = 1,
target.column = 2,
surrogate.column = target.column,
convergence.test = TRUE,
parallel = TRUE,
parallelize.on.each.lag = F,
num.cores = parallel::detectCores() - 1,
regression.convergence.plots = F,
always.run.surrogates = F,
n.libsizes.to.check = 20,
optimise.FNNdim = T,
optimise.boxcountdim = T,
which.optimdim.test = "all",
min.E = 2,
max.E = 10,
max.tau = 10,
plot.simplex.projection = F,
...) {
# Save optimisation status.
if (is.null(Es)) {
dimoptimisation <- ifelse(is.null(Es), T, F)
assertthat::assert_that(
which.optimdim.test %in% c("FNN", "boxcount", "simplex", "all"),
msg = paste("which.optimdim.test argument",
which.optimdim.test, "is not valid."))
} else {
dimoptimisation <- F
}
if (is.null(taus)) {
lagoptimisation <- "default"
} else if (all(taus == "mi" | taus == "acf")) {
lagoptimisation <- taus
} else if (is.numeric(taus)) {
lagoptimisation <- "none"
} else {
if (class(taus) == "character") {
assertthat::assert_that(taus %in% c("mi", "acf"),
msg = paste("taus argument",
taus, "is not valid."))
} else {
lagoptimisation <- "none"
}
}
if (is.null(exclusion.radius)) {
exclusionradiusoptim <- "default"
} else if (exclusion.radius %in% c("mi", "acf")) {
exclusionradiusoptim <- exclusion.radius
} else {
exclusionradiusoptim <- "none"
}
# Warn if the number of library.sizes is too low. ----
if (n.libsizes.to.check < 20) {
# We need a reasonable amount if point to do a proper exponential
# function fit, so default to 20 if the provided number is less.
warning("Minimum number of libsizes to check is too low. Default to 20.")
n.libsizes.to.check <- 20
}
# If no embedding parameters are provided, find the best embedding parameters.
# Estimating the embedding lag ----
# The idea behind embedding lag optimisation is that points in the time
# series might be temporally correlated. Using small embedding lags may in
# such cases yield embeddings that are very stretched out in some directions
# compared to others. To get embeddings that are less streched, use a higher
# embedding lag.
#
# The disadvantage of using a high embedding lag is that many data points are
# left out (i.e. information is lost). For applications where the number of
# data points is low, it makes sense not to make the embedding lag too big
# (loosing precious data points). Therefore, if no lag is provided, the
# embedding lag defaults to 1.
# We use the putative driver time series to estimate best embedding lag.
if (is.numeric(taus)) {
taus <- taus
} else if (is.null(taus)) {
taus <- 1
} else if (taus == "mi") {
# Estimate the first minima of the lagged mutual information function for
# the putative driver time series.
v <- data[, target.column]
taus <- first_mi_minima(
v = v,
lag.max = min(max.tau, ceiling(length(v) * 0.05))
)
} else if (taus == "acf") {
# Estimate the first minima of the lagged autocorrelation function
# for the putative driver time series.
v <- data[, target.column]
taus <- first_mi_minima(
v = v,
lag.max = min(max.tau, ceiling(length(v) * 0.05))
)
}
# Estimate embedding dimension ----
if (is.null(Es)) {
Es <- vector(length = length(taus))
for (i in 1:length(taus)) {
optimal.embed.dim <- optimise_embedding_dim(
v = data[, target.column],
min.embedding.dim = min.E,
max.embedding.dim = max.E,
embedding.lag = taus[i],
optimise.FNNdim = optimise.FNNdim,
optimise.boxcountdim = optimise.boxcountdim,
plot.simplex.projection = plot.simplex.projection,
return.all = T
)
# Select the embedding dimension.
if (which.optimdim.test == "FNN") {
Es[i] <- optimal.embed.dim["FNN.criterion"]
} else if (which.optimdim.test == "boxcount") {
Es[i] <- optimal.embed.dim["boxcount.criterion"]
} else if (which.optimdim.test == "simplex") {
Es[i] <- optimal.embed.dim["simplex.projection.optimisation"]
} else if (which.optimdim.test == "all") {
Es[i] <- max(optimal.embed.dim, na.rm = T)
}
}
}
# Perform lagged CCM for each combination of the parameters ----
# Generate all possible combinations of the input parameters
params <- expand.grid(Es, taus, surrogate.methods)
colnames(params) <- c("E", "tau", "surrogate.method")
results <- list()
for (i in 1:nrow(params)) {
tau <- params[i, "tau"]
E <- params[i, "E"]
surrogate.method <- as.character(params[i, "surrogate.method"])
# If exclusion radius or number of neighbours is explicitly provided,
# use the provided values. Otherwise, use the first minimum of the
# mutual information or autocorrelation function with a maximum lag
# of 5% of the number of available data points. If this number is lower
# than E + 1, use E + 1.
if (is.numeric(exclusion.radius)) {
exclusion.radius <- exclusion.radius
} else if (is.null(exclusion.radius)) {
exclusion.radius <- E + 1
} else if (exclusion.radius == "mi") {
# Estimate the first minima of the lagged mutual information function for
# the putative driver time series.
v <- data[, target.column]
exclusion.radius <- first_mi_minima(v = v,
lag.max = ceiling(length(v) * 0.05))
} else if (exclusion.radius == "acf") {
# Estimate the first minima of the lagged autocorrelation function
# for the putative driver time series.
v <- data[, target.column]
exclusion.radius <- first_mi_minima(v = v,
lag.max = ceiling(length(v) * 0.05))
}
# Use E + 1 nearest neighbours unless otherwise specified. If the provided
# number of neighbours is too low, also use E + 1.
if (is.null(num.neighbours)) {
num.neighbours <- E + 1
} else if (num.neighbours < E + 1) {
warning(paste("num.neighbours (", num.neighbours, ") is too low.",
"Defaulting to E + 1 neighbours."))
num.neighbours <- E + 1
}
# Perform CCM for the given set of parameters
ccm <- ccm_lagged_oneway(
lags = lags,
data = data,
E = E,
tau = tau,
library.sizes = library.sizes,
lib = lib,
pred = lib,
samples.original = samples.original,
samples.surrogates = samples.surrogates,
n.surrogates = n.surrogates,
surrogate.method = surrogate.method,
time.unit = time.unit,
time.bin.size = time.bin.size,
num.neighbours = num.neighbours,
random.libs = random.libs,
with.replacement = with.replacement,
exclusion.radius = exclusion.radius,
epsilon = epsilon,
RNGseed = RNGseed,
silent = silent,
parallel = parallel,
parallelize.on.each.lag = parallelize.on.each.lag,
time.run = time.run,
print.to.console = print.to.console,
time.series.length.threshold = time.series.length.threshold,
library.column = library.column,
target.column = target.column,
surrogate.column = surrogate.column,
convergence.test = convergence.test,
n.libsizes.to.check = n.libsizes.to.check,
regression.convergence.plots = regression.convergence.plots,
always.run.surrogates = always.run.surrogates,
max.E = max.E,
max.tau = max.tau
)
# Add information about the parameters that are not recorded in deeper
# CCM functions.
ccm$max.E <- rep(max.E)
ccm$max.tau <- rep(max.tau)
ccm$n.libsizes.to.check <- rep(n.libsizes.to.check)
# Information about optimisation
ccm$which.exclusionradius.test <- rep(exclusionradiusoptim)
ccm$which.optimlag.test <- rep(lagoptimisation)
ccm$which.optimdim.test <- rep(ifelse(test = dimoptimisation,
yes = which.optimdim.test,
no = "none"))
ccm$optimise.simplex <- rep(ifelse(test = dimoptimisation,
yes = T, no = F))
ccm$optimise.FNNdim <- rep(ifelse(test = dimoptimisation,
yes = optimise.FNNdim, no = F))
ccm$optimise.boxcountdim <- rep(ifelse(test = dimoptimisation,
yes = optimise.boxcountdim, no = F))
# Store the result.
results[[i]] <- ccm
}
# Bind all results together.
results <- data.table::rbindlist(results)
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.