#' Perform convergent cross mapping using simplex projection
#'
#' \code{\link{ccm}} uses time delay embedding on one time series to generate an
#' attractor reconstruction, and then applies the simplex projection algorithm
#' to estimate concurrent values of another time series. This method is
#' typically applied, varying the library sizes, to determine if one time series
#' contains the necessary dynamic information to recover the influence of
#' another, causal variable.
#'
#' The default parameters are set so that passing a matrix as the only argument
#' will use E = 1 (embedding dimension), and leave-one-out cross-validation over
#' the whole time series to compute cross-mapping from the first column to the
#' second column, letting the library size vary from 10 to 100 in increments of
#' 10.
#'
#' \code{norm = 2} (default) uses the "L2 norm", Euclidean distance:
#' \deqn{distance(a,b) := \sqrt{\sum_i{(a_i - b_i)^2}}
#' }{distance(a, b) := \sqrt(\sum(a_i - b_i)^2)}
#' \code{norm = 1} uses the "L1 norm", Manhattan distance:
#' \deqn{distance(a,b) := \sum_i{|a_i - b_i|}
#' }{distance(a, b) := \sum|a_i - b_i|}
#' Other values generalize the L1 and L2 norm to use the given argument as the
#' exponent, P, as:
#' \deqn{distance(a,b) := \sum_i{(a_i - b_i)^P}^{1/P}
#' }{distance(a, b) := (\sum(a_i - b_i)^P)^(1/P)}
#'
#' @inheritParams block_lnlp
#' @inheritParams simplex
#' @param lib_sizes the vector of library sizes to try
#' @param random_libs indicates whether to use randomly sampled libs
#' @param num_samples is the number of random samples at each lib size (this
#' parameter is ignored if random_libs is FALSE)
#' @param replace indicates whether to sample vectors with replacement
#' @param lib_column the index (or name) of the column to cross map from
#' @param RNGseed will set a seed for the random number generator, enabling
#' reproducible runs of ccm with randomly generated libraries
#' @return A data.frame with forecast statistics for the different parameter
#' settings:
#' \tabular{ll}{
#' \code{L} \tab library length (number of vectors)\cr
#' \code{num_pred} \tab number of predictions\cr
#' \code{rho} \tab correlation coefficient between observations and predictions\cr
#' \code{mae} \tab mean absolute error\cr
#' \code{rmse} \tab root mean square error
#' }
#' @examples
#' data("sardine_anchovy_sst")
#' anchovy_xmap_sst <- ccm(sardine_anchovy_sst, E = 3,
#' lib_column = "anchovy", target_column = "np_sst",
#' lib_sizes = seq(10, 80, by = 10), num_samples = 100)
#'
ccm <- function(block, lib = c(1, NROW(block)), pred = lib,
norm = 2, E = 1,
tau = 1, tp = 0, num_neighbors = "e+1",
lib_sizes = seq(10, 100, by = 10), random_libs = TRUE,
num_samples = 100, replace = TRUE, lib_column = 1,
target_column = 2, first_column_time = FALSE, RNGseed = NULL,
exclusion_radius = NULL, epsilon = NULL,
stats_only = TRUE, silent = FALSE)
{
# make new model object
model <- new(Xmap)
# setup data
dat <- setup_time_and_block(block, first_column_time)
time <- dat$time
block <- dat$block
model$set_time(time)
model$set_block(block)
my_lib_column <- convert_to_column_indices(lib_column, block,
silent = silent)
model$set_lib_column(my_lib_column)
my_target_column <- convert_to_column_indices(target_column, block,
silent = silent)
model$set_target_column(my_target_column)
# setup norm type
model$set_norm(norm)
# setup lib and pred ranges
lib <- coerce_lib(lib, silent = silent)
pred <- coerce_lib(pred, silent = silent)
model$set_lib(lib)
model$set_pred(pred)
# check lib_sizes
prev_num_lib_sizes <- length(lib_sizes)
lib_sizes <- lib_sizes[lib_sizes >= 0]
lib_sizes <- unique(sort(lib_sizes))
if (length(lib_sizes) < 1)
stop("No valid lib sizes found among input", lib_sizes)
if (length(lib_sizes) < prev_num_lib_sizes)
rEDM_warning("Some requested lib sizes were redundant or bad and ignored.",
silent = silent)
model$set_lib_sizes(lib_sizes)
# handle exclusion radius
if (is.null(exclusion_radius))
{
exclusion_radius <- -1
}
model$set_exclusion_radius(exclusion_radius)
# TODO: handle epsilon
# handle silent flag
if (silent)
{
model$suppress_warnings()
}
rEDM_warning("Note: CCM results are typically interpreted in the opposite ",
"direction of causation. Please see 'Detecting causality in ",
"complex ecosystems' (Sugihara et al. 2012) for more details.",
silent = silent)
params <- data.frame(E, tau, tp, nn = num_neighbors, lib_column, target_column)
e_plus_1_index <- match(num_neighbors, c("e+1", "E+1", "e + 1", "E + 1"))
if (any(e_plus_1_index, na.rm = TRUE))
params$nn <- params$E + 1
params$nn <- as.numeric(params$nn)
if (!check_params_against_lib(params$E, params$tau, params$tp, lib,
silent = silent))
{
stop("Parameter combination was invalid, stopping.")
}
model$set_params(params$E, params$tau, params$tp, params$nn,
random_libs, num_samples, replace)
if (!is.null(RNGseed))
set.seed(RNGseed)
if (!stats_only)
model$enable_model_output()
model$run()
if (silent)
{
suppressWarnings( stats <- model$get_stats() )
} else {
stats <- model$get_stats()
}
out <- cbind(params, stats, row.names = NULL)
if (!stats_only)
{
out$model_output <- model$get_output()
}
return(out)
}
#' Take output from ccm and compute means as a function of library size.
#'
#' \code{\link{ccm_means}} is a utility function to summarize output from the
#' \code{\link{ccm}} function. If there is a `model_output` column (e.g. if
#' `ccm()` was run with `stats_only = FALSE`), then that column is dropped
#' before summaries are computed.
#'
#' @param ccm_df a data.frame, usually output from the \code{\link{ccm}}
#' function
#' @param FUN a function that aggregates the numerical statistics (by default,
#' uses the mean)
#' @param ... optional arguments to FUN
#' @return A data.frame with forecast statistics aggregated at each unique
#' library size
#' @examples
#' data("sardine_anchovy_sst")
#' anchovy_xmap_sst <- ccm(sardine_anchovy_sst, E = 3,
#' lib_column = "anchovy", target_column = "np_sst",
#' lib_sizes = seq(10, 80, by = 10), num_samples = 100)
#' a_xmap_t_means <- ccm_means(anchovy_xmap_sst)
#'
ccm_means <- function(ccm_df, FUN = mean, ...)
{
lib <- ccm_df$lib_column[!duplicated(ccm_df$lib_size)]
target <- ccm_df$target_column[!duplicated(ccm_df$lib_size)]
ccm_df$lib_column <- NULL
ccm_df$target_column <- NULL
ccm_df$model_output <- NULL
ccm_means <- aggregate(ccm_df, by = list(ccm_df$lib_size), FUN, ...)
col_idx <- which(names(ccm_means) == "lib_size")
ccm_means <- cbind(ccm_means[, 1:(col_idx - 1)],
lib_column = lib, target_column = target,
ccm_means[, col_idx:NCOL(ccm_means)])
return(ccm_means[, -1]) # drop Group.1 column
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.