Nothing
#' Extracts ecological memory features from the output of \code{\link{computeMemory}}.
#'
#' @description Computes the following features of the ecological memory patterns returned by \code{\link{computeMemory}}:
#' \itemize{
#' \item \code{memory strength} maximum difference in relative importance between each component (endogenous, exogenous, and concurrent) and the median of the random component. This is computed for exogenous, endogenous, and concurrent effect.
#' \item \code{memory length} proportion of lags over which the importance of a memory component is above the median of the random component. This is only computed for endogenous and exogenous memory.
#' \item \code{dominance} proportion of the lags above the median of the random term over which a memory component has a higher importance than the other component. This is only computed for endogenous and exogenous memory.
#' }
#'
#'
#' @param memory.pattern either a list resulting from \code{\link{computeMemory}}, or a dataframe with memory patterns of several taxa generated by \code{\link{experimentToTable}}. When using output from \code{\link{experimentToTable}}, filter to a specific sampling resolution before calling this function (e.g., \code{data[data$sampling == 25, ]}). Default: \code{NULL}.
#' @param exogenous.component character string or character vector,
#' name of the variable or variables defining the exogenous component.
#' When \code{memory.pattern} is output from \code{\link{computeMemory}},
#' this is automatically extracted from the \code{$drivers} slot if not provided.
#' Required when input is from \code{\link{experimentToTable}}. Default: \code{NULL}.
#' @param endogenous.component character string, name of the variable defining
#' the endogenous component.
#' When \code{memory.pattern} is output from \code{\link{computeMemory}},
#' this is automatically extracted from the \code{$response} slot if not provided.
#' Required when input is from \code{\link{experimentToTable}}. Default: \code{NULL}.
#' @param scale.strength boolean. If \code{TRUE}, the strength of the ecological memory components, which has the same units as the importance scores yielded by random Forest (percentage of increment in mean squared error when a variable is permuted), is scaled between 0 and 1. Default: \code{TRUE}.
#'
#' @details \strong{Warning:} this function only works when only one exogenous component (driver) is used to define the model in \code{\link{computeMemory}}. If more than one driver is provided through the argument \code{exogenous.component}, the maximum importance scores of all exogenous variables is considered. In other words, the importance of exogenous variables is not additive.
#'
#' @author Blas M. Benito <blasbenito@gmail.com>
#'
#' @return A dataframe with 8 columns and 1 row if \code{memory.pattern} is the output of \code{\link{computeMemory}} and 13 columns and as many rows as taxa are in the input if it is the output of \code{\link{experimentToTable}}. The columns are:
#'
#' \itemize{
#' \item \emph{label} character string to identify the taxon. It either inherits its values from \code{\link{experimentToTable}}, or sets the default ID as "1".
#' \item \emph{strength.endogenous} numeric, difference between the maximum importance of the endogenous component at any lag and the median of the random component (see details in \code{\link{computeMemory}}). When \code{scale.strength = TRUE} (default), values are scaled to [0, 1]; otherwise values are in importance units (percentage of increment in MSE).
#' \item \emph{strength.exogenous} numeric, same as above, but for the exogenous component.
#' \item \emph{strength.concurrent} numeric, same as above, but for the concurrent component (driver at lag 0).
#' \item \emph{length.endogenous} numeric in the range [0, 1], proportion of lags over which the importance of the endogenous memory component is above the median of the random component.
#' \item \emph{length.exogenous} numeric in the range [0, 1], same as above but for the exogenous memory component.
#' \item \emph{dominance.endogenous} numeric in the range [0, 1], proportion of the lags above the median of the random term over which a the endogenous memory component has a higher importance than the exogenous component.
#' \item \emph{dominance.exogenous}, opposite as above.
#' \item \emph{maximum.age}, numeric. As every column after this one, only provided if \code{memory.pattern} is the output of \code{\link{experimentToTable}}. Trait of the given taxon.
#' \item \emph{fecundity} numeric, trait of the given taxon.
#' \item \emph{niche.mean} numeric, trait of the given taxon.
#' \item \emph{niche.sd} numeric, trait of the given taxon.
#' }
#'
#' @seealso \code{\link{computeMemory}}
#'
#' @examples
#'
#' # Loading example data (output of computeMemory)
#' data(palaeodataMemory)
#'
#' # Simplified call - components auto-detected from computeMemory output
#' memory.features <- extractMemoryFeatures(
#' memory.pattern = palaeodataMemory
#' )
#'
#' # Explicit call - still supported for backwards compatibility
#' memory.features <- extractMemoryFeatures(
#' memory.pattern = palaeodataMemory,
#' exogenous.component = c(
#' "climate.temperatureAverage",
#' "climate.rainfallAverage"
#' ),
#' endogenous.component = "pollen.pinus"
#' )
#' @family memoria
#' @export
extractMemoryFeatures <- function(
memory.pattern = NULL,
exogenous.component = NULL,
endogenous.component = NULL,
scale.strength = TRUE
) {
#entry point
x <- memory.pattern
#checking if it is output of computeMemory or experimentToTable
#if x$memory does not exist, it is an output of experimentToTable
if (is.null(x$memory)) {
#factors to character
x$label <- as.character(x$label)
x$variable <- as.character(x$variable)
#identifying available groups
taxa <- unique(x$label)
#memory object switch to false
is.computeMemory.object <- FALSE
} else {
#it is a list containing results of a single taxa.
is.computeMemory.object <- TRUE
taxa <- 1
# Auto-extract component names from computeMemory output if not provided
if (is.null(endogenous.component) && !is.null(x$response)) {
endogenous.component <- x$response
}
if (is.null(exogenous.component) && !is.null(x$drivers)) {
exogenous.component <- x$drivers
}
}
# Validate required arguments
if (is.null(endogenous.component)) {
stop(
"Argument 'endogenous.component' is required. ",
"Provide it explicitly or ensure memory.pattern is output from computeMemory()."
)
}
if (is.null(exogenous.component)) {
stop(
"Argument 'exogenous.component' is required. ",
"Provide it explicitly or ensure memory.pattern is output from computeMemory()."
)
}
# Validate component names exist in the data (for experimentToTable input)
if (!is.computeMemory.object) {
available_vars <- unique(x$variable)
available_vars <- available_vars[available_vars != "random"]
# Check endogenous component
if (!endogenous.component %in% available_vars) {
stop(
"Endogenous component '",
endogenous.component,
"' not found in data. Available variables: ",
paste(available_vars, collapse = ", "),
"."
)
}
# Check exogenous components
missing_exo <- exogenous.component[!exogenous.component %in% available_vars]
if (length(missing_exo) > 0) {
stop(
"Exogenous component(s) not found in data: ",
paste(missing_exo, collapse = ", "),
". Available variables: ",
paste(available_vars, collapse = ", "),
"."
)
}
}
#dataframe to store results
nas <- rep(NA, (length(taxa)))
output.df <- data.frame(
label = nas,
strength.endogenous = nas,
strength.exogenous = nas,
strength.concurrent = nas,
length.endogenous = nas,
length.exogenous = nas,
dominance.endogenous = nas,
dominance.exogenous = nas,
maximum.age = nas,
fecundity = nas,
niche.mean = nas,
niche.sd = nas,
stringsAsFactors = FALSE
)
#row counter
row.counter <- 0
#iterating through taxa
for (taxon in taxa) {
#+1 to the row counter
row.counter <- row.counter + 1
#subsetting the taxon
if (!is.computeMemory.object) {
x.temp <- x[x$label == taxon, ]
} else {
x.temp <- x$memory
}
#random median
random.median <- round(
x.temp[x.temp$variable == "random", "median"][1],
2
)
#number of lags
lags <- unique(x.temp$lag)
lags <- lags[lags != 0]
# Validate that there are lags to analyze
if (length(lags) == 0) {
stop(
"No lags found beyond lag 0. ",
"Ecological memory analysis requires at least one non-zero lag."
)
}
#computing memory strength (difference betweenn component and median of the random term)
# strength.concurrent <- x.temp[x.temp$variable == exogenous.component & x.temp$lag == 0, "median"] - random.median
# x.temp <- x.temp[x.temp$lag!=0,] #removing lag 0
# strength.endogenous <- max(x.temp[x.temp$variable == endogenous.component, "median"]) - random.median
# strength.exogenous <- max(x.temp[x.temp$variable == exogenous.component, "median"]) - random.median
strength.concurrent <- max(x.temp[
x.temp$variable %in% exogenous.component & x.temp$lag == 0,
"median"
]) -
random.median
x.temp <- x.temp[x.temp$lag != 0, ] #removing lag 0
strength.endogenous <- max(x.temp[
x.temp$variable %in% endogenous.component,
"median"
]) -
random.median
strength.exogenous <- max(x.temp[
x.temp$variable %in% exogenous.component,
"median"
]) -
random.median
#to 0 if negative
if (strength.endogenous < 0) {
strength.endogenous <- 0
}
if (strength.exogenous < 0) {
strength.exogenous <- 0
}
#computing memory length: number of lags above the median of the random component
length.endogenous <- sum(
x.temp[x.temp$variable == endogenous.component, "median"] > random.median
) /
length(lags)
#getting medians of exogenous components
max.exogenous <- x.temp[
x.temp$variable == exogenous.component[1],
"median"
]
if (length(exogenous.component) > 1) {
for (j in exogenous.component[2:length(exogenous.component)]) {
max.exogenous <- pmax(
max.exogenous,
x.temp[x.temp$variable == j, "median"]
)
}
}
length.exogenous <- sum(max.exogenous > random.median) / length(lags)
#computing memory dominance lags of one component above other component
endogenous <- x.temp[x.temp$variable == endogenous.component, "median"]
exogenous <- max.exogenous
#values below random.median to zero
endogenous[endogenous < random.median] <- 0
exogenous[exogenous < random.median] <- 0
#values
dominance.endogenous <- sum(endogenous > exogenous) / length(lags)
dominance.exogenous <- sum(exogenous > endogenous) / length(lags)
#params
if (!is.computeMemory.object) {
maximum.age <- x.temp$maximum.age[1]
fecundity <- x.temp$fecundity[1]
niche.mean <- x.temp$niche.A.mean[1]
niche.sd <- x.temp$niche.A.sd[1]
} else {
maximum.age <- fecundity <- niche.mean <- niche.sd <- NA
}
#filling dataframe (use direct column assignment to avoid type coercion)
output.df$label[row.counter] <- taxon
output.df$strength.endogenous[row.counter] <- strength.endogenous
output.df$strength.exogenous[row.counter] <- strength.exogenous
output.df$strength.concurrent[row.counter] <- strength.concurrent
output.df$length.endogenous[row.counter] <- length.endogenous
output.df$length.exogenous[row.counter] <- length.exogenous
output.df$dominance.endogenous[row.counter] <- dominance.endogenous
output.df$dominance.exogenous[row.counter] <- dominance.exogenous
output.df$maximum.age[row.counter] <- maximum.age
output.df$fecundity[row.counter] <- fecundity
output.df$niche.mean[row.counter] <- niche.mean
output.df$niche.sd[row.counter] <- niche.sd
} #end of iteration through taxa
#to numeric
output.df[, 2:(ncol(output.df) - 1)] <- sapply(
output.df[, 2:(ncol(output.df) - 1)],
as.numeric
)
#removing trait columns if input is not a memory object
if (is.computeMemory.object) {
output.df <- output.df[, 1:8]
}
#rescaling strength components
if (length(taxa) > 1) {
if (scale.strength) {
output.df$strength.concurrent <- output.df$strength.concurrent /
max(output.df$strength.concurrent)
output.df$strength.exogenous <- output.df$strength.exogenous /
max(output.df$strength.exogenous)
output.df$strength.endogenous <- output.df$strength.endogenous /
max(output.df$strength.endogenous)
}
}
if (nrow(output.df) == 1) {
if (is.null(memory.pattern$response)) {
output.df$label <- NULL
}
output.df$label <- memory.pattern$response
}
return(output.df)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.