# R/ccm-convergence-checks.R In kahaaga/tstools: Useful functions

#### Documented in ExponentalRegression2get_convergence_parameters

```#' Performs an exponential regression on the form p = p_max - p0*exp(-c*(L-L0))
#' The same as in van Nes et al. (2016). In this paper, we use it with data
#' where where p is the CCM predictive skill and L is the library size.
#'
#' @param data A data frame containing two columns.
ExponentalRegression2 <- function(data) {
exp.model <- stats::nls(data = data,
formula = rho ~ a * L / (b + L),
start = list(a = 0.1, b = 0.1))
return(exp.model)
}

#' Computes various parameters that can be used to determine if
#' cross-map skill increases with increasing library size.
#'
#' @param ccm.result A data frame
#' @param confidence.level The confidence level. Defaults to 0.99.
#' @param plot Plot the convergence analysis?
#' @importFrom magrittr "%>%"
#' @importFrom stats "coef" "nls" "qnorm"
get_convergence_parameters <- function(ccm.result,
confidence.level = 0.99,
plot = F) {

####################
# Set up for testing.
####################

# Select the relevant data columns
df <- ccm.result[, c("lib_size", "rho")]

# Extract the library sizes to use for regressions
library.sizes <- unique(df\$lib_size)

# Create a named vector to store the various test results
coeffs <- c(NA, NA, NA, NA,
NA, NA, NA, NA)
names(coeffs) <- c("k", "a", "b", "highlow.difference",
"p.value", "alpha", "confidence.level", "convergent")

if (is.null(ccm.result)) return(coeffs)

# Test 1:
# Check whether cross-map skill is higher at larger
# library sizes than at lower library sizes. Runs
# a smoother over the data before comparing the
# very largest and smallest library sizes.
grouping.variable <- rlang::sym("lib_size")

medians <- dplyr::group_by(df, !!grouping.variable) %>%
dplyr::summarise(median.rho <- stats::median(rho, na.rm = T)) %>%
as.data.frame()

# Smooth the data using a running mean
medians\$median.rho <- zoo::rollmean(x = medians\$median.rho,
k = 3, #floor(length(library.sizes)) / 5,
align = "right")
medians <- medians[stats::complete.cases(medians), ]

# Compute difference between high and low library sizes
indices.low <- 1:5
indices.high <- (nrow(medians) - 5):nrow(medians)

# Nonparametric hypothesis test to check whether ccm skill at largest
# library sizes is significantly higher than at the smallest library
# sizes. The difference in ccm skill must be larger than one standard
# deviation of the ccm skill at the largest library sizes (at lower
# library sizes, standard deviations are usually very high and thus
# not giving useful information). We therefore prefer to use the spread
# statistic for the highest library sizes.
wilcox <- stats::wilcox.test(x = medians[indices.high, "median.rho"],
y = medians[indices.low, "median.rho"],
alternative = "greater",
mu = stats::sd(medians[indices.high, "median.rho"]),
conf.level = confidence.level,
exact = FALSE)

coeffs["highlow.difference"] <- mean(medians[indices.high, "median.rho"]) -
mean(medians[indices.low, "median.rho"])
coeffs["p.value"] <- wilcox\$p.value
coeffs["alpha"] <- 1 - confidence.level

coeffs["confidence.level"] <- confidence.level
coeffs["convergent"] <- ifelse(test = wilcox\$p.value < 1 - confidence.level,
yes = TRUE,
no = FALSE)

# Test 2:
# Fit an exponential regression model and determine convergence
# by the estimated model parameter.
colnames(df) <- c("L", "rho")
df\$rho <- df\$rho
start.index <- which(df\$rho > 0)[1]

if (is.na(start.index)) {
slope <- NA
} else {
df <- df[start.index:nrow(df), ]
df\$rho0 <- df[1, "rho"]
df\$rho.max <- rep(stats::quantile(df\$rho, probs = 0.95, na.rm = T))
df\$L0 <- df[1, "L"]

# Logaritmic dataframe representation of the exponential expression,
# in case some values are negative.
dt <- do.call("cbind", list(log(df\$rho0),
log(df\$rho.max - df\$rho),
(df\$L - df\$L0))
)
dt <- as.data.frame(dt)

dt\$L <- df\$L
dt <- as.data.frame(dt <- dt[!is.infinite(rowSums(dt)), ])
dt\$logrhos <- dt[, 1] - dt[, 2]
dt\$L <- dt[, 3]

if (all(!is.finite(dt\$logrhos))) {
slope <- 0
} else {
slope <- stats::lm(dt\$logrhos ~ dt\$L)\$coefficients[2]
f2 <- suppressWarnings(rho ~ rho.max - rho0 * exp((-k) * (L - L0)))

exp.model2 <- try(stats::nls(data = df,
formula = f2,
start = list(k = .1)),
silent = T)

if (!class(exp.model2) == "try-error") {
L <- seq(min(library.sizes), max(library.sizes), 1)

predicted.rho.model2 <- stats::predict(exp.model2, list(L = L))
pred2 <- as.data.frame(cbind(L, predicted.rho.model2))
}
}

coeffs[1] <- slope

# Test 3:
# Fit a simple slowly converging model and determine convergence
# by the estimated model parameters.
f1 <- (rho ~ a * L / (b + L))

exp.model1 <- try(nls(data = df,
formula = f1,
start = list(a = 0.1, b = 0.1)),
silent = T)

if (!class(exp.model1) == "try-error") {
coeffs["a"] <- coef(exp.model1)["a"]
coeffs["b"] <- coef(exp.model1)["b"]

# Define training points (library sizes L) to predict rho for.
L <- seq(min(library.sizes), max(library.sizes), 1)
predicted.rho.model1 <- stats::predict(exp.model1, list(L = L))
pred1 <- cbind(L, predicted.rho.model1)
pred1 <- as.data.frame(pred1)

} else if (class(exp.model1) == "try-error") {
#warning("Exponential model could not be fitted")
}
}
if (plot) {
p <- ggplot2::ggplot() +
ggplot2::geom_boxplot(
data = reshape2::melt(df, id.vars = "L", measure.vars = "rho"),
mapping = ggplot2::aes(x = L, y = value, group = L),
alpha = 0.8, fill = "blue", col = "black", outlier.alpha = 0.05) +
ggplot2::geom_line(
data = pred1,
mapping = ggplot2::aes(x = L, y = predicted.rho.model1,
col = "Slowly converging model")) +
ggplot2::geom_line(
data = pred2,
mapping = ggplot2::aes(x = L, y = predicted.rho.model2,
col = "Exponential model"), size = 1) +

ggplot2::scale_y_continuous(limits = c(0, 0.7)) +
ggplot2::theme_bw() +
ggplot2::theme(panel.grid = ggplot2::element_blank(),
legend.title = ggplot2::element_blank()) +
ggplot2::xlab("Library size (L)") +
ggplot2::ylab("CCM skill (rho)")
print(p)
}

return(coeffs)
}
```
kahaaga/tstools documentation built on Nov. 30, 2017, 7:24 a.m.