## =============================================================================
#' Title
#'
#' @param x
#' @param n
#'
#' @return
#' @export
#'
#' @examples
moving_avg <- function(x, n = 5) {
out <- stats::filter(x, rep(1 / n, n), sides = 2)
as.numeric(out)
}
## =============================================================================
#' Title
#'
#' @param x
#' @param y
#' @param lag_max
#' @param lag_seq
#' @param method
#' @param int_fun
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
summarize_lagged_cors <- function(x, y, lag_max, lag_seq = 1,
method = c("discrete", "integrated"),
int_fun = mean, ...) {
method <- match.arg(method)
# Set vector of lags to be tested
lags <- seq(0, lag_max, by = lag_seq)
# Initialize output dataset using instantaneous correlation
out <- broom::tidy(cor.test(x, y))
# Discrete approach: y is correlated to x at the nth previous index
if (method == "discrete") {
for (i in 2:length(lags)) {
x_lag <- dplyr::lag(x, lags[i])
test <- cor.test(x_lag, y, ...) %>% broom::tidy()
out <- dplyr::bind_rows(out, test)
}
}
# Integrated appraoch: y is correlated to a summary of x from nth previous
# index to present
if (method == "integrated") {
for (i in seq_along(lags)) {
x_lag <- zoo::rollapplyr(x, width = lags[i], FUN = int_fun, fill = NA)
test <- cor.test(x_lag, y, ...) %>% broom::tidy()
out <- dplyr::bind_rows(out, test)
}
}
# Add lag index to data frame, return
out %>%
dplyr::mutate(lag = lags) %>%
dplyr::select(lag, dplyr::everything())
}
# THIS NEEDS HELP!!!!!
roll_nls <- function(data, a_frs, a_veg, a_wtr, norm = 0, frame = 1344,
screen = FALSE, method = c("gpp", "rref", "e0"),
rolltype = c("continuous", "window")) {
method <- match.arg(method)
rolltype <- match.arg(rolltype)
is.error <- function(x) inherits(x, "try-error")
# Map data onto a full year time vector
timesteps <- data.frame(
timestamp = create_timesteps(2018, shift_by = 30, tz = "Etc/GMT+5")
)
if (method == "gpp") {
data <- data %>% dplyr::mutate(flux = GPP, env = PPFD)
} else if (method %in% c("rref", "e0")) {
data <- data %>% dplyr::mutate(flux = Reco, env = Tair)
}
df <- data %>%
dplyr::left_join(timesteps, ., by = "timestamp") %>%
dplyr::mutate(
gnorm = seq(from = -norm, to = norm, length.out = nrow(timesteps)),
a1 = exp(-gnorm^2) * a_frs,
a2 = exp(-gnorm^2) * a_veg,
a3 = exp(-gnorm^2) * a_wtr
) %>%
dplyr::select(timestamp, flux, env, frs, veg, wtr, a1, a2, a3)
side <- frame / 2
start <- side + 1
n <- nrow(df)
end <- n - start
seq <- seq(start, end, by = 1)
if (rolltype == "window") {
# Reichstein et al. (2005) method
if (method == "e0") {
frame <- 720
side <- frame / 2
start <- side + 1
end <- nrow(timesteps) - start
seq <- seq(start, nrow(timesteps), by = 240)
n <- length(seq)
end <- seq[n]
} else if (method == "rref") {
frame <- 192
side <- frame / 2
start <- side + 1
end <- nrow(timesteps) - start
seq <- seq(start, nrow(timesteps), by = frame)
n <- length(seq)
end <- seq[n]
}
}
out <- data.frame(
timestamp = rep(NA, n),
g_frs = rep(NA, n), g_veg = rep(NA, n), g_wtr = rep(NA, n),
se_frs = rep(NA, n), se_veg = rep(NA, n), se_wtr = rep(NA, n),
p_frs = rep(NA, n), p_veg = rep(NA, n), p_wtr = rep(NA, n),
a_frs = rep(NA, n), a_veg = rep(NA, n), a_wtr = rep(NA, n),
fitted = rep(NA, n), resid = rep(NA, n),
rsq = rep(NA, n), rmse = rep(NA, n), mae = rep(NA, n)
)
if (method == "rref") {
out <- out %>%
dplyr::rename(
r_frs = g_frs, r_veg = g_veg, r_wtr = g_wtr,
e_frs = a_frs, e_veg = a_veg, e_wtr = a_wtr
)
} else if (method == "e0") {
out <- out %>%
dplyr::rename(
e_frs = g_frs, e_veg = g_veg, e_wtr = g_wtr,
r_frs = a_frs, r_veg = a_veg, r_wtr = a_wtr
)
}
pbar <- if (rolltype == "continuous") {
dplyr::progress_estimated(n - frame)
} else dplyr::progress_estimated(n)
for (i in seq_along(seq)) {
temp <- df[(seq[i] - side):(seq[i] + side), ]
# Pre-screening conditions
if (all(is.na(temp$flux)) | all(is.na(temp$veg)) | all(is.na(temp$env))) {
pbar$tick()$print()
next
} else if (method == "e0") {
range <- range(temp$env, na.rm = TRUE)
complete <- complete.cases(temp)
#if (range[2] - range[1] <= 5 | sum(complete, na.rm = TRUE) <= 6) {
# pbar$tick()$print()
# next
#}
}
# Run nonlinear regression
if (method == "gpp") {
mod <- try(nls(
flux ~
frs * ((g1 * -a_frs * env) / (a_frs * env + g1)) +
veg * ((g2 * -a_veg * env) / (a_veg * env + g2)) +
wtr * ((g3 * -a_wtr * env) / (a_wtr * env + g3)),
start = list(g1 = 1, g2 = 1, g3 = 1), data = temp,
control = nls.control(maxiter = 200)
), silent = TRUE)
} else if (method == "rref") {
mod <- try(nls(
flux ~
frs * r1 * exp(a_frs * (1 / (283.15 - 227.13) - 1 / (env - 227.13))) +
veg * r2 * exp(a_veg * (1 / (283.15 - 227.13) - 1 / (env - 227.13))) +
wtr * r3 * exp(a_wtr * (1 / (283.15 - 227.13) - 1 / (env - 227.13))),
start = list(r1 = 1, r2 = 1, r3 = 1), data = temp,
control = nls.control(maxiter = 200)
), silent = TRUE)
} else if (method == "e0") {
mod <- try(nls(
flux ~
frs * a_frs * exp(e1 * (1 / (283.15 - 227.13) - 1 / (env - 227.13))) +
veg * a_veg * exp(e2 * (1 / (283.15 - 227.13) - 1 / (env - 227.13))) +
wtr * a_wtr * exp(e3 * (1 / (283.15 - 227.13) - 1 / (env - 227.13))),
start = list(e1 = 100, e2 = 100, e3 = 100), data = temp,
control = nls.control(maxiter = 200)
), silent = TRUE)
}
# Gather model parameters if model is created without error
if (is.error(mod)) {
pbar$tick()$print()
next
} else {
coefs <- summary(mod)$coefficients
fitted <- fitted(mod)[start]
resid <- resid(mod)[start]
out[i, ] <- c(
NA,
coefs[1, 1], coefs[2, 1], coefs[3, 1],
coefs[1, 2], coefs[2, 2], coefs[3, 2],
coefs[1, 4], coefs[2, 4], coefs[3, 4],
df$a1[i], df$a2[i], df$a3[i],
fitted, resid, modelr::rsquare(mod, temp), modelr::rmse(mod, temp),
modelr::mae(mod, temp)
)
if (method == "rref" & rolltype == "window") {
out[i, "timestamp"] <- median(temp$timestamp[which(!is.na(temp$flux))])
} else {
out[i, "timestamp"] <- df$timestamp[seq[i]]
}
}
pbar$tick()$print()
}
out$timestamp <- lubridate::as_datetime(out$timestamp, tz = "Etc/GMT+5")
if (screen) {
out[, 4] <- dplyr::if_else(
out[, 4] > 0 & out[, 5] > 0 & out[, 6] > 0, out[, 4], NA_real_
)
out[, 5] <- dplyr::if_else(
out[, 4] > 0 & out[, 5] > 0 & out[, 6] > 0, out[, 5], NA_real_
)
out[, 6] <- dplyr::if_else(
out[, 4] > 0 & out[, 5] > 0 & out[, 6] > 0, out[, 6], NA_real_
)
}
out
}
## =============================================================================
#' Calculate Maximum Uncertainty in a Parameterized Equation
#'
#' @param formula
#' @param vals
#' @param uncs
#' @param unc_type
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
max_unc <- function(formula, vals, uncs, unc_type = c("corr", "uncorr"), ...) {
unc_type <- match.arg(unc_type)
vars <- all.vars(formula)
if (any(is.character(vals))) {
df <- as.data.frame(rlang::list2(...))
colnames(df) <- suppressWarnings(vals[which(is.na(as.numeric(vals)))])
constants <- vars[!vars %in% names(df)]
for (i in seq_along(constants)) {
val <- as.numeric(vals[which(vars == constants[i])])
df[, constants[i]] <- rep(val, nrow(df))
}
df <- df[vars]
} else {
df <- data.frame()
for (i in 1:length(vals)) {
df[1, i] <- vals[i]
}
colnames(df) <- vars
}
partials <- deriv(formula, vars)
evald <- as.data.frame(attr(eval(partials, df), "gradient"))
for (i in 1:length(evald)) {
evald[, i] <- abs(evald[, i]) * uncs[which(vars == names(evald)[i])]
if (unc_type == "uncorr") evald[, i] <- evald[, i]^2
}
if (unc_type == "corr") {
rowSums(evald)
} else if (unc_type == "uncorr") {
sqrt(rowSums(evald))
}
}
## =============================================================================
#' Title
#'
#' @param x
#' @param y
#' @param n
#' @param zmax
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
roll_corr <- function(x, y, n, zmax = 0.25 * n, ...) {
df <- data.frame(x, y)
df$z <- zoo::rollapply(
data[, c("tair", "fch4")], n,
function(x) length(x[!is.na(x[, 1]) & !is.na(x[, 2])]) / 2,
by.column = FALSE, fill = NA
)
df$x <- dplyr::if_else(df$z < zmax, NA_real_, df$x)
df$y <- dplyr::if_else(df$z < zmax, NA_real_, df$y)
df$z <- NULL
out <- zoo::rollapply(
df, n, function(x) cor(x[, 1], x[, 2], use = "pairwise.complete.obs"),
by.column = FALSE, fill = NA
)
out <- dplyr::if_else(out == 1, NA_real_, out)
out[which(!is.na(out))][1:4] <- NA
out
}
## =============================================================================
#' Title
#'
#' @param x
#' @param y
#' @param z
#' @param method
#'
#' @return
#' @export
#'
#' @examples
get_outliers <- function(x, y = NULL, z,
method = c("default", "mad", "doublemad", "iqr",
"spikes", "cooks", "mahalanobis", "aq",
"pcout")) {
method <- match.arg(method)
if (is.null(y)) {
if (method %in% c("default", "mad")) {
# default mad z = 7
if (missing(z)) z <- 7
min <- median(x, na.rm = TRUE) - mad(x, na.rm = TRUE) * z
max <- median(x, na.rm = TRUE) + mad(x, na.rm = TRUE) * z
!dplyr::between(x, min, max)
} else if (method == "doublemad") {
# default mad z = 7
if (missing(z)) z <- 7
abs_dev <- abs(x[!is.na(x)] - median(x[!is.na(x)]))
left <- median(abs_dev[x[!is.na(x)] <= median(x[!is.na(x)])])
right <- median(abs_dev[x[!is.na(x)] >= median(x[!is.na(x)])])
x_mad <- dplyr::if_else(x < median(x, na.rm = TRUE), left, right)
mad_dist <- abs(x - median(x, na.rm = TRUE)) / x_mad
mad_dist > z
} else if (method == "iqr") {
# AKA Tukey's rule: default iqr z = 1.5
if (missing(z)) z <- 1.5
quant <- quantile(x, probs = c(.25, .75), na.rm = TRUE)
iqr <- z * IQR(x, na.rm = TRUE)
min <- quant[1] - iqr
max <- quant[2] + iqr
!dplyr::between(x, min, max)
} else if (method == "spikes") {
# x is flagged if it spikes beyond what is expected given variability for
# time period
if (missing(z)) z <- 7
df <- data.frame(x = x)
n <- 624
#browser()
df <- df %>%
# calculate immediate differences, cut out 2-week windows
dplyr::mutate(
di = (x - dplyr::lag(x, 1)) - (dplyr::lead(x, 1) - x),
window = ggplot2::cut_interval(dplyr::row_number(), length = n),
window = forcats::fct_explicit_na(window)
) %>%
# separate daytime and nighttime fluxes
dplyr::group_by(window) %>%
# calculate mad and median of the differences
dplyr::mutate(
med = median(di, na.rm = TRUE),
mad = median(abs(di - med), na.rm = TRUE) * z / 0.6745,
#mad = mad(di, na.rm = TRUE) * z / 0.6745,
min = med - mad,
max = med + mad
)
#!dplyr::between(df$di, df$min, df$max)
df$di < df$min | df$di > df$max & !is.na(df$di)
}
} else {
if (method %in% c("default", "cooks")) {
# default cooks z = 4
if (missing(z)) z <- 4
cooksd <- cooks.distance(lm(x ~ y, na.action = na.exclude))
lim <- mean(cooksd, na.rm = TRUE) * z
cooksd > lim
} else if (method == "mahalanobis") {
# default mahalanobis z = 12
if (missing(z)) z <- 12
m_df <- dplyr::bind_cols(list(x, y))
m_dist <- mahalanobis(
m_df,
colMeans(m_df, na.rm = TRUE),
cov(m_df, use = "pairwise.complete.obs")
)
m_dist > z
} else if (method == "aq") {
# default z = 0.05
# mvoutlier::aq()
} else if (method == "pcout") {
# mvoutlier::pcout() - $wfinal01: 0 = outlier, $wfinal: smaller values =
# more likely to be outliers
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.