#' trrrends
#'
#' Applies a family of trend statistics: the Mann-Kendall S statistic, the Sen-Theil slope estimator, and Kendall's tau.
#'
#' @param x Numeric timeseries vector.
#' @param conf_level Confidence level used for calculating the upper and lower bounds of the Sen-Theil slope estimator. Defaults to 0.95.
#' @param alt_hypo Alternative hypothesis applied to the Mann-Kendall S statistic: 'two-tailed' (the default), 'greater', or 'less'.
#' @param continuity Logical; whether to apply the continuity correction to the Mann-Kendall statistics. The same correction is always applied to the Sen-Theil slope estimation.
#' @param na Method for handling missing values. Missing values are dropped by default ('rm' or 'drop'), but can also be replaced using a linear approximation ('approx' or 'lm'), the nearest value ('fill' or 'extend'), or a summary statistic ('aggregate'/'mean', 'median', 'min', 'max'); 'fail' raises an error if any missing values are present.
#'
#' @return A one-row tibble of trend statistics from the Sen-Theil, Mann-Kendall, and Kendall's tau families.
#' @export
#'
#' @importFrom magrittr %>%
#'
#' @examples
#' \dontrun{
#' dummy <- tibble::tibble(YEAR = c(2000:2020)) %>%
#'   dplyr::rowwise() %>%
#'   dplyr::mutate(RANDOM = runif(1, -33, 100)) %>%
#'   dplyr::ungroup() %>%
#'   dplyr::mutate(GRP = 'GROUP A') %>%
#'   dplyr::bind_rows(tibble::tibble(YEAR = c(2005:2020)) %>%
#'                      dplyr::rowwise() %>%
#'                      dplyr::mutate(RANDOM = runif(1, 33, 100)) %>%
#'                      dplyr::ungroup() %>%
#'                      dplyr::mutate(GRP = 'GROUP B')) %>%
#'   dplyr::bind_rows(tibble::tibble(YEAR = c(2000:2020)) %>%
#'                      dplyr::rowwise() %>%
#'                      dplyr::mutate(RANDOM = runif(1, 0, 1)) %>%
#'                      dplyr::ungroup() %>%
#'                      dplyr::mutate(GRP = 'GROUP C'))
#' dummy %>%
#'   dplyr::group_by(GRP) %>%
#'   dplyr::mutate(RANDOM = ifelse(RANDOM >= 80,
#'                                 NA,
#'                                 RANDOM)) %>%
#'   dplyr::summarise(trrrends(RANDOM,
#'                             conf_level = 0.95,
#'                             alt_hypo = 'two-tailed',
#'                             continuity = TRUE,
#'                             na = 'median'))
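#'
#' ## Minimal sketch: trrrends() can also be applied to a bare numeric vector
#' ## (the values below are arbitrary illustration data)
#' trrrends(c(2.1, 3.4, 1.8, 5.2, 4.7, 6.9, 5.5))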
#'
#' }
#'
trrrends <- function(x,
                     conf_level = 0.95,
                     alt_hypo = c("two-tailed", "greater", "less"),
                     continuity = TRUE,
                     na = c('rm', 'drop',
                            'approx', 'lm',
                            'fill', 'extend',
                            'aggregate', 'mean',
                            'median', 'min', 'max',
                            'fail')) {
## TESTS TO ENSURE DATA INPUT IS CORRECT ####
## Is input numeric?
if (!is.numeric(x)) {
stop("'x' must be a numeric vector.")
}
## Beta: Testing returning 0 for 0 variability
## N.B. `na.rm = TRUE` so a timeseries containing missing values (handled further below) doesn't break this check
if (var(x, na.rm = TRUE) > 0) { ## Cut here if this breaks!
## HELPER FUNCTIONS VIA `trend` PACKAGE ####
## Calculate trend variability
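## N.B. Tie-corrected variance of the Mann-Kendall S statistic:
##   Var(S) = [n(n - 1)(2n + 5) - sum_j t_j(t_j - 1)(2t_j + 5)] / 18,
##   where t_j are the sizes of the tied groups in `x`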
.varmk <- function(t, .n){
tadjs <- sum(t * (t - 1) * (2 * t + 5))
.mks_var <- (.n * (.n-1) * (2 * .n + 5) - tadjs) / 18
return(.mks_var)
}
## Calculate Mann-Kendall S statistics
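## N.B. S = sum over all pairs i < j of sign(x[j] - x[i]); tied pairs contribute zero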
.mkScore <- function(x){
.n <- length(x)
.mks <- 0.0
for(j in 1:.n) {
.mks <- .mks + sum(sign(x[j] - x[1:j]))
}
return(.mks)
}
## Calculate Kendall's tau D statistic
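## N.B. D is the tie-corrected denominator used for Kendall's tau, so tau = S / D:
##   D = sqrt(n(n - 1)/2 - sum_j t_j(t_j - 1)/2) * sqrt(n(n - 1)/2)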
.Dfn <- function (t, .n){
tadjd <- sum(t * (t - 1))
.ktau_d <- sqrt(1/2 * .n * (.n - 1) - 1/2 * tadjd) * sqrt(1/2 * .n *
(.n - 1))
return(.ktau_d)
}
.alt_hypo <- match.arg(alt_hypo)
.na <- match.arg(na)
## DEAL WITH MISSING VALUES ####
## TODO: Consider expanding on NA handling methods via zoo::na.approx or dplyr::replace_na ####
if (.na %in% c('drop', 'rm')) {
x <- na.omit(x)
} else if (.na %in% c('approx', 'lm')) {
x <- tryCatch(zoo::na.approx(x,
na.rm = FALSE),
error = function(cond) {
print(paste0('`zoo::na.approx` failed. Removing NAs instead.'))
return(na.omit(x))
})
if (sum(is.na(x)) >= 1) {
warning('NA replacement method did not replace all NAs. Remaining NAs are being dropped.')
}
x <- na.omit(x)
} else if (.na %in% c('fill', 'extend')) {
x <- tryCatch(zoo::na.fill(x,
'extend'),
error = function(cond) {
print(paste0('`zoo::na.fill` failed. Removing NAs instead.'))
return(na.omit(x))
})
} else if (.na %in% c('aggregate', 'mean')) {
x <- tryCatch(zoo::na.aggregate(x,
FUN = mean,
na.rm = FALSE),
error = function(cond) {
print(paste0('`zoo::na.aggregate` failed. Removing NAs instead.'))
return(na.omit(x))
})
} else if (.na == 'median') {
x <- tryCatch(zoo::na.aggregate(x,
FUN = median,
na.rm = FALSE),
error = function(cond) {
print(paste0('`zoo::na.aggregate` failed. Removing NAs instead.'))
return(na.omit(x))
})
} else if (.na == 'max') {
x <- tryCatch(zoo::na.aggregate(x,
FUN = max,
na.rm = FALSE),
error = function(cond) {
print(paste0('`zoo::na.aggregate` failed. Removing NAs instead.'))
return(na.omit(x))
})
} else if (.na == 'min') {
x <- tryCatch(zoo::na.aggregate(x,
FUN = min,
na.rm = FALSE),
error = function(cond) {
print(paste0('`zoo::na.aggregate` failed. Removing NAs instead.'))
return(na.omit(x))
})
} else if (.na == 'fail') {
na.fail(x)
}
## CALCULATE LENGTH OF TIMESERIES DATA ####
.n <- length(x)
## Is input longer than 3 rows?
## N.B. Originally only applicable to Mann-Kendall statistics, but why would we use this for a short timeseries, anyway!?
if (.n < 3) {
stop("'x' must have at least 3 elements")
}
## CALCULATE BASIC STATS FOR TREND CALCULATION ####
## table() unique trend values
t <- table(x)
## Remove reference to the actual unique values and just keep the value occurrences
## N.B. Not sure this step actually does anything important...
names(t) <- NULL
## Calculate trend variability for S statistics
.mks_var <- .varmk(t, .n)
## Set up starting point for comparing all trend values
k <- 0
## Create empty vector to fill with pairwise slope comparisons
d <- rep(NA, .n * (.n - 1)/2)
## Fill the vector by comparing all pairs of time points to one another
for (i in 1:(.n - 1)) {
for (j in (i + 1):.n) {
k <- k + 1
d[k] <- (x[j] - x[i])/(j - i)
}
}
## CALCULATE SEN-THEIL SLOPE ESTIMATOR ####
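## N.B. The Sen-Theil slope is the median of all pairwise slopes (x[j] - x[i]) / (j - i)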
.bsen <- median(d, na.rm = TRUE)
## Calculate confidence interval for Sen-Theil slope
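## N.B. The bounds are order statistics of the sorted pairwise slopes, taken at ranks
##   round((k - C)/2) and round((k + C)/2 + 1), where C = qnorm(1 - (1 - conf_level)/2) * sqrt(Var(S))
##   and k is the number of pairwise slopes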
C <- qnorm(1 - (1 - conf_level)/2) * sqrt(.mks_var)
rank.bsen_upr <- round((k + C)/2 + 1)
rank.bsen_lwr <- round((k - C)/2)
rank.d <- sort(d)
.bsen_lwr <- rank.d[rank.bsen_lwr]
.bsen_upr <- rank.d[rank.bsen_upr]
## CALCULATE MANN-KENDALL S STATISTIC ####
.mks <- .mkScore(x)
## Calculate Kendall's tau D statistic
.ktau_d <- .Dfn(t, .n)
## CALCULATE KENDALL'S TAU ####
.ktau <- .mks/.ktau_d
## CALCULATE MANN-KENDALL Z STATISTICS ####
## Apply continuity correction if applicable
## N.B. Sen-Theil slope estimator p-values always assume `continuity = TRUE`
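## N.B. With the continuity correction, z = sign(S) * (|S| - 1) / sqrt(Var(S));
##   without it, z = S / sqrt(Var(S))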
if (continuity) {
## Calculate signum of S statistic
.mks_sg <- sign(.mks)
## Calculate Z statistic to represent both Mann-Kendall S statistic and Sen-Theil slope estimator
.mks_z <- .mks_sg * (abs(.mks) - 1)/sqrt(.mks_var)
## Copy-paste Z statistic for Sen (just for p-value purposes)
.bsen_z <- .mks_z
## Or don't apply continuity correction and calculate two distinct Z statistics for Mann-Kendall S statistics and Sen-Theil slope estimators
} else {
## calculate signum of Mann-Kendall S statistic
## N.B. Used only for Sen-Theil slope estimator p-value calculation, which assumes `continuity = TRUE`
.mks_sg <- sign(.mks)
## Calculate Mann-Kendall Z statistic
.mks_z <- .mks/sqrt(.mks_var)
## Calculate Sen-Theil slope estimator Z statistic
.bsen_z <- .mks_sg * (abs(.mks) - 1)/sqrt(.mks_var)
}
## CALCULATE P-VALUES ####
## Calculate Sen-Theil slope estimator p-value
## N.B. Always assumes `continuity = TRUE`
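## N.B. Two-sided p-value = 2 * P(Z > |z|); the min(0.5, ...) term caps the result at 1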
.bsen_p <- 2 * min(0.5, pnorm(abs(.bsen_z), lower.tail = FALSE))
## Calculate Mann-Kendall Z statistic p-value
## N.B. Dependent on `continuity` and the selected `alternative` hypothesis for trend direction.
.mks_p <- switch(.alt_hypo,
'two-tailed' = 2 * min(0.5, pnorm(abs(.mks_z), lower.tail = FALSE)),
greater = pnorm(.mks_z, lower.tail = FALSE),
less = pnorm(.mks_z, lower.tail = TRUE))
## Cut here if broken!
} else {
## Return all-zero trend statistics (with NA p-values) if there is 0 variance in the timeseries
.n <- length(x)
.bsen <- 0
.bsen_lwr <- 0
.bsen_upr <- 0
.bsen_z <- 0
.bsen_p <- NA_real_
.mks <- 0
.mks_sg <- 0
.mks_var <- 0
.mks_z <- 0
.mks_p <- NA_real_
.ktau <- 0
.ktau_d <- 0
}
## If Sen-Theil and Mann-Kendall p-values are identical, combine them in the output.
## N.B. This really should just be based on continuity, which is the only control for calculating different significance tests for Sen-Theil and Mann-Kendall -- this will prevent errors if there are any calculations with zero variance.
# if (.bsen_p == .mks_p & .bsen_z == .mks_z) {
if (continuity) {
df_output <- tibble::tibble(.n,
.bsen,
.bsen_lwr,
.bsen_upr,
.mks,
.mks_sg,
.mks_var,
.ktau,
.ktau_d,
.z = .mks_z,
.p = .mks_p)
## Otherwise, spit out both methods' p-values.
## N.B. Not sure which one represents Kendall's tau.
## Kendall's tau is originally only calculated in trend::mk.test, but is not influenced by `continuity`. `mk.test` only spits out one p-value.
## trend::sens.slope *always* takes `continuity = TRUE` into account when calculating the p-value for the Sen-Theil slope estimator.
} else {
df_output <- tibble::tibble(.n,
.bsen,
.bsen_lwr,
.bsen_upr,
.bsen_z,
.bsen_p,
.mks,
.mks_sg,
.mks_var,
.mks_z,
.mks_p,
.ktau,
.ktau_d)
}
## RETURN TIBBLE OUTPUT ####
return(df_output)
}
#' FUNCTION : trend_stats
#' AUTHOR : todd.ellis@utas.edu.au
#' DATE : 2021-10-09 : 2021-01-15 (est.)
#' DESCRIPTION : Dataframe-friendly adaptation of Sen-Theil, Mann-Kendall, and Kendall trend statistics.
#' NOTES : Heavily drawn from the `trend` package, which only works on single vectors and returns a uniquely-defined list object. (Ew!)
#' TODO : Is there a better way to implement this function rather than within dplyr::summarise(...)?