
# Defines functions: gr_test_vars
#
# Documented in: gr_test_vars

#' Test hydrograph changes
#'
#' Use this function to test interannual changes of hydrograph separation
#' variables returned by [grwat::gr_summarize()]. Pettitt test is used to
#' detect the change year — i.e. the year which divides the time series into
#' the statistically most differing samples. Student (Welch) and Fisher tests
#' are used to estimate the significance of mean and variance differences of
#' these samples. Theil-Sen test calculates the trend slope value.
#' Mann-Kendall test is performed to reveal the significance of the trend.
#'
#' Number of observations formally required for various tests: Pettitt > 0,
#' Mann-Kendall > 2, Theil-Sen > 1, Student > 1, Fisher > 1. A variable with
#' fewer than three distinct non-missing values is skipped altogether: its
#' test entries stay `NULL` and its row in `pvalues` is filled with `NA`.
#'
#' @param df `data.frame` as produced by [grwat::gr_summarize()] function.
#' @param ... Names of the tested variables (quoted). If no names are given,
#'   every variable with a non-zero `Order` in the internal parameter table
#'   is tested, in that order.
#' @param year Integer value of year used to divide series in two samples
#'   compared by Student and Fisher tests. Defaults to `NULL` which means that
#'   the year is calculated automatically by Pettitt test.
#' @param exclude Integer vector of years to be excluded from tests.
#'
#' @return `list` of testing results with following elements:
#'
#' | __Element__  | __Description__ |
#' | ------------ | --------------- |
#' | `ptt`        | Pettitt tests for change year |
#' | `mkt`        | Mann-Kendall test for trend significance |
#' | `tst`        | Theil-Sen test for slope estimation |
#' | `ts_fit`     | Theil-Sen linear model fit |
#' | `tt`         | Student (Welch) test for significance of mean differences between two periods |
#' | `ft`         | Fisher test for significance of variance differences between two periods |
#' | `year`       | Integer value of year used to divide series in two samples compared by Student and Fisher tests |
#' | `maxval`     | Maximum value for the variable along the full time series |
#' | `fixed_year` | Boolean `TRUE` or `FALSE` value indicating if the year was fixed |
#' | `pvalues`    | p-values of all tests summarized as a single table for all variables |
#'
#' @export
#' @example inst/examples/gr_test_vars.R
gr_test_vars <- function(df, ..., year = NULL, exclude = NULL){

  fields = rlang::exprs(...) %>% as.character()

  # No variables requested: fall back to every ordered variable
  if (length(fields) == 0) {
    fields = params_out %>%
      dplyr::filter(.data$Order != 0) %>%
      dplyr::arrange(.data$Order) %>%
      dplyr::pull(.data$Name)
  }

  # Parameter descriptions, in the order the variables were requested
  prms = params_out %>%
    dplyr::filter(.data$Name %in% fields) %>%
    dplyr::slice(match(fields, .data$Name))

  nn = nrow(prms)

  if (nn == 0) {
    stop('None of the requested variables is available for testing',
         call. = FALSE)
  }

  fixed = !is.null(year)

  # Localized variable descriptions
  desc = switch(grenv$loc,
                'RU' = prms$Desc,
                'UA' = prms$Descua,
                'EN' = prms$Descen)

  # Per-variable containers named after the tested variables
  empty_list = function() setNames(vector(mode = 'list', length = nn), prms$Name)
  empty_vec = function() setNames(rep(NA, nn), prms$Name)

  ch_year = empty_vec()  # change years
  mean1 = empty_list()   # means for first period
  mean2 = empty_list()   # means for second period
  mratio = empty_vec()   # change of the mean between periods
  sd1 = empty_vec()      # standard deviations for first period
  sd2 = empty_vec()      # standard deviations for second period
  maxval = empty_list()  # maximum values
  ptt = empty_list()     # Pettitt test
  mkt = empty_list()     # Mann-Kendall test
  tst = empty_list()     # Theil-Sen slope estimation
  ts_fit = empty_list()  # Theil-Sen regression
  tt = empty_list()      # Student t test for periods
  ft = empty_list()      # Fisher F test for periods

  # NOTE(review): the logical predicate is positional, i.e. it assumes that
  # the columns of df follow the row order of params_out — confirm
  df = df %>%
    dplyr::filter(!(.data$Year1 %in% exclude)) %>%
    dplyr::mutate_if(params_out$Winter == 1,
                     replace_year)

  cli::cli_progress_bar('Testing variables', total = nn)

  for (i in seq_len(nn)) {

    vl = dplyr::pull(df, prms$Name[i])

    # Fewer than three distinct non-missing values: nothing can be tested,
    # the placeholders of this variable are left untouched
    if (length(unique(vl[!is.na(vl)])) < 3) {
      cli::cli_progress_update()
      next
    }

    # Dates are compared within a single reference year
    isdate = FALSE
    if (prms$Unitsen[i] %in% c('Date', 'Month')) {
      isdate = TRUE
      lubridate::year(vl) = 2000
      if (prms$Winter[i] == 1) {
        vl = replace_year(vl)
      }
    }

    # PETTITT TEST FOR CHANGE DETECTION

    vl_cmp = !is.na(vl)
    vl_cmp_sum = cumsum(vl_cmp)

    ptt[[i]] = trend::pettitt.test(vl[vl_cmp])

    # The estimate indexes the NA-free series; map it back to a row of df
    nyear = match(ptt[[i]]$estimate, vl_cmp_sum)

    # A user-supplied year always overrides the Pettitt estimate
    ch_year[i] = if (is.numeric(year)) year[1] else df[['Year1']][nyear[1]]

    maxval[[i]] = max(vl, na.rm = TRUE)

    # MANN-KENDALL TEST FOR TREND SIGNIFICANCE

    if (sum(vl_cmp) > 2) { # mk.test requires at least 3 observations
      if (isdate) {
        mkt[[i]] = trend::mk.test(vl[vl_cmp] %>% as.integer())
      } else {
        mkt[[i]] = trend::mk.test(vl[vl_cmp])
      }
    }

    # THEIL-SEN SLOPE ESTIMATION

    df.theil = df %>%
      dplyr::select('Year1', prms$Name[i])

    # Regress on the normalized dates expressed as integers
    if (isdate) {
      df.theil[2] = as.integer(vl)
    }

    frml = substitute(y ~ x,
                      list(y = as.name(prms$Name[i]),
                           x = as.name('Year1')))

    # Only finite values take part in the regression
    fltr = is.finite(df.theil[[2]])

    if (sum(fltr) > 1) { # slope testing requires at least two observations
      ts_fit[[i]] = mblm::mblm(eval(frml), data = df.theil[fltr, ], repeated = FALSE)
      tst[[i]] = trend::sens.slope(df.theil[[2]][fltr])
    }

    # STUDENT AND FISHER TESTS FOR THE TWO PERIODS

    vl_int = vl
    if (isdate) vl_int = as.integer(vl)

    d1 = vl_int[df$Year1 < ch_year[i]]
    d2 = vl_int[df$Year1 >= ch_year[i]]

    d1 = d1[is.finite(d1)]
    d2 = d2[is.finite(d2)]

    mean1[[i]] = round(mean(d1, na.rm = TRUE), 5)
    mean2[[i]] = round(mean(d2, na.rm = TRUE), 5)

    sd1[i] = round(sd(d1, na.rm = TRUE), 5)
    sd2[i] = round(sd(d2, na.rm = TRUE), 5)

    if (isdate) {
      # Means go back to dates; their difference is expressed in days
      mean1[[i]] = mean1[[i]] %>% as.integer() %>% as.Date(origin = '1970-01-01')
      mean2[[i]] = mean2[[i]] %>% as.integer() %>% as.Date(origin = '1970-01-01')
      mratio[i] = lubridate::yday(mean2[[i]]) - lubridate::yday(mean1[[i]])
      mean1[[i]] = mean1[[i]] %>% format("%d-%b")
      mean2[[i]] = mean2[[i]] %>% format("%d-%b")
      sd1[i] = as.integer(sd1[i])
      sd2[i] = as.integer(sd2[i])
    } else {
      # Relative change of the mean, percent
      mratio[i] = round(100 * (mean2[[i]] - mean1[[i]]) / mean1[[i]], 1)
    }

    # Student and Fisher tests require at least two observations in each
    # sample, and neither sample may be constant
    if (length(d1) > 1 && length(d2) > 1) {
      if ((sum(abs(diff(d1)), na.rm = TRUE) != 0) && (sum(abs(diff(d2)), na.rm = TRUE) != 0)) {
        tt[[i]] = t.test(d1, d2)
        ft[[i]] = var.test(d1, d2)
      }
    }

    cli::cli_progress_update()
  }

  # p-value of every test in the list, NA where the test was not performed
  p_values_of = function(tests) {
    vapply(tests,
           function(X) if (is.null(X)) NA_real_ else round(X$p.value, 5),
           numeric(1))
  }

  pvalues = data.frame(
    N = seq_len(nn),
    Variable = desc,
    Change.Year = ch_year,
    Trend = vapply(ts_fit,
                   function(X) if (is.null(X)) NA_real_ else unname(round(coef(X)[2], 5)),
                   numeric(1)),
    # M1 and M2 hold formatted dates or numbers depending on the variable
    M1 = sapply(mean1, function(X) { if(is.null(X)) NA else X }),
    M2 = sapply(mean2, function(X) { if(is.null(X)) NA else X }),
    MeanRatio = mratio,
    sd1 = sd1,
    sd2 = sd2,
    sdRatio = round(100 * (sd2 - sd1) / sd1, 1),
    Mann.Kendall = p_values_of(mkt),
    Pettitt = p_values_of(ptt),
    Student = p_values_of(tt),
    Fisher = p_values_of(ft)
  )

  row.names(pvalues) = seq_len(nn)

  list(ptt = ptt,
       mkt = mkt,
       tst = tst,
       ts_fit = ts_fit,
       tt = tt,
       ft = ft,
       year = ch_year,
       maxval = maxval,
       fixed_year = fixed,
       pvalues = pvalues)
}

# Try the grwat package in your browser
#
# Any scripts or data that you put into this service are public.
#
# grwat documentation built on Nov. 2, 2023, 5:21 p.m.