R/summarise_handoff_files.R

Defines functions summarise_handoff_files

Documented in summarise_handoff_files

#' Summarise Hand-off Files from Reserve Level Reports
#'
#' Summarise the seasonal kendall results from reserve level report hand-off files
#'
#' @param  path chr string of full path to .csv handoff files
#' @param param chr string of variable to summarise
#' @param res_region a \code{data.frame} of look-up values that match 3-letter NERR site ids with regions
#'
#' @importFrom dplyr bind_rows group_by summarise
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom tidyr pivot_longer
#'
#' @export
#'
#' @details This function is intended for use with the NERRS national level reporting scripts. It returns a \code{data.frame} that summarises the result of the reserve level seasonal kendall trend analyses found in the hand-off files generated by the reserve level reporting scripts. The summary groups reserves into regional classifications based on user-specified regions given in \code{res_region}.
#'
#' @author Julie Padilla
#'
#' @concept reporting
#'
#' @return Returns a \code{data.frame}
#'

summarise_handoff_files <- function(path, param, res_region = NULL) {

  par_regex <- paste(param, '.csv$', sep = '')
  files <- list.files('handoff_files', pattern = par_regex)
  files <- paste0(path, '/', files, sep = '')

  # generate a list of reserve abbreviations from found files
  abbrev <- substr(files, 15, 17)

  # check for existence of res_region
  if(is.null(res_region))
    stop('res_region must be specified. res_region should be a data.frame with two columns: one that consists of unique NERR reserve ids, and one that regional classifications.')

  # load data for parameter of interest
  x <- lapply(files, read.csv, header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') %>% bind_rows

  x$NERR.Site.ID <- abbrev
  x <- left_join(res_region, x)

  # convert to tidy format
  # x <- gather(x, key = .data$station, value = .data$trend, 4:length(names(x)))
  # names(x)[c(4:5)] <- c('station', 'trend') # no longer needed with pivot_longer
  x <- pivot_longer(x, 4:length(names(x)),
                    names_to = 'station', values_to = 'trend')

  # remove results where trend is NA
  x <- x[!is.na(x$trend), ]

  # reassign values that dplyr can use
  x$trend <- gsub('i', 'dec', x$trend)
  x$trend <- gsub('h', 'inc', x$trend)
  x$trend <- gsub('\u2014', 'insig', x$trend)
  x$trend <- gsub('x', 'insuff', x$trend)

  # summarise
  x_summary <- x %>%
    group_by(.data$Region) %>%
    summarise(station_ct = n()
              , decreasing = sum(.data$trend == 'dec')
              , increasing = sum(.data$trend == 'inc')
              , no_trend = sum(.data$trend == 'insig')
              , insuff_data = sum(.data$trend == 'insuff'))

  # tack on the parameter
  x_summary$parameter <- param

  # return the summary
  return(x_summary)
}
padilla410/SWMPrExtension documentation built on Dec. 29, 2021, 5:48 a.m.