inst/doc/fasstr_under_the_hood.R

## ----options, include=FALSE---------------------------------------------------
knitr::opts_chunk$set(eval = nzchar(Sys.getenv('hydat_eval')))
library(fasstr)


## ---- eval=FALSE--------------------------------------------------------------
#  # Check if data is provided and import it
#  flow_data <- flowdata_import(data = data,
#                               station_number = station_number)
#  
#  # Save the original columns (to check for STATION_NUMBER col at end) and ungroup if necessary
#  orig_cols <- names(flow_data)
#  flow_data <- dplyr::ungroup(flow_data)
#  
#  # Check and rename columns
#  flow_data <- format_all_cols(data = flow_data,
#                               dates = as.character(substitute(dates)),
#                               values = as.character(substitute(values)),
#                               groups = as.character(substitute(groups)),
#                               rm_other_cols = TRUE)

## ---- eval=FALSE--------------------------------------------------------------
#  ## SET UP BASIN AREA
#  suppressWarnings(flow_data <- add_basin_area(flow_data, basin_area = basin_area))
#  flow_data$Basin_Area_sqkm_temp <- flow_data$Basin_Area_sqkm
#  
#  ## ADD YIELD COLUMN
#  flow_data <- dplyr::mutate(flow_data, Yield_mm = Value * 86400 / (Basin_Area_sqkm_temp * 1000))
#  
#  # Return the original names of the Date and Value columns
#  names(flow_data)[names(flow_data) == 'Value'] <- as.character(substitute(values))
#  names(flow_data)[names(flow_data) == 'STATION_NUMBER'] <- as.character(substitute(groups))
#  
#  
#  ## Reformat to original names and groups
#  ## -------------------------------------
#  
#  # Return columns to original order plus new column
#  if('Yield_mm' %in% orig_cols){
#    flow_data <-  flow_data[, c(orig_cols)]
#  } else {
#    flow_data <-  flow_data[, c(orig_cols, paste('Yield_mm'))]
#  }
#  
#  dplyr::as_tibble(flow_data)

## ---- eval=FALSE--------------------------------------------------------------
#  # Fill missing dates, add date variables
#  flow_data <- analysis_prep(data = flow_data,
#                             water_year_start = water_year_start)
#  
#  # Add rolling means to end of dataframe
#  flow_data <- add_rolling_means(data = flow_data, roll_days = roll_days, roll_align = roll_align)
#  colnames(flow_data)[ncol(flow_data)] <- 'RollingValue'

## ---- eval=FALSE--------------------------------------------------------------
#  # Filter for the selected year (remove excluded years after)
#  flow_data <- dplyr::filter(flow_data, WaterYear >= start_year & WaterYear <= end_year)
#  flow_data <- dplyr::filter(flow_data, Month %in% months)

## ---- eval=FALSE--------------------------------------------------------------
#  # Calculate basic stats
#  annual_stats <-   dplyr::summarize(dplyr::group_by(flow_data, STATION_NUMBER, WaterYear),
#                                     Mean = mean(RollingValue, na.rm = ignore_missing),
#                                     Median = stats::median(RollingValue, na.rm = ignore_missing),
#                                     Maximum = max (RollingValue, na.rm = ignore_missing),
#                                     Minimum = min (RollingValue, na.rm = ignore_missing))
#  annual_stats <- dplyr::ungroup(annual_stats)
#  
#  #Remove Nans and Infs
#  annual_stats$Mean[is.nan(annual_stats$Mean)] <- NA
#  annual_stats$Maximum[is.infinite(annual_stats$Maximum)] <- NA
#  annual_stats$Minimum[is.infinite(annual_stats$Minimum)] <- NA
#  
#  # Calculate annual percentiles
#  if(!all(is.na(percentiles))) {
#    for (ptile in percentiles) {
#      # Calculate percentiles
#      annual_stats_ptile <- dplyr::summarise(dplyr::group_by(flow_data, STATION_NUMBER, WaterYear),
#                                             Percentile = stats::quantile(RollingValue, ptile / 100, na.rm = TRUE))
#      annual_stats_ptile <- dplyr::ungroup(annual_stats_ptile)
#      names(annual_stats_ptile)[names(annual_stats_ptile) == 'Percentile'] <- paste0('P', ptile)
#  
#      # Merge with stats
#      annual_stats <- merge(annual_stats, annual_stats_ptile, by = c('STATION_NUMBER', 'WaterYear'))
#  
#      # Remove percentile if mean is NA (workaround for na.rm=FALSE in quantile)
#      annual_stats[, ncol(annual_stats)] <- ifelse(is.na(annual_stats$Mean), NA, annual_stats[, ncol(annual_stats)])
#    }
#  }

## ---- eval=FALSE--------------------------------------------------------------
#  # Rename year column
#  annual_stats <- dplyr::rename(annual_stats, Year = WaterYear)
#  
#  # Remove selected excluded years
#  annual_stats[annual_stats$Year %in% exclude_years, -(1:2)] <- NA
#  
#  
#  # If transpose if selected
#  if (transpose) {
#    # Get list of columns to order the Statistic column after transposing
#    stat_levels <- names(annual_stats[-(1:2)])
#  
#    # Transpose the columns for rows
#    annual_stats <- tidyr::gather(annual_stats, Statistic, Value, -STATION_NUMBER, -Year)
#    annual_stats <- tidyr::spread(annual_stats, Year, Value)
#  
#    # Order the columns
#    annual_stats$Statistic <- factor(annual_stats$Statistic, levels = stat_levels)
#    annual_stats <- dplyr::arrange(annual_stats, STATION_NUMBER, Statistic)
#  }
#  
#  # Give warning if any NA values
#  missing_values_warning(annual_stats[, 3:ncol(annual_stats)])
#  
#  
#  # Recheck if station_number/grouping was in original data and rename or remove as necessary
#  if(as.character(substitute(groups)) %in% orig_cols) {
#    names(annual_stats)[names(annual_stats) == 'STATION_NUMBER'] <- as.character(substitute(groups))
#  } else {
#    annual_stats <- dplyr::select(annual_stats, -STATION_NUMBER)
#  }
#  
#  dplyr::as_tibble(annual_stats)

## ---- fig.height = 3, fig.width = 7, comment=NA-------------------------------
# Calculate the statistics
annual_stats <- calc_annual_stats(station_number = c('08NM116', '08NM240'),
                                  start_year = 1985, end_year = 2015)

# Wrangle statistics for plotting
annual_stats <- tidyr::gather(annual_stats, Statistic, Value, -Year, -STATION_NUMBER)

# Group data by grouping
tidy_plots <- dplyr::group_by(annual_stats, STATION_NUMBER)

# Create a tibble with a column of STATION_NUMBERs and a column of data for each STATION_NUMBER
tidy_plots <- tidyr::nest(tidy_plots)

# Create a new column of plots using mutate and purrr::map2
tidy_plots <- dplyr::mutate(tidy_plots,
                            plot = purrr::map2(data, STATION_NUMBER, 
                                               ~ggplot2::ggplot(data = ., ggplot2::aes(x = Year, y = Value, color = Statistic)) +
                                                 ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) +
                                                 ggplot2::geom_line(alpha = 0.5, na.rm = TRUE) +
                                                 ggplot2::geom_point(na.rm = TRUE) +
                                                 ggplot2::ylab('Discharge (cms)')

                            ))


# Create a list of named plots extracted from the tibble
plots <- tidy_plots$plot
if (nrow(tidy_plots) == 1) {
  names(plots) <- 'Annual_Statistics'
} else {
  names(plots) <- paste0(tidy_plots$STATION_NUMBER, '_Annual_Statistics')
}

# Return the plots
plots

Try the fasstr package in your browser

Any scripts or data that you put into this service are public.

fasstr documentation built on March 31, 2023, 10:25 p.m.