wrapwrangler: Process WinWRAP data.

Documented in extract_storage

#' extract_storage
#'
#' @param control_path path to control points shapefile
#' @param storage_path path to WinWRAP control point reservoir storage file
#' @details extracts values from control point storage output file into a usable time series
#' @return A time series of values.
#' @import readr
#' @import dplyr
#' @import sf
#' @import rgdal
#' @import purrr
#' @import sqldf
#' @import janitor
#' @import tidyr
#' @import ggplot2
#' @import av
#' @import gganimate
#' @export

extract_storage <- function(control_path = "C:/Users/nels863/OneDrive - PNNL/Documents/wrapwrangler/inst/extdata/Colorado_ControlPoints.shp",
                            storage_path = "C:/Users/nels863/OneDrive - PNNL/Documents/wrapwrangler/inst/extdata/C3_Res.TOU"){

  #
  seq(-180 + 0.125 / 2, 180 - 0.125 / 2, 0.125) -> lon_seq
  seq(-90 + 0.125 / 2, 90 - 0.125 / 2, 0.125) -> lat_seq
  snap_to_grid <- function(x){
    x %>% mutate(x = lon_seq[which.min(abs(LON - lon_seq))],
                 y = lat_seq[which.min(abs(LAT - lat_seq))])
  }

  # Read in the control points shapefile. We only need the CP_ID and the coordinates.
  message("Loading in control points.")
  read_sf(control_path) %>%
    as_tibble() %>%
    unique() %>%
    dplyr::select(2,3,4) %>% unique() -> control_points

  # This shapefile contains duplicate control points with coordinates off by .0001, so lets remove any duplicates.
  duplicates <- control_points[order(control_points$CP_ID, decreasing=TRUE),]

  controls <- duplicates[!duplicated(duplicates$CP_ID),]

  # Scan the output text file that is produced from WinWRAP.
  # The code below splits each control point dataset into its own list.
  message("Loading in WinWRAP output.")
  a <- suppressMessages(scan(storage_path,
                             what="", sep="\n",
                             skip = 2,
                             blank.lines.skip = TRUE,
                             skipNul = TRUE))
  b <- strsplit(a, "[[:space:]]+")
  c <- b[lengths(b)>2]
  d <- split(c, ceiling(seq_along(c)/80))

  # Create the final list where all of the new dataframes will be placed.
  control_df_list <- list()

  # Map through each of the control point datasets and create a new dataframe.
  map_cycles <- 1:length(d)
  message("Cycling through data chunks.")
  for(i in map_cycles){

    # Load in the cycle and find what control point we are looking at.
    d[[i]] -> set
    set[[1]] -> title_row
    title_row[[10]] -> control_point

    # Create a table from the rest of the rows and make the first row the names of columns.
    data.frame(matrix(unlist(set[c(2:80)]),
                      nrow=79,
                      byrow=T),stringsAsFactors=FALSE) -> unlisted
    names(unlisted)<-c("YEAR", month.abb, "MEAN")
    # Remove the MEAN row and TOTAL column.
    unlisted[-c(79,1),-c(14)] -> control_table

    # Pivot this table so that the year, month, and shortage are all on the same row.
    control_table %>%
      pivot_longer(!YEAR, names_to = "MONTH",
                   values_to = "RESERVOIR_STORAGE_CFS") %>%
      # Add in the CP_ID into a new column.
      dplyr::mutate(CP_ID = control_point) -> long_table
    # Join this with the control points shapefile to get the coordinates attached.
    long_table %>%
      dplyr::left_join(controls, by = "CP_ID") %>%
      as_tibble() %>% unique() %>% dplyr::select(4,1:3,5,6)-> final_table

    i -> number
    # Add this table to the list of control dataframes.
    final_table -> control_df_list[[number]]
  }

  # Join all of these tables together into a single control point dataframe.
  bind_rows(control_df_list) %>%
    filter(!is.na(LAT)) -> final_CP_table


  final_CP_table %>% dplyr::select(CP_ID, LAT, LON = LONG) %>%
    unique() -> CP_data

  CP_data %>% filter(!is.na(LAT)) %>%
    rowwise() %>%
    snap_to_grid() %>% ungroup() %>%
    select(CP_ID, x, y) -> CP_ID_grids

  CP_ID_grids %>% left_join(final_CP_table) %>%
    group_by(CP_ID, x, y, YEAR, MONTH) %>%
    mutate(RESERVOIR_STORAGE_CFS = as.numeric(RESERVOIR_STORAGE_CFS),
           YEAR = as.numeric(YEAR)) %>%
    filter(!is.na(RESERVOIR_STORAGE_CFS)) %>%
    summarise(storage_cfs = sum(RESERVOIR_STORAGE_CFS)) ->
    grid_stortages

  return(grid_stortages)
}