wrapwrangler: Process WinWRAP data.

Documented in extract_wrap_flow

#' extract_wrap_flow
#'
#' @param control_path path to control points shapefile
#' @param flo_path path to WinWRAP FLO file
#' @details extracts values from FLO file into a usable time series
#' @return A time series of values.
#' @import readr
#' @import dplyr
#' @import sf
#' @import rgdal
#' @import purrr
#' @import sqldf
#' @import janitor
#' @import tidyr
#' @import ggplot2
#' @import av
#' @import gganimate
#' @export

extract_wrap_flow <- function(control_path = "C:/Users/nels863/OneDrive - PNNL/Documents/wrapwrangler/inst/extdata/Colorado_ControlPoints.shp",
                         flo_path = "C:/Users/nels863/OneDrive - PNNL/Documents/wrapwrangler/inst/extdata/C3.FLO"){

  #
  seq(-180 + 0.125 / 2, 180 - 0.125 / 2, 0.125) -> lon_seq
  seq(-90 + 0.125 / 2, 90 - 0.125 / 2, 0.125) -> lat_seq
  snap_to_grid <- function(x){
    x %>% mutate(x = lon_seq[which.min(abs(LON - lon_seq))],
                 y = lat_seq[which.min(abs(LAT - lat_seq))])
  }

  # Read in the control points shapefile. We only need the CP_ID and the coordinates.
  message("Loading in control points.")
  read_sf(control_path) %>%
    as_tibble() %>%
    unique() %>%
    dplyr::select(2,3,4) %>% unique() -> control_points

  # This shapefile contains duplicate control points with coordinates off by .0001, so lets remove any duplicates.
  duplicates <- control_points[order(control_points$CP_ID, decreasing=TRUE),]

  controls <- duplicates[!duplicated(duplicates$CP_ID),]

  # Scan the output text file that is produced from WinWRAP.
  # The code below splits each control point dataset into its own list.
  message("Loading in WinWRAP output.")
  a <- suppressMessages(scan(flo_path,
                             what="", sep="\n",
                             skip = 3,
                             blank.lines.skip = TRUE,
                             skipNul = TRUE))
  b <- strsplit(a, "[[:space:]]+")
  c <- b[lengths(b)>13]
  c2 <- c[lengths(c)<15]
  d <- split(c2, ceiling(seq_along(c2)/45))
  d2 <- d[lengths(d)>44]
  # Create the final list where all of the new dataframes will be placed.
  control_df_list <- list()

  # Map through each of the control point datasets and create a new dataframe.
  map_cycles <- 1:length(d)
  message("Cycling through data chunks.")
  for(i in map_cycles){

    # Create a table from the rest of the rows and make the first row the names of columns.
    data.frame(matrix(unlist(d2[[i]]),
                      nrow=45,
                      byrow=T),stringsAsFactors=FALSE) -> flow_table

    names(flow_table)<-c("CP_ID","YEAR", month.abb)

    # Pivot this table so that the year, month, and shortage are all on the same row.
     flow_table%>%
      pivot_longer(!c(CP_ID,YEAR), names_to = "MONTH",
                   values_to = "NATURAL_FLOWS_M3") -> long_table

      long_table$CP_ID <- substring(long_table$CP_ID,3)
      long_table$NATURAL_FLOWS_M3 <- (as.double(long_table$NATURAL_FLOWS_M3) * 1233.48)
    # Join this with the control points shapefile to get the coordinates attached.
    long_table %>%
      dplyr::left_join(controls, by = "CP_ID") %>%
      as_tibble() %>% unique()-> final_table

    i -> number
    # Add this table to the list of control dataframes.
    final_table -> control_df_list[[number]]
  }

  # Join all of these tables together into a single control point dataframe.
  bind_rows(control_df_list) %>%
    filter(CP_ID != c("L10000", "L20000"))-> final_CP_table


  final_CP_table %>% dplyr::select(CP_ID, LAT, LON = LONG) %>%
    unique() -> CP_data

  CP_data %>% filter(!is.na(LAT)) %>%
    rowwise() %>%
    snap_to_grid() %>% ungroup() %>%
    select(CP_ID, x, y) -> CP_ID_grids

  final_CP_table %>% left_join(CP_ID_grids) %>%
    group_by(CP_ID, x, y, LAT, LONG, YEAR, MONTH) %>%
    mutate(NATURAL_FLOWS_M3 = as.double(NATURAL_FLOWS_M3),
           YEAR = as.numeric(YEAR)) %>%
    filter(!is.na(NATURAL_FLOWS_M3)) %>%
    filter(!is.na(LAT)) %>%
    dplyr::select(1,2,3,4,5,6,7,8)-> grid_flows
    #summarise(natural_flows_m3 = sum(NATURAL_FLOWS_M3)) ->

  return(grid_flows)
}