# RWDataPlyr: Read and Manipulate Data from 'RiverWare'
#
# Documented in read_rw_csv

#' Read RiverWare/RiverSMART produced csv files
#' 
#' `read_rw_csv()` reads in a CSV file created from RiverWare. If the CSV 
#' file does not contain column names that RiverWare always uses (see Details), 
#' then it assumes that the CSV file was not created from RiverWare and throws 
#' an error. It also removes spaces from the column names, and adjusts the 
#' `Object.Slot` and `Slot Value` columns to be `ObjectSlot` and `Value`, 
#' respectively. Finally, the `Timestep` column is rewritten from the 
#' month-day-year order found in the file to year-month-day order; the time 
#' portion of each timestep is left as is.
#' 
#' The required column names are: `Run Number`, `Trace Number`, `Object.Slot`, 
#' `Timestep`, `Slot Value`. See the CSV output section of the 
#' [RiverWare documentation](http://www.riverware.org/HelpSystem/index.html#page/SolutionApproaches/Solutions_MRM.4.5.html#ww477402)
#' for more information on the other optional column names.
#' 
#' This function uses [data.table::fread()] to read in 
#' the CSV file, and forces it to expect a CSV file, expect headers, and return
#' a plain `data.frame`, which is converted to a tibble before it is returned.
#' 
#' @param file The name of the file which the data are to be read from. Either 
#'   an absolute or relative path.
#' 
#' @return A tibble (data frame) containing the data in the csv, with the 
#'   column names and `Timestep` values adjusted as described above.
#' 
#' @examples 
#' zz <- read_rw_csv(system.file(
#'   "extdata/Scenario/ISM1988_2014,2007Dems,IG,Most",
#'   "KeySlots.csv",
#'   package = "RWDataPlyr"
#' ))
#' 
#' @seealso [read.rdf()]
#' 
#' @export
read_rw_csv <- function(file) {
  # read in the file; data.table = FALSE so the rest of the function works on 
  # a plain data.frame
  zz <- data.table::fread(file, sep = ",", header = TRUE, data.table = FALSE)
  
  # check column names (done before renaming, so the names still include 
  # spaces exactly as RiverWare writes them)
  req_columns <- c("Run Number", "Trace Number", "Object.Slot", "Timestep", 
                   "Slot Value")
  
  if (!all(req_columns %in% colnames(zz))) {
    stop("The ", file, 
         " csv does not have all required column names.\n",
         "It is unlikely this is a csv file from RiverWare.\n",
         "Required column names are: ", paste(req_columns, collapse = ", "))
  }
  
  # update column names so they contain no spaces
  col_names <- gsub(" ", "", colnames(zz), fixed = TRUE)
  # remove . from Object.Slot
  col_names[col_names == "Object.Slot"] <- "ObjectSlot"
  # change slot value to just Value ("Slot Value" is "SlotValue" by now)
  col_names[col_names == "SlotValue"] <- "Value"
  colnames(zz) <- col_names
  
  # reformat the timesteps in place; a direct column assignment avoids the 
  # superseded dplyr::mutate_at() and any reliance on the magrittr pipe
  zz[["Timestep"]] <- convert_rw_csv_ts(zz[["Timestep"]])
  
  tibble::as_tibble(zz)
}

#' Convert RiverWare csv timesteps to year-first order
#' 
#' Converts the timestep in the rw csv from "mm-dd-yyyy hh:mm:ss" to 
#' "yyyy-m-dd hh:mm:ss" format. The month loses any leading zero (it is passed 
#' through `as.integer()`), while the day is kept exactly as it appears. It 
#' does not modify the time portion at all, but expects there to be one, 
#' separated from the date by a single space.
#' 
#' @param ts Character vector of timesteps as found in the csv file.
#' 
#' @return Character vector the same length as `ts`. A zero-length `ts` 
#'   returns `character(0)`.
#' 
#' Errors if any entry does not split into exactly a date and a time, if any 
#' date does not split into exactly three "-" separated pieces, or if any 
#' third piece (the year) is not four characters long.
#' 
#' @noRd
convert_rw_csv_ts <- function(ts) {
  # nothing to convert, e.g. a csv that only contains the header row
  if (length(ts) == 0L) {
    return(character(0))
  }
  
  # split every entry into its date and time parts; checking the piece counts 
  # with lengths() keeps this type-stable regardless of the input
  date_time <- strsplit(ts, " ", fixed = TRUE)
  if (any(lengths(date_time) != 2L)) {
    stop("Unexpected Timestep format encountered when reading csv.\n",
         "Year, month, day and time should be seperated by a space.")
  }
  dates <- vapply(date_time, `[[`, character(1), 1L, USE.NAMES = FALSE)
  times <- vapply(date_time, `[[`, character(1), 2L, USE.NAMES = FALSE)
  
  # split every date into month, day, and year
  mdy <- strsplit(dates, "-", fixed = TRUE)
  if (any(lengths(mdy) != 3L)) {
    stop("Unexpected Timestep format encountered when reading csv.\n",
         "Year, month, and day should be seperated by '-'")
  }
  months <- vapply(mdy, `[[`, character(1), 1L, USE.NAMES = FALSE)
  days <- vapply(mdy, `[[`, character(1), 2L, USE.NAMES = FALSE)
  years <- vapply(mdy, `[[`, character(1), 3L, USE.NAMES = FALSE)
  
  # a four character third piece is how year-last input is recognized
  if (!all(nchar(years) == 4)) {
    stop("Unexpected Timestep format encountered when reading csv.\n",
         "Year is expected to be the 3rd element of the month, day, year.")
  }
  
  # as.integer() strips a leading zero from the month; day and time are kept
  paste0(years, "-", as.integer(months), "-", days, " ", times)
}

#' Names of the optional columns RiverWare can include in its csv files
#' 
#' Kept around for now, since scenario dimensions might later be determined by 
#' working out which columns in a file are not among these autogenerated 
#' optional columns.
#' 
#' @return Character vector of the optional column names, spelled with their 
#'   spaces.
#' 
#' @noRd
rwCsvOptionalCols <- function() {
  object_slot_cols <- c("Object Name", "Object Type", "Slot Name", 
                        "Slot Name with Unit", "Unit")
  time_cols <- c("Timestep Size", "Year", "Month")
  run_cols <- c("Model Name", "Ruleset File Name", "Input DMI Name", 
                "MRM Config Name", "MRM Descriptors")
  
  c(object_slot_cols, time_cols, run_cols)
}