#' @title Retrieve R-friendly hydrology data from DBHYDRO using DB keys
#'
#' @description Downloads and lightly processes DBHYDRO data. This function uses DB keys to download datasets and is therefore more versatile than getDBHYDRO.
#'
#' @usage getHydro(dbkey = "03638", startDate = "19600101",
#' endDate = Sys.Date())
#'
#' @param dbkey DBkey for station and parameter of interest. Use 'getDBkey()' to find DBkeys associated with a station.
#' @param startDate beginning of period of record, in form YYYYMMDD or YYYY-MM-DD
#' @param endDate end of period of record, in form YYYYMMDD or YYYY-MM-DD
#'
#' @return dataframe \code{getHydro} returns a dataframe of data
#'
#'
#' @examples
#' \dontrun{
#' ### example workflow:
#' getDBkey(stn = "S333", type = "FLOW") # find DBkey for station/parameter of interest
#' s333.daily <- getHydro(dbkey = "15042") # download data for DBkey
#' tail(s333.daily)
#'
#' ### check that instantaneous data is also loaded properly
#' ### downloading large datasets may take a very long time
#' s333.inst <- getHydro(dbkey = "65086",
#' startDate = Sys.Date())
#' tail(s333.inst)
#' }
#'
#' @importFrom httr GET
#' @importFrom utils read.csv
#'
#' @export
getHydro <- function(dbkey = "03638", startDate = "19600101",
endDate = Sys.Date()) { # format(x = strptime(x = as.character(Sys.Date()), format = "%Y-%m-%d"), "%Y-%m-%d")
if(grepl(x = startDate, pattern = "-")) {
startDate <- gsub(x = startDate, pattern = "-", replacement = "")
cat("hyphens in startDate are being removed; date format is assumed to be %Y-%m-%d\n")
}
if(grepl(x = endDate, pattern = "-")) {
endDate <- gsub(x = endDate, pattern = "-", replacement = "")
cat("hyphens in endDate are being removed; date format is assumed to be %Y-%m-%d\n")
}
urlDL <- paste0("http://my.sfwmd.gov/dbhydroplsql/web_io.report_process?v_period=uspec&v_start_date=", startDate, "&v_end_date=", endDate, "&v_report_type=format6&v_target_code=file_csv&v_run_mode=onLine&v_js_flag=Y&v_db_request_id=5753707&v_where_clause=&v_dbkey=", dbkey, "&v_os_code=Unix&v_interval_count=5&v_cutover_datum=1")
# "http://my.sfwmd.gov/dbhydroplsql/web_io.report_process?v_period=uspec&v_start_date=" "&v_end_date=" "&v_report_type=format6&v_target_code=file_csv&v_run_mode=onLine&v_js_flag=Y&v_db_request_id=5753707&v_where_clause=&v_dbkey=" "&v_os_code=Unix&v_interval_count=5&v_cutover_datum=1"
fileLoc <- tempfile()
httr::GET(urlDL, httr::write_disk(fileLoc, overwrite = TRUE), httr::timeout(99999))
### TODO: identify instantaneous data (e.g., dbkey = "65086") and process differently
###
### 1. load file, check if line 3: stat = INST or MEAN
output_temp <- utils::read.csv(fileLoc, stringsAsFactors = FALSE, skip = 1, header = TRUE, row.names=NULL)
# row names are shifted right 2
if (output_temp$UNITS[1] == "DA") { # pretty fragile code right here.
dataType <- "daily"
} else if (output_temp$UNITS[1] == "BK") {
dataType <- "inst"
}
if (dataType %in% "inst") {
unitName <- output_temp$TYPE[1]
param <- output_temp$STATION[1]
# output_temp <- utils::read.csv(fileLoc, stringsAsFactors = FALSE, skip = 0, header = FALSE)
skip_arg <- min(which(output_temp[, 3] == dbkey)) ### this is a fragile approach to specifying number of columns to skip
colNames <- c("date", "code", "dbkey", "value", "blank", "QA") ### Fragile! These may change for other parameters or datasets
output_temp_2 <- utils::read.csv(fileLoc, stringsAsFactors = FALSE, skip = skip_arg, header = FALSE, row.names = NULL)
output <- output_temp_2[, 1:length(colNames)]
### remove blank rows (sometimes at start and end of dataset)
blank_rows <- which(rowSums(apply(MARGIN = 2, X = output, FUN = function(x) is.na(x) | grepl(x, pattern = '^\\s*$'))) == ncol(output))
output <- output[-blank_rows, ]
names(output) <- colNames
output <- output[, grep(x = names(output), pattern = "blank", invert = TRUE)] # remove empty column
output$parameter <- param
output$units <- unitName
### process dates
output$date <- as.POSIXct(as.character(tolower(output$date)), format = "%d-%b-%Y %X")
# output$date <- gsub(pattern = "([a-zA-Z]{2}-.*)", replacement = "\\L\\1", x = output$date, perl=TRUE)
# output$date <- as.POSIXct(strptime(output$date, format = "%d-%b-%Y"))
output$year <- as.numeric(substr(output$date, 1, 4))
output$mo <- as.numeric(substr(output$date, 6, 7))
output$day <- as.numeric(substr(output$date, 9, 10))
output$stn <- sapply(strsplit(x = output$code[1], split = "-"), "[[", 1)
### remove marginalia at bottom of file
pre_trim <- nrow(output)
output <- output[!is.na(output$value), ]
### TODO: optionally, replace "value" with name of parameter obtained from DBKey query?
output <- output[, c("stn", "date", "year", "mo", "day", "value", "parameter", "units", "QA", "dbkey")]
} else {
output_temp <- utils::read.csv(fileLoc, stringsAsFactors = FALSE, skip = 0, header = FALSE)
skip_arg <- min(which(output_temp[, 2] == dbkey)) - 2 ### this is potentially a fragile approach to specifying DBKEY column
# skip_arg <- min(which(output_temp[, 2] == dbkey)) - 3 ### this is potentially a fragile approach to specifying DBKEY column
output <- utils::read.csv(fileLoc, stringsAsFactors = FALSE, skip = skip_arg)
names(output) <- tolower(names(output))
names(output)[1] <- "stn"
names(output)[4] <- "value"
### process date data
output$date <- gsub(pattern = "([a-zA-Z]{2}-.*)", replacement = "\\L\\1", x = output$daily.date, perl=TRUE)
output$date <- as.POSIXct(strptime(output$date, format = "%d-%b-%Y"))
output$year <- as.numeric(substr(output$date, 1, 4))
output$mo <- as.numeric(substr(output$date, 6, 7))
output$day <- as.numeric(substr(output$date, 9, 10))
output$site <- gsub(pattern = "_.*", replacement = "\\1", x = output$stn, perl=TRUE) # remove anything after the underscore
### remove marginalia at bottom of file, as seen in 'a <- getDBHYDROhydro(dbkey = "VV474")'
pre_trim <- nrow(output)
output <- output[!is.na(output$date), ]
### TODO: optionally, replace "value" with name of parameter obtained from DBKey query?
output <- output[, c("stn", 'site', "date", "year", "mo", "day", "value")]
}
if (!nrow(output) == pre_trim) {
message(paste(pre_trim - nrow(output), "rows with date = NA trimmed from", dbkey, "dataset \n"))
}
unlink(fileLoc)
invisible(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.