#'@name clean_wq
#'@title Clean raw water quality DBHYDRO data retrievals
#'@description Removes extra columns associated with QA flags and QA blanks
#' which are used to check on potential sources of contamination. Unless raw
#' is set to TRUE, \code{\link{get_wq}} results are converted from long (each
#' piece of data on its own row) to \code{wide} format (each site x variable
#' combination in its own column).
#'@aliases cleanwq
#'@export
#'@importFrom reshape2 dcast
#'@param dt data.frame output of \code{\link{getwq}}
#'@param raw logical default is FALSE, set to TRUE to return data in "long"
#' format with all comments, qa information, and database codes included
#'@param mdl_handling character string specifying the handling of measurement
#' values below the minimum detection limit (MDL). Example choices for this
#' argument include:
#'\itemize{
#'\item \code{raw}: Returns values exactly as they are stored in the database.
#' Current practice is to return values below the MDL as 0 minus the uncertainty
#' estimate.
#'\item \code{half}: Returns values below the MDL as half the MDL
#'\item \code{full}: Returns values below the MDL as the MDL
#'}
#'
#'@examples \dontrun{
#'#check handling of values below MDL
#' dt <- getwq("FLAB01", "2014-09-14", "2014-09-18", "NITRATE+NITRITE-N",
#' raw = TRUE)
#' clean_wq(dt, mdl_handling = "raw")
#' clean_wq(dt, mdl_handling = "half")
#'}
#'
#'dt <- read.csv(system.file("extdata", "testwq.csv", package = "dbhydroR"))
#'clean_wq(dt)
clean_wq <- function(dt, raw = FALSE, mdl_handling = "raw"){
  # Cleans a water quality retrieval: drops "DI" matrix rows, adds a POSIXct
  # `date` column, optionally rewrites values below the detection limit, and
  # (unless raw is TRUE) reshapes the result from long to wide format.
  #
  # dt           data.frame with at least Matrix, Collection_Date, Value and
  #              MDL columns (plus Station.ID, Test.Name and Units for the
  #              wide output).
  # raw          when TRUE the cleaned long-format data.frame is returned.
  # mdl_handling one of "raw", "half" or "full".
  #
  # Returns a data.frame: long format when raw is TRUE, otherwise one row per
  # date and one column per Station.ID/Test.Name/Units combination.

  # The length check comes first so that a vector, NULL or NA argument gets
  # the informative error below instead of a failure inside `if`.
  if (length(mdl_handling) != 1L ||
      !(mdl_handling %in% c("raw", "half", "full"))) {
    stop("mdl_handling must be one of 'raw', 'half', or 'full'")
  }

  # Drop rows whose Matrix is "DI" (presumably the QA blanks mentioned in the
  # function description -- confirm).
  dt <- dt[dt$Matrix != "DI", ]

  # Only the day-month-year part of Collection_Date is parsed.
  dt$date <- as.POSIXct(strptime(dt$Collection_Date, format = "%d-%b-%Y"),
    tz = "America/New_York")

  # Negative values are treated as "below the MDL" and are replaced by half
  # the MDL ("half") or the MDL itself ("full"); "raw" leaves them untouched.
  correct_mdl <- function(dt, mdl_handling){
    below_mdl <- dt$Value < 0 & !is.na(dt$Value)
    if (mdl_handling != "raw" && any(below_mdl)) {
      replacement <- dt[below_mdl, "MDL"]
      if (mdl_handling == "half") {
        replacement <- replacement / 2
      }
      dt[below_mdl, "Value"] <- replacement
    }
    dt
  }
  dt <- correct_mdl(dt, mdl_handling)

  if (raw == TRUE) {
    return(dt)
  }

  # Keep the leading (up to 23) columns plus `date`. Bounding by ncol(dt) and
  # de-duplicating avoids an "undefined columns selected" error on narrower
  # inputs and a duplicated `date` column when it sits within the first 23.
  keep <- unique(c(seq_len(min(23L, ncol(dt))), which(names(dt) == "date")))
  dt <- dt[, keep, drop = FALSE]

  # NOTE(review): add.missing is kept as in the original call; it is
  # presumably forwarded through `...` to the aggregation function rather
  # than being a dcast argument -- confirm against the reshape2 docs.
  dwide <- reshape2::dcast(dt, date ~ Station.ID + Test.Name + Units,
    value.var = "Value", add.missing = TRUE, fun.aggregate = mean)

  # Discard rows whose date (first column) is NA.
  dwide[!is.na(dwide[, 1]), , drop = FALSE]
}
#'@export
cleanwq <- function(dt, raw = FALSE, mdl_handling = "raw"){
  # Deprecated alias of clean_wq: signal the deprecation, then hand every
  # argument through unchanged and return whatever clean_wq returns.
  .Deprecated(new = "clean_wq")
  clean_wq(dt, raw, mdl_handling)
}
#'@name clean_hydro
#'@title Clean raw hydrologic DBHYDRO data retrievals
#'@description Converts output of \code{\link{get_hydro}} from long (each piece
#' of data on its own row) to wide format (each site x variable combination in
#' its own column). Metadata (station-name, variable, measurement units) is
#' parsed so that it is wholly contained in column names.
#'@aliases cleanhydro
#'@export
#'@importFrom reshape2 dcast
#'@param dt data.frame output of \code{\link[dbhydroR]{gethydro}}
#'@examples
#'\dontrun{
#'clean_hydro(gethydro(dbkey = "15081", date_min = "2013-01-01", date_max = "2013-02-02", raw = TRUE))
#'}
clean_hydro <- function(dt){
  # Reshape long hydrologic data to wide: one row per `date` and one column
  # per station/type/units combination, filled from `data.value` and
  # aggregated with mean.
  wide_formula <- date ~ station + type + units
  # NOTE(review): add.missing is passed exactly as before; it is presumably
  # forwarded through `...` to the aggregation function -- confirm against
  # the reshape2 docs.
  reshape2::dcast(data = dt, formula = wide_formula, fun.aggregate = mean,
    value.var = "data.value", add.missing = TRUE)
}
#'@export
cleanhydro <- function(dt){
  # Deprecated alias of clean_hydro: signal the deprecation, then delegate
  # and return clean_hydro's result. Exported so the alias is handled the
  # same way as the cleanwq alias in this file.
  .Deprecated("clean_hydro")
  clean_hydro(dt = dt)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.