smwrQW: Tools for censored data analysis

Documented in importQW

#' Water-Quality Data
#'
#' Converts a dataset containing stacked discrete water-quality sample data to a
#'dataset representing those data as class "qw."
#'
#' Only \code{values} and \code{remark.codes} are required. All others can be
#'interpreted as constant values if the column name is not in \code{data}.  For
#'automatic generation of column names, see \code{\link{makeColNames}}.\cr
#' For \code{reporting.level}, it is better to use \code{NA} than to use an
#'arbitrary small value because the functions to convert to objects for analysis
#'will create reasonable reporting level values if they are not known.\cr
#' For other columns when the actual value is not known, the actual value 
#'is less important for analysis and more important for the user, so 
#'arbitrary values can be used.
#'
#' @param data the dataset.
#' @param keep the names of the columns that represent a single sample and any
#'other common information.
#' @param values the name of the column containing the reported numeric values.
#' @param remark.codes he name of the column containing the remark codes.
#' @param value.codes the name of the column containing any value modifiers, or
#'the character string to use. See \bold{Details}.
#' @param reporting.level the name of the column containing the reporting level,
#'or the numeric value to use. See \bold{Details}.
#' @param reporting.method the name of the column containing the type of the
#'reporting level, or the character string to use. See \bold{Details}.
#' @param reporting.units the name of the column containing the measurement
#'units, or the character string to use. See \bold{Details}.
#' @param analyte.method the name of the column containing the analytic method,
#'or the character string to use. See \bold{Details}.
#' @param analyte.name the name of the column containing the name of the
#'analyte, or the character string to use. See \bold{Details}.
#' @param unique.code the name of the column containing any unique codes, or the
#'character string to use. See \bold{Details}.
#' @param ColNames the name of the column containing the column name to use to
#'make column names, the character string to use if only a single analyte, or
#'"Auto" if the data in \code{unique.code} are USGS parameter codes. See
#'\bold{Details}.
#' @return A data frame containing the columns in \code{keep} and those
#'generated by converting the other columns into class "qw."
#' @note Need some notes
#'
#' @seealso \code{\link{readNWISqw}}, \code{\link{importNWISqw}},
#'\code{\link{makeColNames}}
#' @references Lorenz, D.L., 2014, USGSqw OFR.\cr See information about discrete
#'samples at \url{https://nwis.waterdata.usgs.gov/usa/nwis/qw}.
#' @keywords datasets IO
#' @examples
#'\dontrun{
#'# Convert the stacked qw data supplied in smwrData
#'library(smwrData)
#'data(QWstacked)
#'QWstacked$result_va <- as.numeric(QWstacked$result_va) # raw data are character
#'# The units are both mg/l, no analyte name, generate column names from parameter codes
#'head(importQW(QWstacked, c("site_no", "sample_dt", "sample_tm", "medium_cd"),
#'  "result_va", "remark_cd", "val_qual_tx", "rpt_lev_va", "rpt_lev_cd", 
#'  "mg/l", "meth_cd", "", "parm_cd", "parm_cd"))
#'# A simple example having the minimum information
#'data(QW05078470)
#'importQW(QW05078470, c("DATES", "TIMES"), "P00665", "R00665", "", 0.005, "User", 
#' "mg/l", "Unk", "Dissolved Phosporus", "00665", "DissP")
#'}
#' @export
importQW <- function(data, keep=c("STAID", "DATES", "TIMES", "MEDIM"),
                     values="VALUE", remark.codes="REMRK",
                     value.codes="NWIS", reporting.level="RPLEV", 
                     reporting.method="RLTYP", reporting.units="UNITS",
                     analyte.method="METHD", analyte.name="PSNAM", 
                     unique.code="PCODE", ColNames="Auto") {
  ## Coding history:
  ##    2011Mar14 DLLorenz Original Coding
  ##    2012Oct16 DLLorenz Revision to class "qw"
  ##    2012Dec28 DLLorenz Roxygenized
  ##    2012Dec28          This version
  ## 
  if(missing(data) || class(data) != "data.frame")
    stop("importQW requires an existing dataset.")
  ## Check to generate column names and value codes
  if(ColNames == "Auto") {
    params <- unique(data[[unique.code]])
    Extra <- pcodeNWISqw(params, group=FALSE, name=FALSE, CASRN=FALSE,
                       short=FALSE, units=FALSE, col.name=TRUE)
    data <- merge(data, Extra, by.x=unique.code, by.y="parameter_cd")
    ColNames <-  "col_name"
  } else if(is.null(data[[ColNames]])) { # must be a single column of data
    data <- cbind(data, .col_name=make.names(ColNames))
    ColNames <- ".col_name"
  } else { # Force valid names
    data[[ColNames]] <- make.names(data[[ColNames]])
  }
  if(value.codes == "NWIS") {
    ## paste the 3 columns of values codes
    data$NWIS <- paste(data$QUAL1, data$QUAL2, data$QUAL3, sep='')
  }
  ## Get all of the data needed for class "qw"
  ## The columns values and remark codes are required
  values <- as.numeric(data[[values]])
  remark.codes <- as.character(data[[remark.codes]])
  if(!is.null(data[[value.codes]]))
     value.codes <- as.character(data[[value.codes]])
  if(is.character(reporting.level) && !is.null(data[[reporting.level]])) {
    reporting.level <- as.numeric(data[[reporting.level]]) # force numeric
  } else {
  	reporting.level <- as.numeric(reporting.level) # for NA to numeric if necessary
  }
  if(!is.null(data[[reporting.method]]))
    reporting.method <- as.character(data[[reporting.method]])
  if(!is.null(data[[reporting.units]]))
    reporting.units <- as.character(data[[reporting.units]])
  if(!is.null(data[[analyte.method]]))
    analyte.method <- as.character(data[[analyte.method]])
  if(!is.null(data[[analyte.name]]))
    analyte.name <- as.character(data[[analyte.name]])
  if(!is.null(data[[unique.code]]))
    unique.code <- as.character(data[[unique.code]])
  ## If qw not part of data, then error generated  by later subset
  ##  becuase the names attribute lengths do not match!
  data$qw <- as.qw(values, remark.codes, value.codes, reporting.level, 
                   reporting.method, reporting.units, analyte.method,
                   analyte.name, unique.code)
  ## The function group2row cannot handle complicated data structures like qw
  ##  work around by creating index to values and then extract the actual data
  ## See importNWISqw too
  data$Seq <- seq(nrow(data))
  ## Create the data
  retval <- group2row(data, keep, ColNames, "Seq")
  ## Sort by date if possible
  Dt <- which(sapply(retval, isDateLike))
  if(length(Dt) == 1L) { # Found one
  	Seq <- order(retval[[Dt]], na.last=TRUE)
  	retval <- retval[Seq,]
  }
  for(i in grep(".Seq", names(retval), value=TRUE, fixed=TRUE))
    retval[[i]] <- data$qw[retval[[i]]]
  names(retval) <- gsub(".Seq", "", names(retval), fixed=TRUE)
  return(retval)
}