#' Assign precision Data Quality Levels (DQLs) to continuous data
#'
#' This function assigns a precision data quality level to parameters measured in the field using replicate measurements
#' such as field duplicates, field audits, or split samples. The data quality levels are assigned following DEQ's Data Quality Matrix (DEQ, 2013).
#' The data quality matrix defines the accuracy and precision criteria for equipment calibration and field audits respectively.
#'
#' Use \code{\link{dql_accuracy}} to assign the data quality level using calibration verifications and/or pre- and post-deployment checks.
#'
#' Inputs into this function must be retrieved from Oregon DEQ's continuous data submission template xlsx file v2.03.
#' Use \code{\link{contin_import}} to get the list that holds the data frames used for this function's inputs.
#'
#' Each grab-type audit (Composite, Field Meter, Grab, or Staff Gage) is paired with the logger result closest in time
#' that shares its monitoring location, characteristic, equipment, and result unit. The absolute difference between
#' the two values is compared against the \code{prec_A} and \code{prec_B} columns of \code{odeqcdr::conqc} to grade the
#' audit A, B, or C. Audits more than 30 minutes from the nearest logger result are graded E. Results between two
#' audits receive the lower of the two audit grades. Results outside every deployment period, and results from
#' deployments with no non-rejected audit, are graded E.
#'
#' Oregon Department of Environmental Quality (DEQ). 2013. "Data validation criteria for water quality parameters measured in the field.
#' DEQ04-LAB-0003-QAG Version5.0." \url{http://www.oregon.gov/deq/FilterDocs/DataQualMatrix.pdf}
#'
#' @param audits Data frame of the audit data generated using \code{\link{contin_import}}.
#' @param results Data frame of the results data generated using \code{\link{contin_import}}.
#' @param deployment Data frame of the deployment data generated using \code{\link{contin_import}}.
#' @param audits_only Boolean to indicate if the audit data frame should be returned with new columns for the audit sample DQL,
#' corresponding result value, result units, absolute difference between the result and audit. Default is FALSE.
#' @export
#' @return Vector of the precision DQL indexed in the same order as the result input. Or if audits_only=TRUE the audit data frame with the precision DQL for each audit.
dql_precision <- function(audits, results, deployment, audits_only=FALSE) {

  # Without audits nothing can be graded, so every result is "E".
  # rep() also handles a zero-row results data frame, where assigning a
  # length-one column to a base data.frame would raise an error.
  if (nrow(audits) == 0 && !audits_only) {
    message("There are no audit data, precDQL = E")
    return(rep("E", nrow(results)))
  }

  if (nrow(audits) == 0 && audits_only) {
    warning("There are no audit data")
  }

  audit.cols <- names(audits)

  conqc <- odeqcdr::conqc

  # Join results and deployments. row.in records the input order so the
  # grades can be returned in the same order as the results input.
  df.results <- results %>%
    dplyr::mutate(row.in = dplyr::row_number()) %>%
    dplyr::rename(Logger.datetime = datetime,
                  Logger.Result.Value = Result.Value,
                  Logger.Result.Status.ID = Result.Status.ID,
                  Logger.Result.Comment = Result.Comment) %>%
    dplyr::left_join(deployment, by = c("Monitoring.Location.ID", "Equipment.ID",
                                        "Characteristic.Name")) %>%
    # The join repeats a result once per matching deployment row. A result
    # counts as deployed when ANY of those deployment windows covers it;
    # a missing window (NA comparison) counts as not deployed. Flagging by
    # row.in gives every copy of the same result the same value, so a
    # non-covering deployment cannot downgrade the result to "E" later.
    dplyr::mutate(deployed = Logger.datetime >= Deployment.Start.Date &
                    Logger.datetime <= Deployment.End.Date,
                  deployed = row.in %in% row.in[deployed %in% TRUE])

  df.audits.grab <- audits %>%
    dplyr::filter(Sample.Collection.Method %in% c("Composite","Field Meter","Grab","Staff Gage")) %>%
    dplyr::select(audit.datetime.start, Project.ID, Monitoring.Location.ID,
                  Characteristic.Name, Equipment.ID, Result.Value, Result.Unit,
                  Result.Status.ID, row.audits)

  # Grade the grab sample audits
  df.audits.grab.dql <- results %>%
    dplyr::select(Monitoring.Location.ID, Characteristic.Name, Equipment.ID,
                  Logger.datetime = datetime,
                  Logger.Result.Value = Result.Value,
                  Result.Unit) %>%
    dplyr::full_join(df.audits.grab, by = c("Monitoring.Location.ID", "Characteristic.Name",
                                            "Equipment.ID", "Result.Unit")) %>%
    dplyr::group_by(Monitoring.Location.ID, Characteristic.Name, Equipment.ID, row.audits) %>%
    # Keep the logger result closest in time to each audit. Groups where the
    # time difference is all NA (unmatched results or audits) drop out here
    # because which.min() returns an empty index.
    dplyr::slice(which.min(abs(Logger.datetime - audit.datetime.start))) %>%
    dplyr::mutate(diff.minutes = abs(as.numeric(difftime(Logger.datetime, audit.datetime.start,
                                                         units = "mins")))) %>%
    dplyr::left_join(conqc, by = c("Characteristic.Name", "Result.Unit")) %>%
    dplyr::mutate(diff.Result = abs(Logger.Result.Value - Result.Value),
                  precDQL = dplyr::case_when(diff.Result < prec_A & diff.minutes <= 30 ~ "A",
                                             diff.Result < prec_B & diff.minutes <= 30 ~ "B",
                                             diff.Result >= prec_B & diff.minutes <= 30 ~ "C",
                                             diff.minutes > 30 ~ "E",
                                             TRUE ~ "E")
    ) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(rDQL = precDQL) %>%
    dplyr::select(Logger.datetime, Logger.Result.Value, dplyr::any_of(audit.cols),
                  diff.minutes, diff.Result, precDQL, rDQL, row.audits) %>%
    as.data.frame()

  if (audits_only) {

    df.audits.grab.dql2 <- df.audits.grab.dql %>%
      dplyr::select(Logger.datetime, diff.minutes, Logger.Result.Value, Result.Value,
                    diff.Result, precDQL, rDQL, row.audits)

    # any_of() drops the columns that are about to be replaced without
    # failing when the audits input does not carry precDQL/rDQL yet.
    df.audits.return <- audits %>%
      dplyr::select(-dplyr::any_of(c("precDQL", "rDQL", "Result.Value"))) %>%
      dplyr::left_join(df.audits.grab.dql2, by = c("row.audits")) %>%
      as.data.frame()

    return(df.audits.return)
  }

  # Deployments (location - equipment - characteristic) that have at least
  # one audit that is not rejected.
  audits_deploy <- df.audits.grab.dql %>%
    dplyr::filter(Result.Status.ID != "Rejected") %>%
    dplyr::mutate(audits_deploy = paste0("[",Monitoring.Location.ID," - ",Equipment.ID," - ",Characteristic.Name,"]")) %>%
    dplyr::pull(audits_deploy) %>%
    unique()

  df.results.grade <- df.audits.grab.dql %>%
    # Keep audits that are not rejected, plus any audit more than 30 minutes
    # from its logger result (those are already graded "E" above).
    dplyr::filter(Result.Status.ID != "Rejected" | diff.minutes > 30) %>%
    dplyr::select(Monitoring.Location.ID, Equipment.ID, Characteristic.Name,
                  Logger.Result.Value, Result.Unit, Logger.datetime, DQL_prec = precDQL) %>%
    dplyr::arrange(Monitoring.Location.ID, Equipment.ID, Characteristic.Name, Logger.datetime) %>%
    dplyr::group_by(Monitoring.Location.ID, Equipment.ID, Characteristic.Name) %>%
    # Choose the lower grade of this audit and the next one. The grades are
    # letters, so the larger string is the lower grade.
    dplyr::mutate(DQL_prec2 = pmax(DQL_prec, dplyr::lead(DQL_prec, n = 1), na.rm = TRUE)) %>%
    dplyr::ungroup() %>%
    dplyr::right_join(df.results, by = c("Monitoring.Location.ID", "Equipment.ID",
                                         "Characteristic.Name", "Logger.Result.Value",
                                         "Result.Unit", "Logger.datetime")) %>%
    # Keep original calculated audit grade for datetime of audit. The rest will be filled in from lower grade in DQL_prec2
    dplyr::mutate(results_deploy = paste0("[",Monitoring.Location.ID," - ",Equipment.ID," - ", Characteristic.Name,"]"),
                  precDQL = DQL_prec) %>%
    dplyr::group_by(Monitoring.Location.ID, Equipment.ID, Characteristic.Name) %>%
    dplyr::arrange(Monitoring.Location.ID, Equipment.ID, Characteristic.Name, Logger.datetime) %>%
    tidyr::fill(DQL_prec2, .direction = "downup") %>%
    dplyr::ungroup() %>%
    dplyr::mutate(precDQL = dplyr::if_else(is.na(precDQL), DQL_prec2, precDQL)) %>%
    # set precDQL=E outside of deployment period and for any deployments without audits
    dplyr::mutate(precDQL = dplyr::if_else(deployed, precDQL, "E"),
                  precDQL = dplyr::case_when(results_deploy %in% audits_deploy ~ precDQL,
                                             TRUE ~ "E")) %>%
    dplyr::group_by(row.in) %>%
    # if there are two audits for the same sample take the lowest precDQL
    dplyr::mutate(precDQL = max(precDQL, na.rm = TRUE)) %>%
    dplyr::ungroup() %>%
    dplyr::distinct(Monitoring.Location.ID, Equipment.ID, Characteristic.Name, row.in, precDQL) %>%
    dplyr::arrange(row.in) %>%
    as.data.frame()

  return(df.results.grade$precDQL)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.