#' @keywords WRCC
#' @export
#' @import MazamaCoreUtils
#'
#' @title Create WRCC data dataframe
#'
#' @param tbl single site WRCC tibble created by \code{wrcc_clustering()}
#' @param meta WRCC meta dataframe created by \code{wrcc_createMetaDataframe()}
#' @description After quality control has been applied to a WRCC tibble,
#' we can extract the PM2.5 values and store them in a \code{data} tibble
#' organized as time-by-deployment (aka time-by-site).
#'
#' The first column of the returned dataframe is named \code{'datetime'} and
#' contains a \code{POSIXct} time in UTC. Additional columns contain data
#' for each separate deployment of a monitor.
#'
#' @return A \code{data} dataframe for use in a \emph{ws_monitor} object.
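#'
#' @examples
#' \dontrun{
#' # Minimal usage sketch, assuming 'tbl' and 'meta' were produced by the
#' # upstream WRCC pipeline steps named above: quality control, clustering
#' # and wrcc_createMetaDataframe().
#' data <- wrcc_createDataDataframe(tbl, meta)
#'
#' # 'datetime' plus one column per monitor deployment
#' names(data)
#' }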
wrcc_createDataDataframe <- function(tbl, meta) {
logger.debug(" ----- wrcc_createDataDataframe() ----- ")
# Sanity check -- tbl must have deploymentID
if ( !'deploymentID' %in% names(tbl) ) {
logger.error("No 'deploymentID' column found in 'tbl' tibble with columns: %s", paste0(names(tbl), collapse=", "))
stop(paste0("No 'deploymentID' column found in 'tbl' tibble. Have you run addClustering()?"))
}
# Sanity check -- tbl must have datetime
if ( !'datetime' %in% names(tbl) ) {
logger.error("No 'datetime' column found in 'tbl' tibble with columns: %s", paste0(names(tbl), collapse=", "))
stop(paste0("No 'datetime' column found in 'tbl' tibble."))
}
# Sanity check -- meta must have a monitorType
if ( !'monitorType' %in% names(meta) ) {
logger.error("No 'monitorType' column found in 'meta' dataframe with columns: %s", paste0(names(meta), collapse=", "))
stop(paste0("No 'monitorType' column found in 'meta' dataframe."))
}
monitorType <- unique(meta$monitorType)
# Sanity check -- only a single monitorType is allowed
if ( length(monitorType) > 1 ) {
logger.error("Multiple monitor types found in 'meta' dataframe: %s", paste0(monitorType, collapse=", "))
stop(paste0("Multiple monitor types found in 'meta' dataframe."))
}
# Create monitorID the same way we did in wrcc_createMetaDataframe()
# Should only have a single instrumentID
instrumentIDs <- sort(unique(meta$instrumentID))
if ( length(instrumentIDs) > 1 ) {
logger.warn('Multiple instrumentIDs encountered: %s', paste0(instrumentIDs,collapse=", "))
}
instrumentID <- instrumentIDs[1]
tbl$monitorID <- paste(as.character(tbl$deploymentID), instrumentID, sep='_')
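# NOTE: Pairing deploymentID with instrumentID gives each deployment of each
# NOTE: instrument its own monitorID, and hence its own column in 'data'.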
if ( monitorType == 'EBAM' ) {
pm25Var <- 'ConcRT'
} else if ( monitorType == 'ESAM' ) {
pm25Var <- 'ConcRT'
} else {
logger.error("Dataframe creation is not supported for %s", monitorType)
stop(paste0("Dataframe creation is not supported for ", monitorType))
}
# Create minimal subset with the variables we need for rows, columns and data
subTbl <- tbl[,c('datetime','monitorID',pm25Var)]
melted <- reshape2::melt(subTbl, id.vars=c('datetime','monitorID'), measure.vars=pm25Var)
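# 'melted' is in long format with columns: datetime, monitorID, variable, value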
# Unit conversion as needed (mg/m3 ==> ug/m3)
if ( monitorType == 'EBAM' ) melted$value <- melted$value * 1 # no conversion needed
if ( monitorType == 'ESAM' ) melted$value <- melted$value * 1 # no conversion needed
# Use median if multiple values are found
# Sanity check -- only one pm25DF measure per hour
valueCountPerCell <- reshape2::dcast(melted, datetime ~ monitorID, length)
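# Each cell of valueCountPerCell counts the records for that datetime-monitorID pair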
maxCount <- max(valueCountPerCell[,-1])
if (maxCount > 1) logger.warn("Up to %s measurements per hour -- median used",maxCount)
# NOTE: The resulting dataframe is [datetime,monitorIDs] with monitorIDs in alphabetical order
pm25DF <- reshape2::dcast(melted, datetime ~ monitorID, stats::median)
# Reorder data columns to match the order of monitorIDs in 'meta'
pm25DF <- pm25DF[,c('datetime',meta$monitorID)]
# Create an empty hourlyDF dataframe with a full time axis (no missing hours)
datetime <- seq(min(tbl$datetime), max(tbl$datetime), by="hours")
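# NOTE: seq(..., by="hours") builds a regular hourly axis; this assumes tbl$datetime
# NOTE: values fall on hour boundaries so they match pm25DF$datetime in the join below.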
hourlyDF <- data.frame(datetime=datetime)
# Merge pm25DF into the hourlyDF dataframe, inserting NA's where necessary
# NOTE: dplyr returns objects of class "tbl_df" which can be confusing. We undo that.
data <- as.data.frame( dplyr::left_join(hourlyDF, pm25DF, by='datetime'), stringsAsFactors=FALSE )
logger.trace("Created 'data' dataframe with %d rows and %d columns", nrow(data), ncol(data))
return(data)
}