#' Single function to call when extracting variables from each monitor.
#'
#' @description
#' When following the recommended use of \code{\link{read_monitor}}
#' (data frame listing files and types, iterate with `purrr::map2` to read),
#' the resulting object will include a list column containing onw data frame
#' with the data for each file (nested data).
#'
#' `collect_data` processes the nested data and produces a data frame with all
#' observations of `date_time`, `concentration` and `flow` from all the read
#' files.
#'
#' Additional columns should be included ad a key to identify the data from each
#' instrument in the resulting data frame.
#'
#' @details
#'
#' Since some monitors can potentially produce files with record date/times
#' using different date formats (e.g. d/m/y, m/d/y, y/m/d) this function
#' tries to guess the correct format but can sometimes miss.
#' It is recommended to provide a reference date for each file so you can check
#' whether date/times were correctly interpreded.
#'
#' There is no clear column in the UPAS files to extract the concentration data,
#' for now this functions returns `NA` for this concentration.
#'
#' @param .data Dataframe containing a nested column with the data from each
#' file. Ussually a product of iterating `read_monitor` using `purrr::map2`.
#' @param type Type of monitor with which the data was recorded.
#' Use "ecm-full" to get all variable available in the ECM files.
#' @param data_col Column containing the nested data.
#' @param ... Additional columns present in `.data` to preserve in the output.
#' Useful to define the key to identify each data set.
#' @return A data frame with `type`, specified key columns and collected
#' measures for `date_time`, `concentration` and `flow`.
#' @export
#' @examples
#' \dontrun{
#' # Load required packages
#' library(package = "pemr")
#'
#' # Define files and types. This can be any data frame, so composing data in a
#' # spreadsheet and reading it in works too
#' data_frame(
#' files_col = c("path/to/file1.csv", "path/to/file2.csv"),
#' types_col = c("ecm", "upas")
#' ) %>%
#' mutate(
#' data = map2(files_col, types_col, read_monitor)
#' ) %>%
#' collect_data(
#' type = types_col, data_col = data,
#' files_col
#' )
#' }
collect_data <- function(.data, type, data_col, ...){
# Work with tidy evaluation
quo_type <- enquo(type)
quo_data_col <- enquo(data_col)
vars <- quos(...)
#----------------------------------------------------------------------------*
# Do for each type of monitor ----
#----------------------------------------------------------------------------*
monitor_types <- .data %>% pull(!!quo_type) %>% unique
if(length(monitor_types) > 1){
# Get data for each monitor type
monitor_data <- .data %>%
split(.[, rlang::quo_name(quo_type)]) %>%
map(
collect_data, !!quo_type, !!quo_data_col, !!!vars
) %>%
bind_rows()
} else {
# Pick function
data_em_ <- switch(
monitor_types,
"ecm-raw" = data_em_ecm_raw,
"ecm" = data_em_ecm,
"ecm-full" = data_em_ecm_full,
"patsp" = data_em_patsp,
"upas" = data_em_upas,
"sums" = data_em_na,
data_em_na
)
monitor_data <- .data %>%
unnest(!!quo_data_col) %>%
data_em_(.data = ., type = quo_type, vars = vars)
}
return(monitor_data)
}
#------------------------------------------------------------------------------*
# Extract variables of interest for each monitor ----
#------------------------------------------------------------------------------*
# ecm-raw
data_em_ecm_raw <- function(.data, type, vars){
.data %>%
# Force missing variables
mutate(
date_time = as.POSIXct(NA),
concentration = NA_real_,
flow = NA_real_
) %>%
# Keep monitor type, any requested variables, and data variables
select(
!!!type, !!!vars,
date_time, concentration, flow
)
}
# unknown / not applicable
data_em_na <- function(.data, type, vars){
.data %>%
# Force missing variables
mutate(
date_time = as.POSIXct(NA),
concentration = NA_real_,
flow = NA_real_
) %>%
# Keep monitor type, any requested variables, and data variables
select(
!!!type, !!!vars,
date_time, concentration, flow
)
}
# ecm-processed
data_em_ecm <- function(.data, type, vars){
.data %>%
# Keep monitor type, any requested variables, and data variables
select(
!!!type, !!!vars,
date_time, concentration = `RH-Corrected Nephelometer`, flow = Flow
)
}
# Rename all ecm variables
ecm_vars <- c(
date = "Date",
time = "Time",
concentration = "RH-Corrected Nephelometer",
conc_hr = "RH-Corrected Nephelometer HR",
temperature = "Temp",
relative_humidity = "RH",
battery = "Battery",
inlet_pressure = "Inlet Press",
orifice_pressure = "Orifice Press",
flow = "Flow",
x_axis = "X-axis",
y_axis = "Y-axis",
z_axis = "Z-axis",
accelerometer = "Vector Sum Composite",
shutdown = "ShutDownReason",
wc = "Wearing Compliance",
vwc = "ValidityWearingCompliance validation"
)
# ecm-processed-full
data_em_ecm_full <- function(.data, type, vars){
.data %>%
# Keep monitor type, any requested variables, and data variables
select(!!!type, !!!vars, !!!ecm_vars)
}
# upas
data_em_upas <- function(.data, type, vars){
.data %>%
# Force missing variables
mutate(
concentration = NA
) %>%
# Keep monitor type, any requested variables, and data variables
select(
!!!type, !!!vars,
date_time, concentration = atmoRho, flow = volflow
)
}
# pats+
data_em_patsp <- function(.data, type, vars){
.data %>%
# Force missing variables
mutate(
flow = NA_real_,
PM_Estimate = as.numeric(PM_Estimate)
) %>%
# Keep monitor type, any requested variables, and data variables
select(
!!!type, !!!vars,
date_time, concentration = PM_Estimate, flow
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.