#' Check initial dataframe
#'
#' Throws an error if the initial dataframe of pathogen level data from the
#' BioFire database is not in the expected format. If the check doesn't pass,
#' subsequent functions may break in unexpected ways. The checks run in order
#' and stop at the first failure, so if an error was encountered and the
#' problem then solved, run again to see if additional problems are
#' encountered.
#'
#' @param df this is the raw dataframe to be fed into
#'   subsequent functions starting with the "pre_process"
#' @param target_PouchTitle Name of the PouchTitle of interest--ensures
#'   it is present. Must be a single, non-missing character string.
#' @return Called for its side effects: either an error if problems are
#'   encountered, or a "check passed" message (the value is an invisible
#'   `NULL`).
#' @examples
#' initial_check(rp_raw)
#' initial_check(rp_raw, target_PouchTitle = "Gastro_Intestinal")
#' @export
initial_check <- function(df,
                          target_PouchTitle = "Respiratory_Panel") {
  stopifnot(
    is.data.frame(df),
    is.character(target_PouchTitle),
    length(target_PouchTitle) == 1,
    !is.na(target_PouchTitle)
  )

  # check for the existence of the required columns
  required_cols <- c("RunDataID", "StartTime", "InstrumentSerialNumber",
                     "FlaggedAsValidation", "SiteID", "ZipCode", "TargetName",
                     "TargetShortName", "AssayName", "ResultType",
                     "TargetResult", "AssayResult", "Region", "Country",
                     "InstrumentVersion", "PouchTitle")
  cols <- names(df)

  cols_missing <- required_cols[!required_cols %in% cols]
  if (length(cols_missing) > 0) {
    stop(paste0("Expecting more columns. Columns missing:\n",
                paste(cols_missing, collapse = "\n ")), call. = FALSE)
  }

  cols_extra <- cols[!cols %in% required_cols]
  if (length(cols_extra) > 0) {
    stop(paste0("Remove extra columns:\n", paste(cols_extra, collapse = "\n")),
         call. = FALSE)
  }

  # checking InstrumentVersion
  inst <- unique(df$InstrumentVersion)
  if (length(inst) != 3) {
    stop(paste0("Expecting 3 unique InstrumentVersion values, these provided:\n",
                paste(inst, collapse = "\n")), call. = FALSE)
  }
  required_inst <- c("FA1.5", "FA 1.5", "FA2.0", "FA 2.0", "Torch")
  if (!all(inst %in% required_inst)) {
    stop(paste0("Unexpected instrument versions, only accept:\n",
                paste(required_inst, collapse = "\n")), call. = FALSE)
  }

  # checking TargetName--must have control spelled out or later code breaks.
  # grepl() yields FALSE (not NA) for missing names, so a missing TargetName
  # cannot turn the condition into NA and abort with a cryptic error.
  if (!any(grepl("[Cc]ontrol", df$TargetName))) {
    stop("no control target names provided", call. = FALSE)
  }

  # checking PouchTitle. Missing PouchTitle values are ignored (na.rm = TRUE)
  # so the intended error is raised instead of "missing value where
  # TRUE/FALSE needed".
  is_target <- df$PouchTitle == target_PouchTitle
  if (!any(is_target, na.rm = TRUE)) {
    stop("no ", target_PouchTitle, " PouchTitle values present.\n",
         "values present:\n",
         paste0(unique(df$PouchTitle), collapse = "\n"),
         call. = FALSE)
  }
  if (all(is_target, na.rm = TRUE)) {
    stop("all PouchTitles are ", target_PouchTitle, " --include non ",
         target_PouchTitle,
         " tests also so proportion tests can be calculated",
         call. = FALSE)
  }

  # flagged as validation
  if (!all(df$FlaggedAsValidation %in% c(0, 1))) {
    stop("FlaggedAsValidation should only take on values of 0 and 1",
         call. = FALSE)
  }

  # ResultType
  if (!all(df$ResultType %in% c("organism", "control"))) {
    stop("ResultType expected to only have values of 'organism' and 'control'",
         call. = FALSE)
  }

  message("check passed")
}
# check column input ------------------------------------------------------
check_cols <- function(df, required_cols, name = NULL) {
  # Verify that a dataframe contains every required column.
  # args:
  #   df--dataframe to inspect
  #   required_cols--character vector of column names the df should have
  #   name--optional label (e.g. the calling function) prefixed to the
  #     error message
  # returns:
  #   nothing useful (invisible NULL) when all columns are present;
  #   throws an error listing the required and the missing columns otherwise
  stopifnot(
    is.data.frame(df),
    is.null(name) | is.character(name),
    is.character(required_cols)
  )

  # positions of required columns within df; NA marks an absent column
  positions <- match(required_cols, names(df))
  absent <- required_cols[is.na(positions)]
  if (length(absent) == 0) {
    return(invisible(NULL))
  }

  required_txt <- paste(required_cols, collapse = "\n")
  absent_txt <- paste0(absent, collapse = "\n")
  stop(paste0(name, " input requires following columns: \n",
              required_txt,
              "\nmissing columns:\n",
              absent_txt))
}
# check_cols(cars, letters[1:5])
# check_quosure input -----------------------------------------------------
check_quosures <- function(.vars) {
  # Confirm that an argument was supplied wrapped in vars().
  # args:
  #   .vars--argument received by the calling function
  # returns:
  #   TRUE when the first class of .vars is "quosures"; otherwise throws an
  #   error. Strictly for use inside other functions.
  leading_class <- class(.vars)[1]
  if (leading_class == "quosures") {
    return(TRUE)
  }
  stop("arguments needs use vars() e.g. argument = vars(date, InstrumentVersion)")
}
# check input for other function ----------------------------------------------
check_path_count_input <- function(df) {
  # Validate the input of the path_count_by_site function.
  # args:
  #   df--dataframe; must contain the columns RunDataID, SiteID, TargetName
  #     and date, with date stored as a Date
  # returns:
  #   throws an error if input isn't in correct format to be used
  #   by path_count_by_site function
  required_cols <- c("RunDataID", "SiteID", "TargetName", "date")
  # pass a name so the missing-column error says which check failed
  # (without it the message begins with a bare " input requires ...")
  check_cols(df, required_cols, name = "check_path_count_input")
  stopifnot(lubridate::is.Date(df$date))
}
# check_path_TURN_sum -----------------------------------------------------
check_path_TURN <- function(df, group_vars) {
  # Check that the per-pathogen TURN values add up to the total TURN within
  # each epidate/group_vars group.
  # args:
  #   df--dataframe with TURN by pathogen (path_Y_prime_3wma) and total
  #     TURN (Y_prime_3wma), plus epidate and the grouping columns
  #   group_vars--grouping vars. wrapped in vars()
  # returns:
  #   error if the summed pathogen TURN differs from the total TURN by more
  #   than 0.0001 in any row; otherwise emits a "check passed" message
  check_quosures(group_vars)
  needed <- c(quo2char(group_vars), "path_Y_prime_3wma",
              "Y_prime_3wma", "epidate")
  check_cols(df, required_cols = needed, name = "check_path_TURN")

  # group by epidate first, then by the caller-supplied variables
  grouping <- c(vars(epidate), group_vars)
  grouped <- group_by(df, !!!grouping)
  compared <- mutate(
    grouped,
    path_3wma_sum = sum(.data$path_Y_prime_3wma),
    diff = abs(.data$path_3wma_sum - .data$Y_prime_3wma)
  )

  # rows whose difference is NA are left out of the comparison
  if (any(compared$diff > 0.0001, na.rm = TRUE)) {
    stop(paste0("TURN of individuals doesn't sum to total TURN (only applies\n",
                "when co-detection pathogen is created)"))
  }
  message("check passed")
}
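# NOTE: the two lines below are leftover web-page text, not R code; they are
# kept as comments so the file still parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.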