R/checks.R

Defines functions check_path_TURN check_path_count_input check_quosures check_cols initial_check

Documented in initial_check

#' Check initial dataframe
#'
#' Validates the initial dataframe of pathogen level data from the
#'   BioFire database and throws an error at the first problem found.
#'   If the check doesn't pass, subsequent functions may break in
#'   unexpected ways. If an error was encountered and the problem then
#'   solved, run again to see if additional problems are encountered.
#'
#' Checks are run in this order: required columns present, no extra
#'   columns, exactly 3 unique accepted InstrumentVersion values, at least
#'   one TargetName containing "control"/"Control", PouchTitle contains
#'   both target and non-target values, FlaggedAsValidation only 0/1, and
#'   ResultType only "organism"/"control". Missing values (NA) in
#'   TargetName or PouchTitle are treated as non-matching rather than
#'   causing an unrelated error.
#'
#' @param df this is the raw dataframe to be fed into
#'   subsequent functions starting with the "pre_process"
#' @param target_PouchTitle Name of the PouchTitle of interest--ensures
#'       it is present (single, non-missing character string).
#' @return Either an error if problems are encountered, or a
#'   "check passed" message
#' @examples
#' initial_check(rp_raw)
#' initial_check(rp_raw, target_PouchTitle = "Gastro_Intestinal")
#' @export
initial_check <- function(df,
                          target_PouchTitle = "Respiratory_Panel") {

    stopifnot(
        is.data.frame(df),
        is.character(target_PouchTitle),
        length(target_PouchTitle) == 1,
        !is.na(target_PouchTitle)
    )

    # check for the existence of the required columns

    required_cols <- c('RunDataID', 'StartTime', 'InstrumentSerialNumber',
                       'FlaggedAsValidation', 'SiteID', 'ZipCode', 'TargetName',
                       'TargetShortName', 'AssayName', 'ResultType', 'TargetResult',
                       'AssayResult', 'Region', 'Country', 'InstrumentVersion',
                       'PouchTitle')

    cols <- names(df)
    cols_missing <- required_cols[!required_cols %in% cols]

    if (length(cols_missing) > 0) {
        stop(paste0("Expecting more columns. Columns missing:\n",
                    paste(cols_missing, collapse = "\n ")), call. = FALSE)
    }

    cols_extra <- cols[!cols %in% required_cols]

    if (length(cols_extra) > 0) {
        stop(paste0("Remove extra columns:\n", paste(cols_extra, collapse = "\n")),
             call. = FALSE)
    }

    # checking InstrumentVersion: exactly 3 distinct values, each of which
    # must be one of the accepted spellings below
    inst <- unique(df$InstrumentVersion)

    if (length(inst) != 3) {
        stop(paste0("Expecting 3 unique InstrumentVersion values, these provided:\n",
                    paste(inst, collapse = "\n")), call. = FALSE)
    }
    required_inst <- c("FA1.5", "FA 1.5", "FA2.0", "FA 2.0", "Torch")

    if (!all(inst %in% required_inst)) {
        stop(paste0("Unexpected instrument versions, only accept:\n",
                    paste(required_inst, collapse = "\n")), call. = FALSE)
    }

    # checking TargetName--must have control spelled out or later code breaks.
    # grepl() returns FALSE (not NA) for missing names, so NA values cannot
    # turn the condition into NA and crash the `if`.
    if (!any(grepl("[Cc]ontrol", df$TargetName))) {
        stop("no control target names provided", call. = FALSE)
    }

    # checking PouchTitle. %in% never yields NA, so missing PouchTitle values
    # simply count as "not the target".
    is_target <- df$PouchTitle %in% target_PouchTitle

    if (!any(is_target)) {
        stop("no ", target_PouchTitle," PouchTitle values present.\n",
             "values present:\n",
             paste0(unique(df$PouchTitle), collapse = "\n"),
             call. = FALSE
        )
    }

    if (all(is_target)) {
        stop("all PouchTitles are ", target_PouchTitle, " --include non ",
             target_PouchTitle,
             " tests also so proportion tests can be calculated",
             call. = FALSE)
    }

    # flagged as validation

    if (!all(df$FlaggedAsValidation %in% c(0, 1))) {
        stop("FlaggedAsValidation should only take on values of 0 and 1",
             call. = FALSE)
    }

    # ResultType
    if (!all(df$ResultType %in% c("organism", "control"))) {
        stop("ResultType expected to only have values of 'organism' and 'control'",
             call. = FALSE)
    }

    message("check passed")
}



# check column input ------------------------------------------------------

check_cols <- function(df, required_cols, name = NULL) {
    # Verify that a dataframe carries every column a caller depends on.
    # args:
    #  df--dataframe to inspect
    #  required_cols--character vector of column names df must contain
    #  name--optional label (e.g. the calling function) prefixed to the
    #        error message
    # returns:
    #   nothing useful (invisible NULL) when all columns are present;
    #   otherwise an error listing the required and the missing columns
    stopifnot(is.data.frame(df),
              is.null(name) | is.character(name),
              is.character(required_cols))

    absent <- required_cols[!required_cols %in% names(df)]

    # guard clause: nothing missing, nothing to report
    if (length(absent) == 0) {
        return(invisible(NULL))
    }

    wanted_txt <- paste(required_cols, collapse = "\n")
    absent_txt <- paste0(absent, collapse = "\n")

    stop(paste0(name, " input requires following columns: \n",
                wanted_txt,
                "\nmissing columns:\n",
                absent_txt))
}
# check_cols(cars, letters[1:5])
# check_quosure input -----------------------------------------------------

check_quosures <- function(.vars) {
    # Confirm that an argument was supplied as a "quosures" object (what
    # vars() produces). Strictly for use inside other functions.
    # args:
    #   .vars--argument used in function, to check if it is actually quosure
    # returns:
    #   TRUE if all good otherwise throws error.
    #
    # inherits() is used instead of comparing class(.vars)[1] so that objects
    # whose class vector contains "quosures" anywhere (e.g. a subclass) are
    # accepted too.
    if (!inherits(.vars, "quosures")) {
        stop("arguments needs use vars() e.g. argument = vars(date, InstrumentVersion)")
    }
    TRUE
}

# check input for other function ----------------------------------------------

check_path_count_input <- function(df) {
    # Validate the input of the path_count_by_site function.
    # args:
    #   df--dataframe; must contain the columns RunDataID, SiteID,
    #       TargetName and date
    # returns:
    #   throws an error if input isn't in correct format to be used
    # by path_count_by_site function (a required column is missing, or the
    # date column fails lubridate::is.Date); no meaningful value otherwise
    required_cols <- c("RunDataID", "SiteID", "TargetName", "date")

    # column presence is checked first so the date test below cannot fail
    # merely because the column is absent
    check_cols(df, required_cols)

    stopifnot(lubridate::is.Date(df$date))

}



# check_path_TURN ---------------------------------------------------------

check_path_TURN <- function(df, group_vars) {
    # Check that the pathogen-level TURN values add up to the total TURN
    # within every epidate x group_vars group.
    # args:
    #   df--dataframe with TURN by pathogen (path_Y_prime_3wma) and total
    #       TURN (Y_prime_3wma), plus epidate and the grouping columns
    #   group_vars--grouping vars. wrapped in vars()
    # returns:
    #   error if, in any group, the summed pathogen TURN differs from the
    #   total TURN by more than 0.0001; otherwise emits a "check passed"
    #   message
    check_quosures(group_vars)


    # quo2char() is defined elsewhere--presumably it turns the quosures into
    # column-name strings so they can be checked like the other columns
    check_cols(df,
               required_cols = c(quo2char(group_vars), "path_Y_prime_3wma",
                                 "Y_prime_3wma", "epidate"),
               name = "check_path_TURN")

    group_vars2 <- c(vars(epidate), group_vars)
    check <- df %>%
        group_by(!!!group_vars2) %>%
        mutate(path_3wma_sum = sum(.data$path_Y_prime_3wma),
               diff = abs(.data$path_3wma_sum - .data$Y_prime_3wma))

    # rows whose difference is NA are ignored (na.rm), only real
    # discrepancies above the tolerance trigger the error
    if (any(check$diff > 0.0001, na.rm = TRUE)) {
        # message is built from two pieces so the source line break and
        # indentation do not end up inside the error text
        stop("TURN of individuals doesn't sum to total TURN (only applies ",
             "when co-detection pathogen is created)")
    }

    message("check passed")
}
MartinHoldrege/turnr documentation built on May 16, 2020, 10:39 a.m.