R/check_grain.R

Defines functions check_grain

Documented in check_grain

#' @title
#'   Determine the grain of each resistance outcome in timber and validate the grains' fields
#'
#' @description
#'   The \code{check_grain()} function checks that a resistance outcome format
#'   is specified, determines its grain and calls functions to validate the
#'   grains' fields. See details for more information.
#'
#' @param timber
#'   A tibble of timber.
#'
#' @details
#'
#'   The odds ratio (and its confidence intervals) are the universal measure of
#'   CEDAR and sawmill. A resistance outcome may be specified using the odds ratio
#'   directly, or by using a two-by-two table of counts or prevalences. sawmill
#'   supports four resistance outcome formats:
#'
#'   \itemize{
#'            \item{Contingency table}
#'            \item{Prevalence table}
#'            \item{Odds ratio}
#'            \item{Risk ratio}
#'   }
#'
#'
#'   \subsection{Resistance Outcome Formats}{
#'
#'      There are two types of two-by-two tables supported by sawmill:
#'      contingency tables (containing count data), and prevalence tables
#'      (containing prevalence (%) data).
#'
#'      \emph{Contingency Table}
#'
#'      \tabular{lccc}{
#'                 \tab AMR+ \tab AMR- \tab Total \cr
#'        Exposed  \tab A    \tab B    \tab M1    \cr
#'        Referent \tab C    \tab D    \tab M2    \cr
#'      }
#'
#'      \emph{Prevalence Table}
#'
#'      \tabular{lccc}{
#'                 \tab AMR+ \tab AMR- \tab Total \cr
#'        Exposed  \tab P\%  \tab R\%  \tab M1    \cr
#'        Referent \tab Q\%  \tab S\%  \tab M2    \cr
#'      }
#'
#'      There are two types of measures of association supported by sawmill:
#'      odds ratios and risk ratios.
#'
#'      \emph{Odds Ratio}
#'
#'      \tabular{lccc}{
#'        \tab Lower CI \tab Odds Ratio \tab Upper CI   \cr
#'        \tab odds_lo  \tab odds       \tab odds_up    \cr
#'      }
#'
#'      \emph{Risk Ratio}
#'
#'      \tabular{lccc}{
#'        \tab Lower CI  \tab Risk Ratio \tab Upper CI  \cr
#'        \tab rratio_lo \tab rratio     \tab rratio_up \cr
#'      }
#'
#'   }
#'
#'   \subsection{Supported Grains}{
#'
#'     A 'grain' is a set of fields from which we can derive an odds ratio and
#'     its confidence intervals. For tabular data, supported grains are a set of
#'     fields from which we can first compute a complete table of counts (A, B,
#'     C, D), then compute an odds ratio and its confidence intervals.
#'
#'     There are currently seven supported grains:
#'
#'     \itemize{
#'            \item{con_table_pos_neg: A, B, C, D}
#'            \item{con_table_pos_tot: A, C, M1, M2}
#'            \item{con_table_neg_tot: A, C, M1, M2}
#'            \item{prev_table_pos_tot: P, Q, M1, M2}
#'            \item{prev_table_neg_tot: R, S, M1, M2}
#'            \item{odds_ratio: odds_ratio_lo, odds_ratio, odds_ratio_up}
#'            \item{risk_ratio: risk_ratio_lo, risk_ratio, risk_ratio_up}
#'     }
#'   }
#'
#'   \subsection{Unsupported Grains}{
#'
#'     An 'unsupported grain' is a set of fields from which we cannot derive an
#'     odds ratio and it's confidence intervals. These include grains where any
#'     requisite fields are NA.
#'
#'     The following grains are explicitly unsupported:
#'
#'     \itemize{
#'            \item{prev_table_pos_neg: P, Q, R, S}
#'     }
#'   }
#'
#'   \subsection{sawmill fields}{
#'
#'     TODO: Link to documentation on sawmill fields.
#'    }
#'
#' @return
#'   A tibble of timber with sawmill status fields and additional field
#'   \emph{grain}. Where checks fail, grain is NA.
#'
#' @importFrom dplyr case_when mutate rowwise ungroup
#' @importFrom magrittr %>%
#'
#' @export



check_grain <- function(timber) {
  # Check if columns exist; if not, create them. -------------------------------

  # The user must supply the resistance outcome type.
  # If `res_format` does not exist, stop.
  if (!("res_format" %in% names(timber))) {
    stop("timber is missing column: res_format.", call. = FALSE)
  }

  # The `grain` is sawmill's interpretation of the resistance outcome sub-type.
  # Column `grain` is created through `mutate()` later; it is specified
  # explicitly here for clarity and to preserve preferred column order.
  if (!("grain" %in% names(timber))) {
    timber[, "grain"] <- NA_character_
  }

  # # Column `sawmill_pass` indicates whether sawmill should operate on the
  # # resistance outcome. Set `sawmill_pass = FALSE` after other co-occurring
  # # events during check failure.
  # if (!('sawmill_pass' %in% names(timber))) {
  #   timber[ , 'sawmill_pass'] <- TRUE
  #   message("Column 'sawmill_pass' did not exist and was created.")
  # }
  #
  # # Column `sawmill_status` indicates the current sawmill status.
  # # Set `sawmill_status` after every sawmill operation.
  # if (!('sawmill_status' %in% names(timber))) {
  #   timber[ , 'sawmill_status'] <- 'Initialized.'
  #   message("Column 'sawmill_status' did not exist and was created.")
  # }


  # Check if `res_format` is a supported grain --------------------------------

  # Define supported `res_formats`.
  supported_odds <- c("Odds Ratio", "odds_ratio")
  supported_risk <- c("Risk Ratio", "risk_ratio")
  supported_cont <- c("Contingency Table", "cont_table")
  supported_prev <- c("Prevalence Table", "prev_table")

  # Standardize `res_format` in `grain`.
  timber <-
    timber %>%
    dplyr::mutate(
      grain = dplyr::case_when(
        res_format %in% supported_odds ~ "odds_ratio",
        res_format %in% supported_risk ~ "risk_ratio",
        res_format %in% supported_cont ~ "cont_table",
        res_format %in% supported_prev ~ "prev_table",
        TRUE ~ NA_character_
      )
    )

  # Check if `grain` is NA (i.e., `res_format` was not a supported grain).
  EMSG <- "Error: resistance outcome format not specified or supported."
  timber <-
    timber %>%
    dplyr::mutate(
      sawmill_status = dplyr::if_else(is.na(grain), EMSG, sawmill_status),
      sawmill_pass   = dplyr::if_else(is.na(grain), TRUE, sawmill_pass)
    )

  # Run validation for each grain.
  # Set sawmill_pass last to report status and grain.
  timber <-
    timber %>%
    dplyr::rowwise() %>%
    dplyr::mutate(
      # Validate odds ratios.
      sawmill_status = ifelse(sawmill_pass & grain == "odds_ratio",
        validate_odds_ratio(odds, oddslo, oddsup)[[2]],
        sawmill_status
      ),
      sawmill_pass = ifelse(sawmill_pass & grain == "odds_ratio",
        validate_odds_ratio(odds, oddslo, oddsup)[[1]],
        sawmill_pass
      ),
      # Validate risk ratios.
      sawmill_status = ifelse(sawmill_pass & grain == "risk_ratio",
        validate_risk_ratio(rratio, rratio_lo, rratio_up)[[2]],
        sawmill_status
      ),
      sawmill_pass = ifelse(sawmill_pass & grain == "risk_ratio",
        validate_risk_ratio(rratio, rratio_lo, rratio_up)[[1]],
        sawmill_pass
      ),
      # Validate contingency tables and refine grain.
      sawmill_status = ifelse(sawmill_pass & grain == "cont_table",
        validate_contingency_table(A, B, C, D, nexp, nref)[[2]],
        sawmill_status
      ),
      grain = ifelse(sawmill_pass & grain == "cont_table",
        validate_contingency_table(A, B, C, D, nexp, nref)[[3]],
        grain
      ),
      sawmill_pass = ifelse(sawmill_pass & grain == "cont_table",
        validate_contingency_table(A, B, C, D, nexp, nref)[[1]],
        sawmill_pass
      ),
      # Validate prevalence tables and refine grain.
      sawmill_status = ifelse(sawmill_pass & grain == "prev_table",
        validate_prevalence_table(P, Q, R, S, nexp, nref)[[2]],
        sawmill_status
      ),
      grain = ifelse(sawmill_pass & grain == "prev_table",
        validate_prevalence_table(P, Q, R, S, nexp, nref)[[3]],
        grain
      ),
      sawmill_pass = ifelse(sawmill_pass & grain == "prev_table",
        validate_prevalence_table(P, Q, R, S, nexp, nref)[[1]],
        sawmill_pass
      ),
      # Set grain to NA where checks fail.
      grain = ifelse(sawmill_pass,
        grain,
        NA_character_
      ),
    )

  # Return --------------------------------------------------------------------

  # Ungroup to reverse rowwise().
  dplyr::ungroup(timber)

  # Return timber.
  return(timber)
}
iAM-AMR/sawmill documentation built on June 30, 2024, 2:25 a.m.