dxr: Diagnostic Product Evaluation in R

Documented in build_evaluation_sheet

#' Build Evaluation Sheet
#'
#' A function to build an excel file with fill-in-the-blank cells for common
#' information needed about a sample evaluation.
#'
#' @param evaluation_name The name of the evaluation being used for the
#'   evaluation. Must be a character string (not a multi-level vector) and must
#'   not be \code{NA}.
#' @param evaluation_description An optional sentence or paragraph describing
#'   the evaluation. Defaults to \code{NA}. If provided, it must be a string
#'   rather than a multi-level vector.
#' @param developer The developer of the assay under evaluation. A character
#'   string. Defaults to \code{NA}
#' @param assay The name of the assay under evaluation. A character string.
#'   Defaults to \code{NA}
#' @param lot_numbers All lot numbers associated with the evaluation. May be a
#'   character vector of length 1 or more.  Defaults to \code{NA}
#' @param panel_data The output of \code{build_panel_sheet}. Either this or a
#'   panel_data_filepath must be provided, but not both. Defaults to \code{NA}.
#' @param panel_data_filepath An excel file generated by
#'   \code{write_panel_sheet()}. Either this or panel_data mupst be provided,
#'   but not both. Defaults to \code{NA}.
#' @param analytes The analyte or analytes to be included in the evaluation.
#'   This must be provided with at least one level (not \code{NA}). This
#'   function will stop with an error if the analytes to be evaluated arte not
#'   included in the sample panel.
#' @param targets The target or targets for which the assay under evaluation.
#'   For example, for a SARS-CoV-2 assay, this could be \code{"Spike"} or
#'   \code{"RBD"}. Multiple targets can also be used, such as \code{c("Spike",
#'   "Nucleocapsid")}. This must be provided with at least one level (not
#'   \code{NA}).
#' @param qualitative_outcomes The valid qualitative outcomes associated with
#'   the evaluation. Typically, this should be \code{c("Positive", "Negative")}.
#'   Note that while the assay being evaluated may also provide an "Equivocal"
#'   result level, equivocal results will be counted against the assay in terms
#'   of performance. That is, an equivocal result on a positive sample will be
#'   called as a false negative result and an equivocal result on a negative
#'   sample will be called as a false positive result.
#' @param semiquantitative_outcomes The valid semi-quantitative outcomes
#'   associated with the evaluation. Defaults to \code{NA}.
#' @param quantitative_units If quantitative outcomes have been established for
#'   the evaluation, this is a character string describing the units of those
#'   quantitative results. Defaults to \code{NA}. If the results are unit-less,
#'   \code{"Unit-less"} can be used.
#' @param randomize Defaults to \code{TRUE}, meaning sample IDs from the panel
#'   data will be put in a random order for this evaluation.
#' @param blind Defaults to \code{TRUE}, meaning sample IDs from the panel data
#'   will be assigned a number unique to this evaluation.
#' @return Returns a list including the \code{evaluation_metadata},
#'   \code{sample_blinding}, and \code{evaluation_table}
#' @export
#'
#' @examples
#' build_evaluation_sheet(
#'   evaluation_name = "Example Evaluation",
#'   evaluation_description = NA_character_,
#'   developer = "ACME Test Corp.",
#'   assay = "Test Assay #1",
#'   lot_numbers = "20200101",
#'   panel_data =
#'     build_panel_sheet(
#'       panel_name = "Example Panel",
#'       panel_description = "An example panel.",
#'       n_samples = 5L,
#'       sample_groups = "Samples",
#'       sample_matrices = "Serum",
#'       analytes = c("IgM", "IgG", "Pan-Ig"),
#'       targets = "Spike",
#'       qualitative_outcomes = c("Positive", "Negative"),
#'       qualitative_comparators = "Authorized NAAT and CDC Assay",
#'       semiquantitative_outcomes = NA,
#'       semiquantitative_comparators = NA,
#'       quantitative_units = NA,
#'       quantitative_comparators = NA
#'     ),
#'   analytes = c("IgM", "IgG", "Pan-Ig"),
#'   targets = "Spike",
#'   qualitative_outcomes = c("Positive", "Negative"),
#'   semiquantitative_outcomes = NA_character_,
#'   quantitative_units = NA_character_,
#'   randomize = FALSE,
#'   blind = FALSE
#' )
build_evaluation_sheet <- function(
  evaluation_name,
  evaluation_description = NA_character_,
  developer = NA_character_,
  assay = NA_character_,
  lot_numbers = NA_character_,
  panel_data = NA,
  panel_data_filepath = NA_character_,
  analytes,
  targets,
  qualitative_outcomes = c("Positive", "Negative"),
  semiquantitative_outcomes = NA_character_,
  quantitative_units = NA_character_,
  randomize = TRUE,
  blind = TRUE
) {
  # TODO:
  # * Add a flag to facilitate double-entry of results -- or maybe this should
  #   go in the write function for excel.
  #
  # Check inputs ---------------------------------------------------------------
  stopifnot(
    # evaluation_name must be a character string, not a vector
    is.character(evaluation_name),
    length(evaluation_name) == 1,
    !is.na(evaluation_name),
    !is.null(evaluation_name),
    # evaluation description must be a character string (not a vector)
    is.character(evaluation_description),
    length(evaluation_description) == 1,
    !is.null(evaluation_description),
    # developer, assay, and lot numbers are characters, length 1
    is.character(developer),
    length(developer) == 1,
    is.character(assay),
    length(assay) == 1,
    # Lot numbers are characters
    is.character(lot_numbers),
    # qualitative_outcomes is a character vector with at least one level
    is.vector(qualitative_outcomes, mode = "character"),
    !is.na(qualitative_outcomes),
    # semiquantitative_outcomes must be a character vector or NA
    any(
      is.vector(semiquantitative_outcomes, mode = "character"),
      is.na(semiquantitative_outcomes)
    ),
    # quantitative_units must be a character string (not a vector) or NA
    any(
      all(
        is.character(quantitative_units),
        length(quantitative_units) == 1
        ),
      is.na(quantitative_units)
    ),
    # Randomize and blind must be boolean
    is.logical(randomize),
    is.logical(blind)
  )
  # Check Panel Data -----------------------------------------------------------
  # Either panel_data or panel_data_filepath needs to be present
  stopifnot(
    any(
      # panel_data is here
      all(
        # Not NA
        !all(is.na(panel_data)),
        # Not Null
        !is.null(panel_data),
        # Not an empty string
        !all(identical(panel_data, ""))
      ),
      # Or panel_data_filepath is here
      all(
        # Not NA
        !all(is.na(panel_data_filepath)),
        # Not Null
        !is.null(panel_data_filepath),
        # Not an empty string
        !all(identical(panel_data_filepath, ""))
      )
    )
  )
  # Either panel_data or panel_data_filepath should be NA or missing. Both
  # should not be supplied.
  if (
    # If panel_data is supplied
    all(
      # Not NA
      !all(is.na(panel_data)),
      # Not Null
      !is.null(panel_data),
      # Not an empty string
      !all(identical(panel_data, ""))
    )
  ) {
    # Then panel_data_filepath should not be supplied
    if (
      !any(
        # NA
        all(is.na(panel_data_filepath)),
        # Null
        is.null(panel_data_filepath),
        # Empty string
        all(identical(panel_data_filepath, ""))
      )
    ) {
      stop("You supplied both a panel_data object and a panel_data_filepath ",
           "Please provide only one or the other.")
    }
    panel_data_filepath <- NA
  }
  if  (
    # If panel_data_filepath  is supplied
    all(
      # Not NA
      !all(is.na(panel_data_filepath)),
      # Not Null
      !is.null(panel_data_filepath),
      # Not an empty string
      !all(identical(panel_data_filepath, ""))
    )
  ) {
    # Then panel_data can be wiped
    panel_data <- NA
    # And we should double check that the filepath looks valid
    stopifnot(
      stringr::str_detect(
        string = panel_data_filepath,
        pattern = "\\.xlsx$"
      ),
      file.exists(panel_data_filepath)
    )
    # And I guess now is as good a time as any to grab the panel data from that
    # file:
    panel_data <- read_panel(filepath = panel_data_filepath)
  }

  # Make panel_data$panel_metadata a list for easier access
  panel_metadata <-
    panel_data$panel_metadata %>%
    tidyr::pivot_wider()

  # Check Compatibility --------------------------------------------------------
  # TODO: I should probably extract this functionality into its own function.
  # That will be useful for the results comparison portion.
  #
  # We want to ensure that the proposed evaluation is compatible with the
  # panel that is to be used.
  # Start by initiating a problems vector where we can store messages to the
  # user about potential incompatibilities
  problems <- vector(mode = "character")
  possible_problems <- vector(mode = "character")

  # Are all of the evaluation analytes present in the panel?
  panel_analytes <- panel_metadata$analytes[[1]]
  if (!all(analytes %in% panel_analytes)) {
    problems <-
      c(
        problems,
        paste0(
          "At least one of the analytes you identified for this evaluation (",
          "which include: ",
          paste(analytes, collapse = ", "),
        ") does not have an established ground-truth in the panel you have ",
        "selected for this evaluation. Valid analytes for \"",
        panel_metadata$panel_name[[1]],
        "\" include: ",
        paste(panel_analytes, collapse = ", "),
        ". Please ensure that the analytes for use in the evaluation are all ",
        "present in the sample panel data. "
        )
      )
  }

  # Are all of the qualitative outcomes present in the panel?
  panel_qual_outcomes_valid <- panel_metadata$qualitative_outcomes[[1]]
  if (!all(qualitative_outcomes %in% panel_qual_outcomes_valid)) {
    possible_problems <-
      c(
        possible_problems,
        paste0(
          "At least one of the qualitative outcomes you identified for this ",
          "evaluation (",
          paste(qualitative_outcomes, collapse = ", "),
          ") is not a valid qualitative outcome for the panel: \"",
          panel_metadata$panel_name[[1]],
          "\". Valid qualitative outcomes for this panel include: ",
          paste(panel_qual_outcomes_valid, collapse = ", "),
          ". The evaluation outcome(s) that do not match those that are ",
          "valid for this panel (",
          qualitative_outcomes %>%
            magrittr::extract(
              !(qualitative_outcomes %in% panel_qual_outcomes_valid)
            ) %>%
              paste(collapse = ", "),
          ") will be called as false results."
        )
      )
  }

  # Warn and Stop for compatibility issues -------------------------------------
  if (!identical(possible_problems, character(0L))) {
    possible_problems <-  paste(possible_problems, collapse = "\n  ")
    warning(possible_problems)
  }
  if (!identical(problems, character(0L))) {
    problems <- paste(problems, collapse = "\n  ")
    stop(
      problems
    )
  }

  # Randomize and blind --------------------------------------------------------
  all_samples <- panel_data$panel_table$sample %>% unique()
  if (randomize == TRUE) {
    all_samples <-
      sample(all_samples)
  }
  if (blind == TRUE) {
    names(all_samples) <-
      seq_along(all_samples) %>%
      as.character() %>%
      stringr::str_pad(
        string = .,
        width = max(nchar(as.character(seq_along(all_samples)))),
        side = c("left"),
        pad = "0"
      )
  } else {
    names(all_samples) <- all_samples
  }
  sample_blinding <-
    tibble::enframe(
      all_samples,
      name = "evaluation_sample_id",
      value = "panel_sample_id"
    )
  # Build sheet ----------------------------------------------------------------

  metadata <-
    list(
      evaluation_name = evaluation_name,
      evaluation_description = evaluation_description,
      developer = developer,
      assay = assay,
      lot_numbers = lot_numbers,
      analytes = analytes,
      targets = targets,
      qualitative_outcomes = qualitative_outcomes,
      semiquantitative_outcomes = semiquantitative_outcomes,
      quantitative_units = quantitative_units,
      blinded = blind
    ) %>%
    tibble::enframe()

  evaluation_table <-
    tidyr::expand_grid(
      sample = sample_blinding$evaluation_sample_id,
      analyte = analytes,
      target = targets
    ) %>%
    dplyr::mutate(
      datetime_observation = lubridate::as_datetime(NA),
      qualitative_result = NA_character_,
      notes_and_anomalies = NA_character_
    )

  if (length(lot_numbers) == 1) {
    evaluation_table <-
      evaluation_table %>%
      dplyr::mutate(lot_number = lot_numbers[[1]])
  } else {
    evaluation_table <-
      evaluation_table %>%
      dplyr::mutate(lot_number = NA_character_)
  }

  # Add the semiquantitative result column if applicable
  if (!all(is.na(semiquantitative_outcomes))) {
    evaluation_table <-
      evaluation_table %>%
      dplyr::mutate(
        semiquantitative_result = NA_character_
      )
  }
  # Add the quantitative result column if applicable
  if (!all(is.na(quantitative_units))) {
    evaluation_table <-
      evaluation_table %>%
      dplyr::mutate(
        quantitative_result = NA_complex_,
        quantitative_units = quantitative_units[[1]]
      )
  }

  # Put columns in order
  column_order <-
    c(
      "sample",
      "analyte",
      "target",
      "lot_number",
      "datetime_observation",
      "qualitative_result",
      "semiquantitative_result",
      "quantitative_result",
      "quantitative_units",
      "notes_and_anomalies"
    )

  evaluation_table <-
    evaluation_table %>%
    dplyr::select(
      column_order[column_order %in% colnames(evaluation_table)],
      dplyr::everything() # Just in case...
    )

  # Finish
  list(
    evaluation_metadata = metadata,
    sample_blinding = sample_blinding,
    evaluation_table = evaluation_table
  )
}