R/utils.R

Defines functions remove_empty_rows db_has_repeat_forms try_redcapr strip_html_field_embedding get_record_id_field force_cast invert_vec apply_labs_haven apply_labs_factor parse_logical_cols multi_choice_to_labels update_data_col_names update_field_names parse_labels link_arms create_repeat_instance_vars add_partial_keys

Documented in add_partial_keys apply_labs_factor apply_labs_haven create_repeat_instance_vars db_has_repeat_forms get_record_id_field invert_vec link_arms multi_choice_to_labels parse_labels parse_logical_cols remove_empty_rows strip_html_field_embedding try_redcapr update_data_col_names update_field_names

#' @title
#' Add partial key helper variables to dataframes
#'
#' @description
#' Make helper variables \code{redcap_event} and \code{redcap_arm} available as
#' branches from \code{var} for later use.
#'
#' @returns Two appended columns, \code{redcap_event} and \code{redcap_arm}
#' to the end of \code{read_redcap} output \code{tibble}s.
#'
#' @param db_data The REDCap database output defined by
#' \code{REDCapR::redcap_read_oneshot()$data}
#' @param var The unquoted name of the field containing event and arm
#' identifiers. Default \code{NULL}.
#'
#' @keywords internal

add_partial_keys <- function(db_data,
                             var = NULL) {
  if (!is.null(enexpr(var))) {
    # Include handling for instances where REDCap appends with "_1b" or similar
    pattern <- "^(\\w+?)_arm_(\\d+\\w?)$"

    db_data <- db_data %>%
      mutate(
        redcap_event = sub(pattern, "\\1", {{ var }}),
        redcap_arm   = as.character(sub(pattern, "\\2", {{ var }}))
      )
  }

  db_data <- db_data %>%
    create_repeat_instance_vars()

  db_data
}

#' @title
#' Utility function to convert redcap repeat instance columns into appropriate
#' form and event columns
#'
#' @details
#' The output of a standard REDCap export with repeating forms and/or events
#' makes use of `redcap_repeat_instance` in combination with
#' `redcap_repeat_instrument` and whether or not data exists in both. Instead,
#' rename and separate `redcap_repeat_instance` into `redcap_form_instance` and
#' `redcap_event_instance`.
#'
#' @param db_data The REDCap database output generated by
#' \code{REDCapR::redcap_read_oneshot()$data}
#'
#' @return
#' A dataframe.
#'
#' @keywords internal

create_repeat_instance_vars <- function(db_data) {
  out <- db_data

  # Detect if repeat forms exist

  has_repeat_forms <- db_has_repeat_forms(db_data)

  if (has_repeat_forms) {
    out <- rename(db_data, redcap_form_instance = "redcap_repeat_instance")
  }

  # Detect if repeat events exist
  # First determined if redcap_event_instance added during mixed structure handling
  # See: convert_mixed_instrument
  has_repeat_events <- "redcap_event_instance" %in% names(out)

  # Next determined by non-NA vals in new "redcap_form_instance" alongside
  # NA vals in "redcap_repeat_instrument"
  # `has_repeat_forms` will always be TRUE for events to exist
  if (has_repeat_forms && !has_repeat_events) {
    has_repeat_events <- any(
      is.na(out$redcap_repeat_instrument) & !is.na(out$redcap_form_instance)
    )
  }

  if (has_repeat_events) {
    # In cases where there are repeating events but they were not added by
    # convert_mixed_instrument(), add an empty redcap_event_instance column
    if (!"redcap_event_instance" %in% names(out)) {
      out$redcap_event_instance <- NA
    }

    out <- out %>%
      mutate(
        redcap_event_instance = case_when(
          is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ redcap_form_instance,
          # Else leave NA or the value given by conver_mixed_instrument()
          TRUE ~ redcap_event_instance
        ),
        redcap_form_instance = case_when(
          is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ NA,
          TRUE ~ redcap_form_instance
        )
      )

    out <- relocate(out,
      "redcap_event_instance",
      .after = "redcap_form_instance"
    )
  }

  # return
  out
}

#' @title
#' Link longitudinal REDCap instruments with their events/arms
#'
#' @description
#' For REDCap databases containing arms and events, it is necessary to determine
#' how these are linked and what variables belong to them.
#'
#' @returns
#' Returns a \code{tibble} of \code{redcap_event_name}s with list elements
#' containing a vector of associated instruments.
#'
#' @param redcap_uri The REDCap URI
#' @param token The REDCap API token
#' @param suppress_redcapr_messages A logical to control whether to suppress messages
#' from REDCapR API calls. Default `TRUE`.
#'
#' @keywords internal

link_arms <- function(redcap_uri,
                      token,
                      suppress_redcapr_messages = TRUE) {
  arms <- try_redcapr(
    {
      redcap_arm_export(redcap_uri, token, verbose = !suppress_redcapr_messages)
    },
    call = caller_env()
  ) %>%
    # match field name of redcap_event_instruments() output
    rename(arm_num = "arm_number")

  db_event_instruments <- try_redcapr(
    {
      redcap_event_instruments(
        redcap_uri = redcap_uri,
        token = token,
        arms = NULL, # get all arms
        verbose = !suppress_redcapr_messages
      )
    },
    call = caller_env()
  )

  db_event_labels <- try_redcapr(
    {
      redcap_event_read(
        redcap_uri = redcap_uri,
        token = token,
        verbose = !suppress_redcapr_messages
      )
    },
    call = caller_env()
  )

  left_join(db_event_instruments, arms, by = "arm_num") %>%
    left_join(db_event_labels, by = c("arm_num", "unique_event_name")) %>%
    mutate(
      across(any_of("unique_event_name"), ~ fct_inorder(.x)),
      across(any_of("event_name"), ~ fct_inorder(.x))
    )
}

#' @title
#' Parse labels from REDCap metadata into usable formats
#'
#' @description
#' Takes a string separated by \code{,}s and/or \code{|}s (i.e. comma/tab
#' separated values) containing key value pairs (\code{raw} and \code{label})
#' and returns a tidy \code{tibble}.
#'
#' @details
#' The associated \code{string} comes from metadata outputs.
#'
#' @returns A tidy \code{tibble} from a matrix giving raw and label outputs to
#' be used in later functions if \code{return_vector = FALSE}, the default.
#' Otherwise a vector result in a c(raw = label) format to use with
#' dplyr::recode
#'
#' @param string A \code{db_metadata$select_choices_or_calculations} field
#' pre-filtered for checkbox \code{field_type}
#' @param return_vector logical for whether to return result as a vector
#' @param return_stripped_text_flag logical for whether to return a flag indicating whether or not
#' text was stripped from labels
#'
#' @keywords internal

parse_labels <- function(string, return_vector = FALSE, return_stripped_text_flag = FALSE) {
  # If string is empty/NA, throw a warning
  if (is.na(string)) {
    cli_warn("Empty string detected for a given multiple choice label.",
      class = c("empty_parse_warning", "REDCapTidieR_cond")
    )
  }

  out <- string %>%
    strsplit("\\|") %>% # Split by "|"
    lapply(trimws) # Trim trailing and leading whitespace in list elements

  parse_err_msg <- c(
    "x" = "Cannot parse the `select_choices_or_calculations` field from REDCap metadata.",
    "i" = "This may happen if there is a missing comma or pipe character `|` inside the label.",
    "i" = "`select_choices_or_calculations`: {string}"
  )
  parse_err_class <- c("label_parse_error", "REDCapTidieR_cond")

  # Check there is a comma in all | delimited strsplit elements
  if (!all(grepl(",", out[[1]]))) {
    # If this is a misattributed data field or blank, throw warning in
    # multi_choice_to_labels
    if (length(out[[1]]) > 1 && !all(is.na(out[[1]]))) {
      cli_abort(parse_err_msg, class = parse_err_class)
    }
  }

  # split on the _first_ comma in each element
  out <- out %>%
    unlist() %>%
    stri_split_fixed(pattern = ",", n = 2) %>% # Split by first ","
    lapply(trimws) %>%
    unlist()

  # Check if vector is even for matrix creation. If not, then fail.
  if (length(out) %% 2 != 0) {
    # If this is a misattributed data field or blank, throw warning in
    # multi_choice_to_labels
    if (length(out) > 1 && !all(is.na(out))) {
      cli_abort(parse_err_msg, class = parse_err_class)
    }
  }

  # strip html and field embedding
  out_stripped <- strip_html_field_embedding(out)

  # Record whether we actually changed any labels to report if return_stripped_text_flag is TRUE
  stripped_text_flag <- any(out_stripped != out, na.rm = TRUE)

  out <- out_stripped %>%
    matrix(
      ncol = 2,
      byrow = TRUE,
      dimnames = list(
        c(), # row names
        c("raw", "label") # column names
      )
    )

  if (return_vector) {
    if (all(is.na(out))) {
      # handle no label case
      out <- c(`NA` = NA_character_)
    } else {
      tmp <- out
      out <- tmp[, "label"]
      names(out) <- tmp[, "raw"]
    }
  } else {
    out <- as_tibble(out)
  }

  # If stripped_text_flag was requested return a list with output and flag
  if (return_stripped_text_flag) {
    return(list(out, stripped_text_flag))
  }

  out
}

#' @title
#' Update metadata field names for checkbox handling
#'
#' @description
#' Takes a \code{db_metadata} object and:
#' \itemize{
#'   \item replaces checkbox field rows with a set of rows, one for each
#'   checkbox option
#'   \item appends a \code{field_name_updated} field to the end for checkbox
#'   variable handling
#'   \item updates \code{field_label} for any new checkbox rows to include the
#'   specific option in "field_label: option label" format
#'   \item strips html and field embedding logic from \code{field_label}
#' }
#'
#' @returns Column \code{db_metadata} with \code{field_name_updated} appended
#' and \code{field_label} updated for new rows corresponding to checkbox options
#'
#' @param db_metadata The REDCap metadata output defined by
#' \code{REDCapR::redcap_metadata_read()$data}
#'
#' @details
#' Assumes \code{db_metadata}:
#' \itemize{
#'   \item has non-zero number of rows
#'   \item contains \code{field_name} and \code{field_label} columns
#' }
#'
#' @keywords internal

update_field_names <- function(db_metadata) {
  out <- db_metadata %>%
    mutate(
      updated_metadata = list(c())
    )

  for (i in seq_len(nrow(out))) {
    if (out$field_type[i] == "checkbox") {
      # If checkbox field, parse labels fill updated_metadata with a tibble
      # containing updated field names and labels
      parsed_labs <- parse_labels(out$select_choices_or_calculations[i])

      # Build updated field names and labs
      clean_names <- paste(
        out$field_name[i],
        tolower(parsed_labs$raw),
        sep = "___"
      )

      # If field_label or options_labels are missing don't label
      if (is.na(out$field_label[i]) || any(is.na(parsed_labs$label))) {
        clean_labs <- NA_character_
      } else {
        # Otherwise build labs
        field_label <- out$field_label[i] %>%
          strip_html_field_embedding() %>%
          # Remove terminal colons since we add them in the next step
          str_replace(":$", "")

        clean_labs <- paste(field_label, parsed_labs$label, sep = ": ")
      }

      out$updated_metadata[i] <- list(
        tibble(
          field_name_updated = clean_names,
          field_label_updated = clean_labs
        )
      )
    } else {
      # Otherwise carry through existing field name and label
      out$updated_metadata[i] <- list(
        tibble(
          field_name_updated = out$field_name[i],
          field_label_updated = out$field_label[i]
        )
      )
    }
  }

  # Unnest and expand checkbox list elements + overwrite field_labels with
  # updated field labels

  out %>%
    unnest(cols = "updated_metadata") %>%
    mutate(field_label = strip_html_field_embedding(.data$field_label_updated)) %>%
    select(-"field_label_updated")
}

#' @title
#' Correctly label variables belonging to checkboxes with minus signs
#'
#' @description
#' Using \code{db_data} and \code{db_metadata}, temporarily create a conversion
#' column that reverts automatic REDCap behavior where database column names
#' have "-"s converted to "_"s.
#'
#' @details
#' This is an issue with checkbox fields since analysts should be able to verify
#' checkbox variable suffices with their label counterparts.
#'
#' @param db_data The REDCap database output defined by
#' \code{REDCapR::redcap_read_oneshot()$data}
#' @param db_metadata The REDCap metadata output defined by
#' \code{REDCapR::redcap_metadata_read()$data}
#'
#' @return Updated \code{db_data} column names for checkboxes where "-"s were
#' replaced by "_"s.
#'
#' @keywords internal

update_data_col_names <- function(db_data, db_metadata) {
  # Resolve checkbox conversion ----
  # Note: REDCap auto-exports and enforces changes from "-" to "_". This is not
  # useful when analysts want to reference negative values or other naming
  # conventions for checkboxes.
  db_metadata$checkbox_conversion <- db_metadata$field_name_updated
  db_metadata$checkbox_conversion <- str_replace_all(
    db_metadata$checkbox_conversion, "-", "_"
  )

  changed_names <- db_metadata %>%
    select("field_name_updated", "checkbox_conversion") %>%
    filter(.data$field_name_updated != .data$checkbox_conversion)

  for (i in seq_len(nrow(changed_names))) {
    names(db_data)[names(db_data) %in% changed_names$checkbox_conversion[i]] <- changed_names$field_name_updated[i]
  }

  db_data
}

#' @title
#' Update multiple choice fields with label data
#'
#' @description
#' Update REDCap variables with multi-choice types to standard form labels taken
#' from REDCap metadata.
#'
#' @details
#' Coerce variables of \code{field_type} "truefalse", "yesno", and "checkbox" to
#' logical. Introduce \code{form_status_complete} column and append to end of
#' \code{tibble} outputs. Ensure \code{field_type}s "dropdown" and "radio" are
#' converted appropriately since label appendings are important and unique to
#' these.
#'
#' @param db_data A REDCap database object
#' @param db_metadata A REDCap metadata object
#' @param call call for conditions
#' @inheritParams read_redcap
#'
#' @keywords internal

multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label", call = caller_env()) {
  if (raw_or_label == "label") {
    label_handler <- apply_labs_factor
  } else if (raw_or_label == "haven") {
    label_handler <- apply_labs_haven
  }
  # form_status_complete Column Handling ----
  # Must be done before the creation of form_status_complete
  # select columns that don't appear in field_name_updated and end with
  # "_complete"
  form_status_cols <- db_data %>%
    select(!any_of(db_metadata$field_name_updated) & ends_with("_complete")) %>%
    names()

  db_data <- db_data %>%
    mutate(
      across(
        .cols = all_of(form_status_cols),
        .fns = ~ label_handler(., c("0" = "Incomplete", "1" = "Unverified", "2" = "Complete"), integer(0))
      )
    )


  # Logical Column Handling ----
  # Handle columns where we change 0/1 to FALSE/TRUE (logical)
  db_data <- parse_logical_cols(db_data, db_metadata, call = call)

  # Buffer for fields with extra field values to be populated by check_extra_field_values
  extra_field_values <- vector("list", length = nrow(db_metadata))
  names(extra_field_values) <- db_metadata$field_name_updated

  for (i in seq_len(nrow(db_metadata))) {
    # Extract metadata field name and database corresponding column name
    field_name <- db_metadata$field_name_updated[i]

    # dropdown and radio datatype handling ----
    if (db_metadata$field_type[i] %in% c("dropdown", "radio")) {
      # Check for empty selection strings indicating missing data or incorrect
      # data field attribute types in REDCap
      if (is.na(db_metadata$select_choices_or_calculations[i])) {
        msg <- paste(
          "The field {field_name} in {db_metadata$form_name[i]} is a",
          "{db_metadata$field_type[i]} field type, however it does not have",
          "any categories."
        )
        cli_warn(
          c("!" = msg),
          class = c("field_missing_categories", "REDCapTidieR_cond")
        )
      }

      # Retrieve parse_labels key for given field_name
      parse_labels_output <- parse_labels(
        db_metadata$select_choices_or_calculations[i],
        return_vector = TRUE,
        return_stripped_text_flag = TRUE
      )

      # parse_labels returns list with output and flag since we set return_stripped_text_flag so unpack those
      stripped_text_flag <- parse_labels_output[[2]]
      parse_labels_output <- parse_labels_output[[1]]

      check_parsed_labels(
        parse_labels_output,
        field_name,
        warn_stripped_text = stripped_text_flag
      )

      if (!getOption("redcaptidier.allow.mdc", FALSE)) {
        extra_field_values[i] <- check_extra_field_values(
          db_data[[field_name]],
          names(parse_labels_output)
        )
      }

      # Replace values from db_data$(field_name) with label values from
      # parse_labels key

      db_data[[field_name]] <- label_handler(
        x = db_data[[field_name]],
        labels = parse_labels_output,
        ptype = db_data[[field_name]]
      )
    }
  }

  check_extra_field_values_message(extra_field_values, call = call)

  db_data
}

#' @title
#' Convert yesno, truefalse, and checkbox fields to logical
#'
#' @inheritParams multi_choice_to_labels
#'
#' @keywords internal
parse_logical_cols <- function(db_data, db_metadata, call = caller_env()) {
  logical_cols <- db_metadata %>%
    filter(.data$field_type %in% c("yesno", "truefalse", "checkbox"))

  if (nrow(logical_cols) == 0) {
    return(db_data)
  }

  parsed <- map(db_data[logical_cols$field_name_updated], check_field_is_logical)

  out <- db_data

  out[logical_cols$field_name_updated] <- map(parsed, "parsed")

  if (!getOption("redcaptidier.allow.mdc", FALSE)) {
    problems <- parsed %>%
      map("problems") %>%
      discard(is.null)

    if (length(problems) > 0) {
      fields <- names(problems)
      values <- flatten_chr(problems) %>% unique()

      msg <- c(
        `!` = "{.code {fields}} {?is/are} logical but contain{?s/} non-logical values: {values}",
        i = "These were converted to {.code NA} resulting in possible data loss",
        i = "Does your REDCap project utilize missing data codes?",
        i = paste(
          "Silence this warning with {.code options(redcaptidier.allow.mdc = TRUE)} or",
          "set {.code raw_or_label = 'raw'} to access missing data codes"
        )
      )
      cli_warn(
        msg,
        class = c("field_is_logical", "REDCapTidieR_cond"),
        call = call,
        fields = fields,
        problems = values
      )
    }
  }

  out
}

#' @title
#' Apply factor labels to a vector
#'
#' @details
#' Dots are needed to ignore `ptype` argument that may be passed to `apply_labs_haven`
#'
#'
#' @param x a vector to label
#' @param labels a named vector of labels in the format `c(value = label)`
#' @param \dots unused, needed to ignore extra arguments that may be passed
#'
#' @return
#' factor
#'
#' @keywords internal
apply_labs_factor <- function(x, labels, ...) {
  as.character(x) %>%
    recode(!!!labels) %>%
    factor(levels = unique(labels))
}

#' @title
#' Apply haven value labels to a vector
#'
#' @details
#' Assumes a check_installed() has been run for `labelled`. Since `haven` preserves the
#' underlying data values we need to make sure the data type of the value options in the metadata matches
#' the data type of the values in the actual data. This function accepts a prototype, usually a column
#' from db_data, and uses `force_cast()` to do a best-effort casting of the value options in the metadata
#' to the same data type as `ptype`. The fallback is to convert `x` and the value labels to character.
#'
#' @param x a vector to label
#' @param labels a named vector of labels in the format `c(value = label)`
#' @param ptype vector to serve as prototype for label values
#' @param \dots unused, needed to ignore extra arguments that may be passed
#'
#' @return
#' `haven_labelled` vector
#'
#' @keywords internal
#'
apply_labs_haven <- function(x, labels, ptype, ...) {
  # set_value_labels expects labels in c(label = value) format so reverse them
  labels <- invert_vec(labels)
  ptype <- vec_ptype(ptype)
  # Try to cast values to match data type in data, catching any parsing warnings
  cnd <- NULL
  labels_cast <- withCallingHandlers(
    {
      force_cast(labels, ptype)
    },
    warning = function(w) {
      cnd <<- w
      cnd_muffle(w)
    }
  )
  if (!is.null(attr(labels_cast, "problems"))) {
    # If there was parsing problem fall back to character
    labels_cast <- force_cast(labels, character())
  } else if (!is.null(cnd)) {
    # If there was some other warning we didn't mean to catch it, so re-raise
    cli_warn(cnd)
  }

  # If labels were parsed to something other than character it was based on ptype so we can assume x is the right type
  # If labels are character it may have been a fallback to ensure x is character
  if (is.character(labels_cast)) {
    x <- as.character(x)
  }

  labelled::set_value_labels(x, .labels = labels_cast)
}

#' @title
#' Swap vector names for values
#'
#' @param x a vector
#'
#' @return
#' Vector with names and values reversed
#'
#' @keywords internal
#'
invert_vec <- function(x) {
  out <- names(x)
  # If there were no names do nothing
  if (is.null(out)) {
    return(x)
  }
  names(out) <- x
  out
}

# Handling only integer and double since haven explicitly doesn't support other types
force_cast <- function(x, ptype) {
  if (is.integer(ptype)) {
    out <- parse_integer(x)
  } else if (is.numeric(ptype)) {
    out <- parse_double(x)
  } else {
    out <- parse_character(x)
  }

  names(out) <- names(x)
  out
}

#' @title
#' Utility function to extract the name of the project identifier field for
#' a tibble of REDCap data
#'
#' @details
#' The current implementation assumes that the first field in the data is the
#' project identifier
#'
#' @param data a tibble of REDCap data
#'
#' @return
#' The name of the identifier field in the data
#'
#' @keywords internal
#'
get_record_id_field <- function(data) {
  names(data)[[1]]
}

#' @title
#' Remove html tags and field embedding logic from a string
#'
#' @param x vector of strings to format
#'
#' @return
#' vector of strings with html tags, field embedding logic, and extra whitespace
#' removed
#'
#' @keywords internal
#'
strip_html_field_embedding <- function(x) {
  x %>%
    str_replace_all("\\{.+?\\}", "") %>%
    str_replace_all("<.+?\\>", "") %>%
    str_trim() %>%
    str_squish()
}

#' @title
#' Make a `REDCapR` API call with custom error handling
#'
#' @param expr an expression making a `REDCapR` API call
#' @param call the calling environment to use in the warning message
#'
#' @return
#' If successful, the `data` element of the `REDCapR` result. Otherwise an error
#'
#' @keywords internal
#'
try_redcapr <- function(expr, call = caller_env()) {
  quo <- enquo(expr)

  # List to store components of errors/warnings so we can look them up unambiguously
  condition <- list()

  # URI and token we want are in the env associated with expr
  env <- get_env(quo)
  condition$redcap_uri <- env$redcap_uri
  condition$token <- env$token

  # Defaults for other error components
  condition$message <- c("x" = "The {.pkg REDCapR} export operation was not successful.")
  condition$class <- "REDCapTidieR_cond"
  condition$info <- c(
    "!" = "An unexpected error occured.",
    "i" = "This means that you probably discovered a bug!",
    "i" = "Please consider submitting a bug report here: {.href https://github.com/CHOP-CGTInformatics/REDCapTidieR/issues}." # nolint: line_length_linter
  )
  condition$call <- call

  # Try to evaluate expr and handle REDCapR errors
  out <- try_fetch(
    eval_tidy(quo),
    error = function(cnd) {
      if (str_detect(cnd$message, "Could not resolve host")) {
        condition$info <- c(
          "!" = "Could not resolve the hostname.",
          "i" = "Is there a typo in the URI?",
          "i" = "URI: `{condition$redcap_uri}`"
        )
        condition$class <- c("cannot_resolve_host", condition$class)
      } else {
        condition$parent <- cnd
        condition$class <- c("unexpected_error", condition$class)
      }
      cli_abort(
        c(condition$message, condition$info),
        call = condition$call,
        parent = condition$parent,
        class = condition$class
      )
    },
    warning = function(cnd) {
      cli_warn(
        message = c("!" = "One of the {.pkg REDCapR} operations produced a warning. See below for details."),
        call = condition$call,
        parent = cnd,
        class = c("unexpected_warning", condition$class)
      )

      # Muffle warning default since we provide our own
      cnd_muffle(cnd)

      # zap to return object
      zap()
    }
  )

  # Handle cases where the API call itself was not successful
  if (out$success == FALSE) {
    condition$class <- c("redcapr_api_call_success_false", condition$class)

    if (out$status_code == 403) {
      condition$info <- c(
        "!" = "The URL returned the HTTP error code 403 (Forbidden).",
        "i" = "Are you sure this is the correct API token?",
        "i" = "API token: `{condition$token}`"
      )
      condition$class <- c("api_token_rejected", condition$class)
    } else if (out$status_code == 405) {
      condition$info <- c(
        "!" = "The URL returned the HTTP error code 405 (POST Method not allowed).",
        "i" = "Are you sure the URI points to an active REDCap API endpoint?",
        "i" = "URI: `{condition$redcap_uri}`"
      )
      condition$class <- c("cannot_post", condition$class)
    } else {
      condition$class <- c("unexpected_error", condition$class)

      if (!is.null(out$outcome_message)) {
        # Throw error containing outcome message and attach that as the parent
        # Get the name of the function called inside try_redcapr so it can be mentioned in the error message
        calling_fn <- quo_get_expr(quo)
        # Handle case where try_redcapr had multiline expr
        if (inherits(calling_fn, "{")) {
          calling_fn <- calling_fn[[2]]
        }

        condition$parent <- catch_cnd(abort(out$outcome_message, call = calling_fn))
      }
    }
    cli_abort(
      c(condition$message, condition$info),
      call = condition$call,
      parent = condition$parent,
      class = condition$class,
      redcapr_status_code = out$status_code,
      redcapr_outcome_message = out$outcome_message
    )
  }

  # If we made it here return the data
  out$data
}


#' @title
#' Check whether a REDCap database has repeat forms
#'
#' @description
#' Simple utility function checking for the existence of repeat forms in a REDCap
#' database.
#'
#' @returns A boolean.
#'
#' @param db_data A REDCap dataframe.
#'
#' @keywords internal

db_has_repeat_forms <- function(db_data) {
  "redcap_repeat_instance" %in% names(db_data)
}

#' @title
#' Remove rows with empty data
#'
#' @description
#' Remove rows that are empty in all associated data columns (those derived from
#' fields in REDCap). This occurs when a form is filled out in an event, but
#' other forms are not. Regardless of a form's status, all forms in an event are
#' included in the output so long as any form in the event contains data.
#'
#' This only applies to longitudinal REDCap databases containing events.
#'
#' @returns A dataframe.
#'
#' @param data A REDCap dataframe from a longitudinal database,
#' pre-processed within a `distill_*` function.
#' @param my_record_id The record ID defined in the project.
#'
#' @keywords internal

remove_empty_rows <- function(data, my_record_id) {
  # Define non-data columns that do not impact analysis
  nondata_cols <- c(
    my_record_id,
    "redcap_event",
    "redcap_arm",
    "redcap_survey_timestamp",
    "redcap_survey_identifier",
    "redcap_form_instance",
    "redcap_event_instance",
    "form_status_complete"
  )

  # Subset columns that do impact analysis
  data_cols <- names(data)[!names(data) %in% nondata_cols]

  # Filter for rows where specified columns have any non-NA data
  data %>%
    filter(if_any(all_of(data_cols), ~ !is.na(.))) # nolint: object_usage_linter
}

#' @title Determine if an object is labelled
#'
#' @description
#' An internal utility function used to inform other processes of whether or
#' not a given object has been labelled (i.e. with `make_labelled()`).
#'
#' @details
#' An object is considered labelled if it has "label" attributes.
#'
#' @returns A boolean
#'
#' @param obj An object to be tested for "label" attributes
#'
#' @keywords internal

is_labelled <- function(obj) {
  some(obj, function(x) !is.null(attr(x, "label")))
}

#' @title
#' Make skimr labels from default skimr outputs
#'
#' @description
#' A simple helper function that returns all default `skimr` names as formatted
#' character vector for use in `make_lablled`
#'
#' @details
#' All labels supplied are manually created and agreed upon as human-readable
#'
#' @return A character vector
#'
#' @keywords internal
#'
make_skimr_labels <- function() {
  skimr_labels <- c(
    skim_type = "Data Type",
    n_missing = "Count of Missing Values",
    complete_rate = "Proportion of Non-Missing Values",
    AsIs.n_unique = "Count of Unique Values in AsIs",
    AsIs.min_length = "Minimum Length of AsIs Values",
    AsIs.max_length = "Maximum Length of AsIs Values",
    character.min = "Shortest Value (Fewest Characters)",
    character.max = "Longest Value (Most Characters)",
    character.empty = "Count of Empty Values",
    character.n_unique = "Count of Unique Values",
    character.whitespace = "Count of Values that are all Whitespace",
    Date.min = "Earliest",
    Date.max = "Latest",
    Date.median = "Median",
    Date.n_unique = "Count of Unique Values",
    difftime.min = "Minimum",
    difftime.max = "Maximum",
    difftime.median = "Median",
    difftime.n_unique = "Count of Unique Values",
    factor.ordered = "Is the Categorical Value Ordered?",
    factor.n_unique = "Count of Unique Values",
    factor.top_counts = "Most Frequent Values",
    logical.mean = "Proportion of TRUE Values",
    logical.count = "Count of Logical Values",
    numeric.mean = "Mean",
    numeric.sd = "Standard Deviation ",
    numeric.p0 = "Minimum",
    numeric.p25 = "25th Percentile",
    numeric.p50 = "Median",
    numeric.p75 = "75th Percentile",
    numeric.p100 = "Maximum",
    numeric.hist = "Histogram",
    POSIXct.min = "Earliest",
    POSIXct.max = "Latest",
    POSIXct.median = "Median",
    POSIXct.n_unique = "Count of Unique Values"
  )

  skimr_labels
}

#' @title Safely set variable labels
#'
#' @description
#' A utility function for setting labels of a tibble from a named vector while
#' accounting for labels that may not be present in the data.
#'
#' @returns A tibble
#'
#' @keywords internal

safe_set_variable_labels <- function(data, labs) {
  labs_to_keep <- intersect(names(labs), colnames(data))
  labelled::set_variable_labels(data, !!!labs[labs_to_keep])
}

#' @title
#' Extract a specific metadata tibble from a supertibble
#'
#' @description
#' Utility function to extract a specific metadata tibble from a supertibble
#' given a `redcap_form_name`
#'
#' @param supertbl A supertibble generated by [read_redcap()].
#' @param redcap_form_name A character string identifying the `redcap_form_name`
#' the metadata tibble is associated with.
#'
#' @return
#' A tibble
#'
#' @keywords internal

extract_metadata_tibble <- function(supertbl, redcap_form_name) {
  supertbl$redcap_metadata[supertbl$redcap_form_name == redcap_form_name][[1]]
}

Try the REDCapTidieR package in your browser

Any scripts or data that you put into this service are public.

REDCapTidieR documentation built on April 3, 2025, 10:50 p.m.