#' Generate data validation queries for a REDCap project based on branching
#' logic specified in the project codebook
#'
#' @description
#' Generates two types of data validation queries using the project codebook
#' (see [`meta_dictionary`]) and [`translate_logic`]:
#'
#' 1. __Field missing__: Branching logic evaluates to `TRUE` (if specified), but
#' field is missing. By default only applies to required fields (`required_field
#' == "y"`) (can modify with argument `non_required`).
#'
#' 2. __Field not missing__: Branching logic evaluates to `FALSE` but field is
#' not missing. Applies to any field with branching logic.
#'
#' @inheritParams fetch_records
#' @inheritParams translate_logic
#'
#' @param forms Character vector of forms (i.e. instruments) to include. Set to
#' `NULL` (the default) to generate queries for all forms in the project.
#' @param dict Metadata dictionary. By default is fetched automatically with
#' [`meta_dictionary`], but it's included as an argument here to allow the
#' user to modify the dictionary before passing to `generate_queries` (e.g. to
#' correct bugs in branching logic). If passing a modified version, make sure
#' it is initially fetched with argument `expand_checkbox = FALSE`.
#' @param lang Query language, either English ("en") or French ("fr"). Defaults
#' to "en".
#' @param query_types Which type of queries to generate (see __Description__
#' above). Options are "missing", "not missing", or "both". Defaults to
#' "both".
#' @param non_required Logical indicating whether to include non-required fields
#' in queries of type "Field missing". Defaults to `FALSE`.
#'
#' @return
#' A [`tibble`][tibble::tbl_df]-style data frame specifying queries, with the
#' following 7 columns:
#'
#' \describe{
#' \item{query_id}{Unique query identifier based on form name and integer sequence}
#' \item{field_name}{Field name (from REDCap dictionary, see [`meta_dictionary`])}
#' \item{form_name}{Form name (from REDCap dictionary)}
#' \item{required}{Is it a required field in REDCap dictionary ("y" or `<NA>`) ?}
#' \item{description}{Description of query (e.g. "Missing: \[Signed consent forms?\]")}
#' \item{suggestion}{Suggestion for query resolution. A human-readable
#' translation of query expression (e.g. If \[Is the participant 18 years or
#' older?\] is "Yes", item \[Signed consent forms?\] should not be missing)}
#' \item{branching_logic}{Branching logic for given field (from REDCap dictionary)}
#' \item{query}{R-style query expression (can be evaluated with [`queryr::query`])}
#' }
#'
#' @importFrom dplyr `%>%` filter mutate select left_join if_else n rename
#' arrange all_of bind_rows group_by ungroup summarize across case_when add_row
#' @importFrom rlang .data .env
#' @export generate_queries
generate_queries <- function(conn,
forms = NULL,
dict = meta_dictionary(
conn,
forms = forms,
expand_checkbox = FALSE
),
lang = "en",
query_types = "both",
non_required = FALSE,
drop_redundant = FALSE,
field_nchar_max = 80L,
on_error = "warn") {
## validate argument lang
lang <- match.arg(lang, c("en", "fr"))
## validate argument forms
m_instr <- meta_forms(conn)
if (!is.null(forms)) {
test_valid(forms, "forms", m_instr$instrument_name)
} else {
forms <- m_instr$instrument_name
}
## validate argument query_types
query_types <- match.arg(query_types, c("missing", "not missing", "both"))
## fetch metadata events
m_events <- meta_events(conn)
event_choices <- paste(
m_events$unique_event_name,
m_events$event_name,
sep = ", ",
collapse = " | "
)
## fetch metadata dictionary
dict$field_label <- cutoff_str_len(string_squish(dict$field_label), field_nchar_max)
dict_check <- expand_checkbox(dict) %>%
add_row(
field_name = "redcap_event_name",
field_label = "Event Name",
field_type = "radio",
choices = event_choices
)
## fetch metadata exported fields
exported_fields <- meta_fields(conn)
## derive long-form data frame of factor options from metadata dictionary
types <- c("radio", "yesno", "dropdown", "checkbox")
fact_check <- prep_meta_factors(dict_check, types = types)
## prep logic-portion of query expressions (i.e. branching_logic)
fields_logic <- c(
"field_name",
"form_name",
"field_type",
"field_label",
"branching_logic",
"required_field"
)
rows_logic <- dict$form_name %in% forms & !dict$field_type %in% "descriptive"
q_logic <- dict[rows_logic, fields_logic]
q_logic$logic_base <- wrap_parens(
translate_logic(
unparens(q_logic$branching_logic),
use_value_labs = TRUE,
use_header_labs = FALSE,
use_is_na = TRUE,
use_in = TRUE,
drop_redundant = FALSE,
meta_factors = fact_check,
meta_dictionary = NULL,
on_error = on_error
)
)
q_logic$logic_base_text <- translate_human(
translate_logic(
q_logic$branching_logic,
use_value_labs = TRUE,
use_header_labs = TRUE,
use_is_na = FALSE,
use_in = FALSE,
drop_redundant = drop_redundant,
field_nchar_max = field_nchar_max,
meta_factors = fact_check,
meta_dictionary = dict_check,
on_error = "ignore"
),
lang = lang
)
## prep field-portion of query expressions (i.e. 'Is missing' or 'Not missing')
# For non-checkbox variables use:
# - is.na(var)
# - !is.na(var)
# For checkbox variables use:
# - var___1 %in% "Unchecked" & var___2 %in% "Unchecked" ...
# - var___1 %in% "Checked" | var___2 %in% "Checked" ...
q_field <- exported_fields %>%
left_join(dict, by = c("original_field_name" = "field_name")) %>%
select(
field_name = "original_field_name",
"export_field_name",
"form_name",
"field_type"
) %>%
mutate(
var_missing = if_else(
.data$field_type %in% "checkbox",
paste0(.data$export_field_name, " %in% 'Unchecked'"),
paste0("is.na(", .data$export_field_name, ")")
),
var_not_missing = if_else(
.data$field_type %in% "checkbox",
paste0(.data$export_field_name, " %in% 'Checked'"),
paste0("!is.na(", .data$export_field_name, ")")
),
) %>%
group_by(
.data$field_name, .data$field_type
) %>%
summarize(
var_missing = paste(.data$var_missing, collapse = " & "),
var_not_missing = paste(.data$var_not_missing, collapse = " | "),
.groups = "drop"
) %>%
mutate(
var_missing = if_else(
.data$field_type %in% "checkbox",
paste0("(", .data$var_missing, ")"),
.data$var_missing
),
var_not_missing = if_else(
.data$field_type %in% "checkbox",
paste0("(", .data$var_not_missing, ")"),
.data$var_not_missing
)
)
## join q_logic and q_field
q_full <- q_logic %>%
left_join(q_field, by = c("field_name", "field_type")) %>%
mutate(rownumber = seq_len(n()))
## queries for var missing when should not be
lab_missing_pre <- ifelse(
lang == "fr",
"Manquant: ",
"Missing: "
)
lab_not_missing_pre <- ifelse(
lang == "fr",
"Renseign\U00E9: ",
"Not missing: "
)
lab_missing_suf <- ifelse(
lang == "fr",
" devrait \U00EAtre renseign\U00E9",
" should not be missing"
)
lab_not_missing_suf <- ifelse(
lang == "fr",
" ne devrait pas \U00EAtre renseign\U00E9",
" should be missing"
)
lab_if <- ifelse(
lang == "fr",
"Si ",
"If "
)
lab_unless <- ifelse(
lang == "fr",
"Sauf si ",
"Unless "
)
lab_item <- ifelse(
lang == "fr",
"L'\U00E9l\U00E9ment ",
"Item "
)
lab_item_middle <- ifelse(
lang == "fr",
", l'\U00E9l\U00E9ment ",
", item "
)
lab_not_missing_mid <- ifelse(
lang == "fr",
" ne devrait \U00EAtre renseign\U00E9 que si ",
" should only be filled if "
)
if (query_types %in% c("missing", "both")) {
if (non_required) {
req_fields <- c("y", NA_character_)
} else {
req_fields <- "y"
}
q_missing <- q_full %>%
filter(.data$required_field %in% req_fields) %>%
mutate(
query_type = "Missing",
query = case_when(
!is.na(.data$branching_logic) & is.na(.data$logic_base) ~ NA_character_,
is.na(.data$logic_base) ~ .data$var_missing,
TRUE ~ paste(.data$logic_base, .data$var_missing, sep = " & ")
),
description = paste0(
.env$lab_missing_pre, enclose(.data$field_label, l = "[", r = "]")
),
suggestion = if_else(
is.na(.data$branching_logic),
paste0(
.env$lab_item, enclose(.data$field_label, l = "[", r = "]"), .env$lab_missing_suf
),
paste0(
.env$lab_if, .data$logic_base_text, .env$lab_item_middle,
enclose(.data$field_label, l = "[", r = "]"), .env$lab_missing_suf
)
)
)
} else {
q_missing <- NULL
}
## queries for var not missing when should be
if (query_types %in% c("not missing", "both")) {
q_not_missing <- q_full %>%
filter(!is.na(.data$branching_logic)) %>%
mutate(
query_type = "Not missing",
query = paste0("!", wrap_parens(.data$logic_base), " & ", .data$var_not_missing),
description = paste0(.env$lab_not_missing_pre, enclose(.data$field_label, l = "[", r = "]")),
suggestion = paste0(
.env$lab_item,
enclose(.data$field_label, l = "[", r = "]"),
.env$lab_not_missing_mid,
.data$logic_base_text
)
) %>%
mutate(
across(
all_of(c("query", "description", "suggestion")),
~ if_else(is.na(.data$logic_base), NA_character_, .x)
)
)
} else {
q_not_missing <- NULL
}
## combine and return
bind_rows(q_missing, q_not_missing) %>%
rename("required" = "required_field") %>%
arrange(.data$rownumber, .data$query_type) %>%
group_by(.data$form_name) %>%
mutate(query_id = formatC(seq_len(n()), width = 3, flag = "0")) %>%
ungroup() %>%
mutate(query_id = paste(.data$form_name, .data$query_id, sep = "__")) %>%
select(
all_of(
c(
"query_id",
"field_name",
"form_name",
"required",
"description",
"suggestion",
"branching_logic",
"query"
)
)
)
}
#' @noRd
translate_human <- function(x, lang = "en") {
lang <- match.arg(lang, c("en", "fr"))
x <- gsub("\"\\[", "[", x)
x <- gsub("\\]\"", "]", x)
if (lang == "fr") {
x <- gsub(" == ", " est ", x)
x <- gsub(" != ", " n'est pas ", x)
x <- gsub(" \\& ", " et ", x)
x <- gsub(" \\| ", " ou ", x)
x <- gsub("\"\"", "manquant", x)
} else {
x <- gsub(" == ", " is ", x)
x <- gsub(" != ", " is not ", x)
x <- gsub(" \\& ", " and ", x)
x <- gsub(" \\| ", " or ", x)
x <- gsub("\"\"", "missing", x)
}
x
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.