#' MSF data dictionaries and dummy datasets
#'
#' These function produces MSF OCA dictionaries based on DHIS2 (for outbreaks)
#' and Kobo (for surveys) data sets defining the data element name, code,
#' short names, types, and key/value pairs for translating the codes
#' into human-readable format.
#'
#' @param disease Specify which disease you would like to use.
#' - `msf_dict()` supports "AJS", "Cholera", "Measles", "Meningitis"
#' - `msf_dict_survey()` supports "Mortality", "Nutrition", "Vaccination_long"
#' and "Vaccination_short" (only used in surveys if `template = TRUE`)
#'
#' @param name the name of the dictionary stored in the package.
#' - `msf_dict_survey()` supports Kobo dictionaries not stored within this package,
#' to use these: specify `name`as path to .xlsx file and set the `template = False`
#'
#' @param tibble Return data dictionary as a tidyverse tibble (default is TRUE)
#'
#' @param compact if `TRUE` (default), then a nested data frame is returned
#' where each row represents a single variable and a nested data frame column
#' called "options", which can be expanded with [tidyr::unnest()]. This only
#' works if `long = TRUE`.
#'
#' @param long If `TRUE` (default), the returned data dictionary is in long
#' format with each option getting one row. If `FALSE`, then two data frames
#' are returned, one with variables and the other with content options.
#'
#' @param template Only used for `msf_dict_survey()`.
#' If `TRUE` (default) the returned data dictionary is a generic
#' MSF OCA ERB pre-approved dictionary. If `FALSE` allows you to read in your
#' own Kobo dictionary by defining a path in `name`.
#'
#' @seealso [matchmaker::match_df()] [gen_data()] [msf_dict_survey()]
#' @export
#' @examples
#'
#' if (require("dplyr") & require("matchmaker")) {
#' withAutoprint({
#' # You will often want to use MSF dictionaries to translate codes to human-
#' # readable variables. Here, we generate a data set of 20 cases:
#' dat <- gen_data(
#' dictionary = "Cholera",
#' varnames = "data_element_shortname",
#' numcases = 20,
#' org = "MSF"
#' )
#' print(dat)
#'
#' # We want the expanded dictionary, so we will select `compact = FALSE`
#' dict <- msf_dict(disease = "Cholera", long = TRUE, compact = FALSE, tibble = TRUE)
#' print(dict)
#'
#' # Now we can use matchmaker to filter the data:
#' dat_clean <- matchmaker::match_df(dat, dict,
#' from = "option_code",
#' to = "option_name",
#' by = "data_element_shortname",
#' order = "option_order_in_set"
#' )
#' print(dat_clean)
#' })
#' }
msf_dict <- function(disease, name = "MSF-outbreak-dict.xlsx", tibble = TRUE,
compact = TRUE, long = TRUE) {
disease <- get_dictionary(disease, org = "MSF")$outbreak
if (length(disease) == 0) {
stop("disease must be one of 'Cholera', 'Measles', 'Meningitis', or 'AJS'", call. = FALSE)
}
# get excel file path (need to specify the file name)
path <- system.file("extdata", name, package = "epidict")
# read in categorical variable content options
dat_opts <- readxl::read_excel(path, sheet = "OptionCodes")
# read in data set - pasting the disease name for sheet
dat_dict <- readxl::read_excel(path, sheet = disease)
# clean col names
colnames(dat_dict) <- tidy_labels(colnames(dat_dict))
colnames(dat_opts) <- tidy_labels(colnames(dat_opts))
# clean future var names
# excel names (data element shortname)
# csv names (data_element_name)
dat_dict$data_element_shortname <- tidy_labels(dat_dict$data_element_shortname)
dat_dict$data_element_name <- tidy_labels(dat_dict$data_element_name)
# Adding hardcoded var types to options list
# 2 types added to - BOOLEAN, TRUE_ONLY
BOOLEAN <- data.frame(
option_code = c("1", "0"),
option_name = c("[1] Yes", "[0] No"),
option_uid = c(NA, NA),
option_order_in_set = c(1, 2),
optionset_uid = c("BOOLEAN", "BOOLEAN")
)
TRUE_ONLY <- data.frame(
option_code = c("1", "NA"),
option_name = c("[1] TRUE", "[NA] Not TRUE"),
option_uid = c(NA, NA),
option_order_in_set = c(1, 2),
optionset_uid = c("TRUE_ONLY", "TRUE_ONLY")
)
# bind these on to the bottom of dat_opts (option list) as rows
suppressWarnings(dat_opts <- dplyr::bind_rows(dat_opts, BOOLEAN, TRUE_ONLY))
# add the unique identifier to link above three in dictionary to options list
for (i in c("BOOLEAN", "TRUE_ONLY")) {
dat_dict$used_optionset_uid[dat_dict$data_element_valuetype == i] <- i
}
# remove back end codes from front end var in the options list
dat_opts$option_name <- gsub("^\\[.*\\] ", "", dat_opts$option_name)
if (long) {
outtie <- dplyr::left_join(dat_dict, dat_opts,
by = c("used_optionset_uid" = "optionset_uid")
)
outtie <- if (tibble) tibble::as_tibble(outtie) else outtie
# Return second option: a list with data dictionary and value options seperate
} else {
if (tibble) {
dat_dict <- tibble::as_tibble(dat_dict)
dat_opts <- tibble::as_tibble(dat_opts)
}
outtie <- list(
dictionary = dat_dict,
options = dat_opts
)
}
# produce clean compact data dictionary for use in gen_data
if (long && compact == TRUE) {
squished <- dplyr::group_by(outtie, !!quote(data_element_shortname))
if (utils::packageVersion("tidyr") > "0.8.99") {
squished <- tidyr::nest(squished, options = dplyr::starts_with("option_"))
} else {
squished <- tidyr::nest(squished, dplyr::starts_with("option_"), .key = "options")
outtie <- dplyr::select(outtie, -dplyr::starts_with("option_"))
outtie <- dplyr::distinct(outtie)
squished <- dplyr::left_join(outtie, squished, by = "data_element_shortname")
}
return(dplyr::ungroup(squished))
}
# return dictionary dataset
outtie
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.