#' Generate random linelist or survey data
#'
#' Based on a dictionary generator like [msf_dict()] or [msf_dict_survey()],
#' this function will generate a randomized data set based on values defined in
#' the dictionaries. The randomized dataset produced should mimic an excel
#' export from DHIS2 for outbreaks and a Kobo export for surveys.
#'
#' @param dictionary Specify which dictionary you would like to use.
#'
#' @param varnames Specify name of column that contains variable names.
#' If `dictionary` is a survey, `varnames` needs to be `"name"`.
#'
#' @param numcases Specify the number of cases you want (default is 300)
#'
#' @param org the organization the dictionary belongs to. Currently, only MSF
#' exists. In the future, dictionaries from WHO and other organizations may
#' become available.
#'
#' @return a data frame with cases in rows and variables in columns. The number
#' of columns will vary from dictionary to dictionary, so please use the
#' dictionary functions to generate a corresponding dictionary.
#' @export
#' @examples
#'
#' if (require("dplyr") & require("matchmaker")) {
#' withAutoprint({
#'
#' # You will often want to use MSF dictionaries to translate codes to human-
#' # readable variables. Here, we generate a data set of 20 cases:
#' dat <- gen_data(
#' dictionary = "Cholera",
#' varnames = "data_element_shortname",
#' numcases = 20,
#' org = "MSF"
#' )
#' print(dat)
#'
#' # We want the expanded dictionary, so we will select `compact = FALSE`
#' dict <- msf_dict(disease = "Cholera", long = TRUE, compact = FALSE, tibble = TRUE)
#' print(dict)
#'
#' # Now we can use matchmaker to filter the data:
#' dat_clean <- matchmaker::match_df(dat, dict,
#' from = "option_code",
#' to = "option_name",
#' by = "data_element_shortname",
#' order = "option_order_in_set"
#' )
#' print(dat_clean)
#'
#' })
#' }
gen_data <- function(dictionary, varnames = "data_element_shortname", numcases = 300, org = "MSF") {
  # Resolve the requested dictionary for this organization. The flattened
  # result is what gets handed on to the dictionary and generator functions,
  # and the request is treated as a survey when the `survey` element of the
  # lookup result holds exactly one entry.
  matched <- get_dictionary(dictionary, org)
  disease_name <- unlist(matched, use.names = FALSE)
  survey_flag <- length(matched$survey) == 1

  # Both helpers are found by name inside the epidict namespace. Their names
  # carry the lowercase organization as a prefix/infix, e.g. `msf_dict`,
  # `msf_dict_survey` and `gen_msf_data` for org = "MSF".
  org_prefix <- tolower(org)
  pkg_env <- asNamespace("epidict")
  dict_template <- if (survey_flag) "%s_dict_survey" else "%s_dict"
  fetch_dict <- get(sprintf(dict_template, org_prefix), envir = pkg_env)
  make_data <- get(sprintf("gen_%s_data", org_prefix), envir = pkg_env)

  # Fetch the compact, non-tibble dictionary and pass it to the generator
  # together with the caller's column-name and case-count settings.
  dict_table <- fetch_dict(disease = disease_name, tibble = FALSE, compact = TRUE)
  make_data(disease_name, dict_table, survey_flag, varnames = varnames, numcases = numcases)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.