Nothing
#' Regexp that matches a classification file section heading
#' @keywords internal
classification_file_section_heading_regexp <- function() {
'^\\[[^\\]]+\\]$'
}
#' Smallest larger value
#'
#' @param vec A numeric vector
#' @param value A numeric value
#'
#' @returns The smallest value in 'vec' that is larger than 'value'
#' @keywords internal
smallest_larger_value <- function(vec, value) {
min(vec[vec > value])
}
#' Get index of first and last line of section
#'
#' @param lines A character vector
#' @param head_line A numeric value, line number of section heading
#' @keywords internal
section_interval <- function(lines, head_line) {
breaks <-
c(stringr::str_which(lines,
classification_file_section_heading_regexp()
),
length(lines)+1
)
(head_line+1):(smallest_larger_value(breaks, head_line)-1)
}
#' Return lines in section
#'
#' @inheritParams section_interval
#' @keywords internal
get_section <- function(lines, head_line) {
lines[section_interval(lines, head_line)]
}
#' Get section in .vs or .agg file
#'
#' @param lines A character vector
#' @param heading Character, the section heading
#' @param key Optional. Character, subkey in section
#' @keywords internal
extract_section <- function(lines, heading, key = NULL) {
head_lines <- stringr::str_which(lines, stringr::fixed(heading))
if (identical(head_lines, integer(0))) {
return(NULL)
}
colname <- stringr::str_remove_all(heading, "\\[|\\]") %>% tolower()
section <-
head_lines %>%
purrr::map(~ get_section(lines, .x)) %>%
unlist() %>%
dplyr::as_tibble() %>%
dplyr::mutate(across("value", stringr::str_trim)) %>%
dplyr::filter(.data$value != "") %>%
tidyr::drop_na("value") %>%
tidyr::separate_wider_delim(cols = "value", delim = "=", names = c('id', colname))
if (! is.null(key)) {
section <- section %>% dplyr::filter(.data$id == key)
}
return(section)
}
#' Create new classification object
#'
#' Constructor for internal functions
#'
#' @inheritParams px_classification
#'
#' @returns A classification object
#' @keywords internal
new_classification <- function(name, prestext, domain, df) {
if (length(name) == 0 | length(prestext) == 0 | length(domain) == 0) {
stop("name, prestext, and domain must be non-empty.")
}
vs <- list(name = as.character(name),
prestext = as.character(prestext),
domain = as.character(domain), #list
# type = "V", only support V
df = df
)
structure(vs, class = "classification")
}
#' Get data set from aggregation file
#'
#' @param path to an aggregation file
#'
#' @returns A data frame with columns 'valuecode' (character) and a second column
#' (ordered) named after the aggregation
#' @keywords internal
aggregation_df <- function(path) {
agg_lines <- readLines_guess_encoding(path)
error_if_aggregation_file_is_missing_mandatory_headings(agg_lines)
aggregation_name <-
basename(path) %>%
stringr::str_remove("\\.agg$")
aggregation_groups_df <-
extract_section(agg_lines, '[Aggreg]') %>%
dplyr::filter(stringr::str_detect(.data$id, "^\\d+$"))
aggregation_text_df <- extract_section(agg_lines, '[Aggtext]')
if (nrow(aggregation_groups_df) != nrow(aggregation_text_df)) {
warning(paste0("The number of aggregation groups (",
nrow(aggregation_groups_df), ") and Aggtexts (",
nrow(aggregation_text_df),
") differ in '", basename(path), "'."
)
)
}
aggregation_df <-
dplyr::left_join(aggregation_groups_df, aggregation_text_df, by = "id") %>%
dplyr::select(-"id") %>%
dplyr::mutate(across(everything(), ~ dplyr::na_if(.x, "")))
df <- dplyr::tibble(valuecode = as.character(),
!!aggregation_name := factor(levels = aggregation_df$aggtext,
ordered = TRUE
)
)
for (aggregation_group in aggregation_df$aggreg) {
section <- extract_section(agg_lines, paste0("[", aggregation_group, "]"))
aggregation_text <- aggregation_df$aggtext[aggregation_df$aggreg == aggregation_group]
if (is.null(section)) {
warning("No group with label '[", aggregation_group, "]' found in '",
basename(path), "'."
)
} else {
aggregation_values <-
section %>%
dplyr::select(-"id") %>%
dplyr::pull(1)
df <-
dplyr::bind_rows(df,
dplyr::tibble(valuecode = aggregation_values,
!!aggregation_name := factor(aggregation_text,
levels = aggregation_df$aggtext,
ordered = TRUE
)
)
)
}
}
return(df)
}
#' px classification from path
#'
#' Create px classification from .vs and .agg files
#'
#' @inheritParams px_classification
#'
#' @returns A classification object
#' @keywords internal
px_classification_from_path <- function(vs_path, agg_paths) {
vs_lines <- readLines_guess_encoding(vs_path)
error_if_vs_file_is_missing_mandatory_headings(vs_lines)
valuecode_df <- extract_section(vs_lines, '[Valuecode]')
valuetext_df <- extract_section(vs_lines, '[Valuetext]')
if (is.null(valuetext_df)) {
vs_df <- dplyr::select(valuecode_df, -"id")
} else {
if (nrow(valuecode_df) != nrow(valuetext_df)) {
warning(paste0("[Valuecode] and [Valuetext] in '", basename(vs_path),
"' have different number of rows."
)
)
}
vs_df <-
dplyr::left_join(valuecode_df, valuetext_df, by = "id") %>%
dplyr::select(-"id") %>%
dplyr::mutate(across(everything(), ~ dplyr::na_if(.x, "")))
}
if (missing(agg_paths)) {
vs_dir <- dirname(vs_path)
agg_paths <- file.path(vs_dir, extract_section(vs_lines, '[Aggreg]')$aggreg)
}
if (any(!file.exists(agg_paths))) {
warning(paste0("Aggregation files: ",
paste(basename(agg_paths[!file.exists(agg_paths)]),
collapse = ", "
),
" do not exist."
)
)
agg_paths <- agg_paths[file.exists(agg_paths)]
}
if (length(agg_paths) == 0) {
df <- vs_df
} else {
agg_df <-
agg_paths %>%
purrr::map(aggregation_df) %>%
purrr::compact() %>%
purrr::reduce(dplyr::full_join, by = "valuecode")
df <- dplyr::full_join(vs_df, agg_df, by = 'valuecode')
}
new_classification(name = extract_section(vs_lines, '[Descr]', 'Name') %>% dplyr::pull(2),
prestext = extract_section(vs_lines, '[Descr]', 'Prestext') %>% dplyr::pull(2),
domain = extract_section(vs_lines, '[Domain]') %>% dplyr::pull(2),
df = df
)
}
#' px classification from data frame
#'
#' Create px classification from a data frame.
#'
#' @inheritParams px_classification
#'
#' @returns A classification object
#' @keywords internal
px_classification_from_df <- function(name, prestext, domain, df) {
character_columns <- intersect(names(df), c("valuecode", "valuetext"))
df_formatted <-
df %>%
dplyr::mutate(across(all_of(character_columns), as.character),
across(-all_of(character_columns), ~ factor(.x, ordered = TRUE))
)
new_classification(name = name,
prestext = prestext,
domain = domain,
df = df_formatted
)
}
#' Create a classification object
#'
#' Create a classification object from a data frame or .vs and .agg files.
#'
#' A classification is a combination of a value set and zero, one, or more
#' aggregations. The classification can be saved as .vs and .agg files
#' (see [px_save_classification()]).
#'
#' If a classification is created from a data frame, the arguments `name` and
#' `prestext` and `domain` are required. If a classification is created from .vs
#' and .agg files, all other arguments should be empty.
#'
#' For aggregations, it's normally possible to have codes and values in the .agg
#' file under the sections '\[Aggreg\]' and '\[Aggtext\]' respectively. However,
#' in pxmake's implementation of classifications, it's not possible to
#' distinguish between these. When a new classification is created, the values
#' in the section '\[Aggtext\]' are used as aggregation values.
#'
#' Only value sets of type 'V' are supported. Type values sets with type 'H' and
#' 'N' are not supported.
#'
#' @param name Optional. Name of the classification.
#' @param prestext Optional. Presentation text.
#' @param domain Optional. Character vector with domain names. Used to link to
#' PX-file.
#' @param df Optional. A data frame with required column 'valuecode' and
#' optional column 'valuetext', if the codes have texts. Each additional column
#' represents an aggregation. The column name is the name of the aggregation.
#' If the column type is character the aggregation levels will be sorted
#' alphabetically; use factors to control the ordering.
#' @param vs_path Optional. Path to a values set (.vs) file.
#' @param agg_paths Optional.
#' \itemize{
#' \item If NULL, aggregation paths are automatically taken from the
#' \[Aggreg\] field in the .vs file.
#' \item Use a vector of paths to one or more aggregation files (.agg) to
#' manually choose aggregations.
#' \item Use character(0) if aggregations from the .vs files should not be
#' added automatically.
#' }
#' @returns A classification object
#'
#' @examples
#' # Create classification from data frame
#' library(tibble)
#'
#' c1 <- px_classification(name = "Age5",
#' prestext = "Ages 0-9 - 60+",
#' domain = "age",
#' df = tribble(
#' ~valuecode, ~valuetext, ~`25 years classes`,
#' "0-4", "0-4 years", "0-24",
#' "5-9", "5-9 years", "0-24",
#' "10-14", "10-14 years", "0-24",
#' "15-19", "15-19 years", "0-24",
#' "20-24", "20-24 years", "0-24",
#' "25-29", "25-29 years", "25-49",
#' "30-34", "30-34 years", "25-49",
#' "35-39", "35-39 years", "25-49",
#' "40-44", "40-44 years", "25-49",
#' "45-49", "45-49 years", "25-49",
#' "50-54", "50-54 years", "50-74",
#' "55-59", "55-59 years", "50-74",
#' "60-64", "60-64 years", "50-74",
#' "65-69", "65-69 years", "50-74",
#' "70-74", "70-74 years", "50-74",
#' "75+", "75+ years", "75+"
#' )
#' )
#'
#' # Create classifications from files
#'
#' vs_file <- system.file("extdata", "Age5.vs", package = "pxmake")
#'
#' agg_files <- c(
#' system.file("extdata", "10-years_classes.agg", package = "pxmake"),
#' system.file("extdata", "25-years_classes.agg", package = "pxmake")
#' )
#'
#' if (vs_file != "" & all(agg_files != "")) {
#' # Create classification from .vs file and use aggregations mentioned in .vs
#' c2 <- px_classification(vs_path = vs_file)
#'
#' # Create classification from .vs file and manually specify aggregation files
#' c3 <- px_classification(vs_path = vs_file,
#' agg_paths = agg_files
#' )
#'
#' identical(c2, c3)
#' }
#' @export
px_classification <- function(name, prestext, domain, df, vs_path, agg_paths) {
validate_px_classification_arguments(name, prestext, domain, df, vs_path, agg_paths)
if (all(missing(vs_path), missing(agg_paths))) {
c <- px_classification_from_df(name, prestext, domain, df)
} else if (missing(agg_paths)) {
c <- px_classification_from_path(vs_path)
} else {
c <- px_classification_from_path(vs_path, agg_paths)
}
return(c)
}
#' Add lines numbers
#'
#' Add '1=', '2=', etc. at start of each line.
#'
#' @param lines A character vector
#'
#' @returns A character vector
#' @keywords internal
enumerate_lines <- function(lines) {
stringr::str_glue("{seq_along(lines)}={lines}") %>%
paste(collapse = "\n")
}
blank_line <- function() " "
#' Save classification as .vs file
#'
#' Write value set part of classification to a file.
#'
#' @param c A classification object
#' @param directory Directory to save the file in
#'
#' @returns Nothing
#' @keywords internal
write_value_set <- function(c, directory) {
filename <- file.path(directory, paste0(c$name, ".vs"))
aggregation_df <-
c$df %>%
dplyr::select(-"valuecode", -"valuetext")
if (ncol(aggregation_df) == 0) {
aggregation_text <- ''
} else {
aggregation_names <-
aggregation_df %>%
names() %>%
paste0(".agg") %>%
enumerate_lines()
aggregation_text <-
stringr::str_glue("[Aggreg]",
aggregation_names,
"{blank_line()}\n",
.sep = "\n"
)
}
get_values <- function(var) {
c$df %>%
tidyr::replace_na(setNames(list(""), var)) %>%
dplyr::pull(var) %>%
enumerate_lines()
}
value_codes <- get_values("valuecode")
value_text <- get_values("valuetext")
vs_lines <-
stringr::str_glue(
"[Descr]",
"Name={c$name}",
"Prestext={c$prestext}",
"Type=V",
blank_line(),
aggregation_text,
"[Domain]",
enumerate_lines(c$domain),
blank_line(),
"[Valuecode]",
value_codes,
blank_line(),
"[Valuetext]",
value_text,
.sep = "\n"
)
file_connection <- file(filename, encoding = "ISO-8859-1")
writeLines(vs_lines, file_connection)
close(file_connection)
}
#' Save classification as .agg file
#'
#' Write one aggregation column in a classification to an .agg file.
#'
#' @param aggregation Character, the column name of the aggregation in the
#' classification
#' @param c A classification object
#' @param directory Directory to save the file in
#'
#' @returns Nothing
#' @keywords internal
write_aggregation <- function(aggregation, c, directory) {
filename <- file.path(directory, paste0(aggregation, ".agg"))
groups <- levels(na.omit(c$df[[aggregation]]))
agg_texts <-
c$df %>%
dplyr::distinct(.data$valuecode, !!rlang::sym(aggregation)) %>%
tidyr::drop_na() %>%
dplyr::arrange(as.character(!!rlang::sym(aggregation))) %>%
tidyr::pivot_wider(names_from = all_of(aggregation),
values_from = "valuecode",
values_fn = list
) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rowwise() %>%
dplyr::mutate(str =
stringr::str_glue("[{name}]\n",
"{paste(enumerate_lines(value), collapse = '\n')}"
)
) %>%
dplyr::pull("str") %>%
paste0(collapse = paste0("\n", blank_line(), "\n"))
agg_lines <-
stringr::str_glue(
"[Aggreg]",
"Name={aggregation}",
"Valueset={c$name}",
enumerate_lines(groups),
blank_line(),
"[Aggtext]",
enumerate_lines(groups),
blank_line(),
agg_texts,
.sep = "\n"
)
file_connection <- file(filename, encoding = "ISO-8859-1")
writeLines(agg_lines, file_connection)
close(file_connection)
}
#' Save classification as .vs and .agg files
#'
#' Save a classification object as .vs and .agg files. The .vs file contains
#' the value set and the .agg files contain the aggregations.
#'
#' @param c A classification object
#' @param path Directory to save the files in
#'
#' @returns Nothing.
#'
#' @examples
#' # Save classification as .vs as .agg files
#' c <- px_classification(name = "Age5",
#' prestext = "Ages 0-9 - 60+",
#' domain = "age",
#' df = age_classification
#' )
#'
#' px_save_classification(c, path = tempdir())
#' @export
px_save_classification <- function(c, path) {
write_value_set(c, path)
aggregations <-
c$df %>%
dplyr::select(-"valuecode", -"valuetext") %>%
names()
purrr::walk(aggregations, write_aggregation, c = c, directory = path)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.