Nothing
#' Sum of Weights
#'
#' @description
#' If specifying this stat with a provided weight variable, the sum of all weights
#' is computed.
#'
#' @param values The weights to sum up.
#' @param group Grouping variables.
#'
#' @return
#' Returns the sum of weights.
#'
#' @noRd
sum_wgt_qol <- function(values, group){
# Sum up only the weight values
collapse::fsum(x = values, g = group)
}
#' Unweighted Frequencies of Values Greater Than Zero
#'
#' @description
#' This stat always computes unweighted sums, completely ignoring provided weights.
#' It also just counts the values which are greater than 0.
#'
#' @param values The values to sum up.
#' @param group Grouping variables.
#'
#' @return
#' Returns uweighted frequencies of values greater than zero.
#'
#' @noRd
freq_g0_qol <- function(values, group){
# Create a vector of same size as input vector and make every value a 1
# that is greater than 0.
# Sum up this new vector to get unweighted frequencies.
values[values > 0] <- 1
collapse::fsum(values, g = group)
}
#' Calculate Any Percentile
#'
#' @description
#' Calculates any percentile by grouping variables.
#'
#' @param values The values for which to compute percentiles.
#' @param weight A weighting variable.
#' @param group Grouping variables.
#' @param probs The percentiles that should be computed.
#'
#' @return
#' Returns weighted percentiles.
#'
#' @noRd
percentiles_qol <- function(values, weight, group, probs){
if (anyNA(values)){
message(" ! WARNING: To calculate percentiles there may be no NAs in the value variables.")
return(NULL)
}
collapse::BY(values, g = group, collapse::fquantile, w = weight, probs = probs)
}
#' Get Missings of Combined Grouping Variables
#'
#' @description
#' Calculate the number of missings, which is generated by the combination of all
#' grouping variables.
#'
#' @param group_vars Grouping variables.
#' @param notes Flag if missings should be computed and message should be printed.
#' @param na.rm Flag if NA values are removed or not.
#'
#' @return
#' Returns a message stating the number of missing values based on grouping variables.
#'
#' @noRd
get_group_missings <- function(group_vars, notes, na.rm){
if (na.rm){
return(FALSE)
}
if (!notes){
return(FALSE)
}
# Count combined missings of group variables. To count as missings only
# one of the variables has to be NA in a position.
missings <- collapse::fsum(!stats::complete.cases(group_vars))
# If there are missings calculate percentage and output a message
if (missings > 0){
nobs <- length(group_vars[[1]])
percent <- round(missings * 100 / nobs, 1)
none_miss <- nobs - missings
message(" ~ NOTE: ", format(missings, big.mark = ".", decimal.mark = ","),
" missings generated from grouping variables (", percent,
" %). Number of observations: ", format(none_miss, big.mark = ".", decimal.mark = ","), "/", format(nobs, big.mark = ".", decimal.mark = ","))
}
}
#' Compute Percentages by Group
#'
#' @description
#' Calculate the percentages inside a specific group of variables by generating the
#' super group first and joining the super group totals back to the data frame.
#'
#' @param original_df The original unchanged data frame provided by the user.
#' @param summary_df Already summed up result data frame to build up upon.
#' @param statistics User provided statistics.
#' @param group_vars Grouping variables.
#' @param formats Provided list of variables with formats to apply.
#' @param values he values for which to compute percentages.
#' @param weight Weighting variable.
#' @param list_of_statistics A list of all statistics that can be computed.
#' @param monitor_df Data frame which stores the monitoring values.
#' @param fast_pct Flag that states whether the function can take a shortcut for faster
#' calculation.
#'
#' @return
#' Returns a data frame with added percentages, if the statistic was specified.
#'
#' @noRd
compute_group_percentages <- function(original_df,
summary_df,
statistics,
group_vars,
formats,
values,
weight,
list_of_statistics,
monitor_df,
fast_pct){
if (!"pct_group" %in% statistics){
return(list(summary_df, monitor_df))
}
monitor_df <- monitor_df |> monitor_start("pct_group", paste0("Calc(", paste(group_vars, collapse = " + "), ")"))
# Create pseudo group variable for totals
original_df[[".temp_key"]] <- 1
summary_df[[".temp_key"]] <- 1
# Get rid of the last grouping variable to get the grouping above the
# desired grouping
if (length(group_vars) > 1){
super_group <- group_vars[-length(group_vars)]
}
else{
super_group <- ".temp_key"
}
cut_off_var <- group_vars[length(group_vars)]
# If no fast percentage (means original_df is the full data frame)
if (!fast_pct){
# It is faster to summarise first without formats
# and then apply the formats to a much smaller data frame.
result_list <- original_df |>
matrix_summarise(values,
super_group,
original_df[[weight]],
NULL,
list_of_statistics,
monitor_df)
super_df <- result_list[[1]]
rm(result_list)
# Convert numeric variables back which have become characters during summarisation
# and apply formats
super_df <- super_df |>
convert_numeric(super_group) |>
apply_format(formats, super_group) |>
dropp("sum_wgt")
# Catch new variable names
values <- super_df |> inverse(group_vars)
}
# If fast percentage (means original_df is already summarised)
else{
values <- values[grepl("_sum$", values)]
# Summarise first
super_df <- original_df |>
collapse::fgroup_by(super_group) |>
collapse::fsummarise(across(values, collapse::fsum)) |>
convert_numeric(super_group)
super_df <- super_df |>
apply_format(formats, super_group)
}
# Generate new variable names for super_group totals
new_names <- paste0(values, "_qol")
old_names <- values
# Final summarise with formatted data frame
super_df <- super_df |>
collapse::fgroup_by(super_group) |>
collapse::fsummarise(across(values, collapse::fsum)) |>
collapse::frename(stats::setNames(old_names, new_names)) |>
keep(super_group, new_names)
# Join super_df to summarized data frame
if (is.null(super_group)){
# In case of only totals
joined_df <- merge(summary_df, super_df,
by = ".temp_key",
allow.cartesian = TRUE)
}
else{
# In case there is at least one grouping variable
joined_df <- collapse::join(summary_df, super_df,
on = super_group,
how = "left",
verbose = FALSE)
}
# Return finished data frame
joined_df <- joined_df |>
calculate_percentages(values, "pct_group", cut_off_var) |>
dropp(".temp_key")
monitor_df <- monitor_df |> monitor_end()
list(joined_df, monitor_df)
}
#' Compute Percentages by Group
#'
#' @description
#' Calculate the percentages inside a specific group of variables, which builds upon
#' a data frame which already carries the summarised super groups.
#'
#' @param summary_df Already summed up result data frame to build up upon.
#' @param super_df Already summed up super group results.
#' @param statistics User provided statistics.
#' @param group_vars Grouping variables.
#' @param values he values for which to compute percentages.
#' @param monitor_df Data frame which stores the monitoring values.
#' @param fast_pct Flag that states whether the function can take a shortcut for faster
#' calculation.
#'
#' @return
#' Returns a data frame with added percentages, if the statistic was specified.
#'
#' @noRd
compute_group_percentages_short <- function(summary_df,
super_df,
statistics,
group_vars,
values,
monitor_df,
fast_pct){
if (!"pct_group" %in% statistics){
return(list(summary_df, monitor_df))
}
monitor_df <- monitor_df |> monitor_start("pct_group", paste0("Calc(", paste(group_vars, collapse = " + "), ")"))
# Get rid of the last grouping variable to get the grouping above the
# desired grouping
super_group <- group_vars[-length(group_vars)]
# Generate new variable names for super_group totals
old_names <- values
new_names <- paste0(values, "_qol")
super_df <- super_df |>
keep(super_group, old_names) |>
collapse::frename(stats::setNames(old_names, new_names))
# Join data frames by super group
joined_df <- collapse::join(summary_df, super_df,
on = super_group,
how = "left",
verbose = FALSE)
# Return finished data frame
joined_df <- joined_df |>
calculate_percentages(values, "pct_group", group_vars[length(group_vars)])
monitor_df <- monitor_df |> monitor_end()
list(joined_df, monitor_df)
}
#' Compute Total Percentages
#'
#' @description
#' Calculate the percentages based on the overall totals.
#'
#' @param original_df The original unchanged data frame provided by the user.
#' @param summary_df Already summed up result data frame to build up upon.
#' @param statistics User provided statistics.
#' @param group_vars Grouping variables.
#' @param values he values for which to compute percentages.
#' @param weight Weighting variable.
#' @param list_of_statistics A list of all statistics that can be computed.
#' @param monitor_df Data frame which stores the monitoring values.
#' @param fast_pct Flag that states whether the function can take a shortcut for faster
#' calculation.
#'
#' @return
#' Returns a data frame with added percentages, if the statistic was specified.
#'
#' @noRd
compute_total_percentages <- function(original_df,
summary_df,
statistics,
group_vars,
values,
weight,
list_of_statistics,
monitor_df,
fast_pct){
if (!"pct_total" %in% statistics){
return(list(summary_df, monitor_df))
}
monitor_df <- monitor_df |> monitor_start("pct_total", paste0("Calc(", paste(group_vars, collapse = " + "), ")"))
# Generate new variable names for super_group totals
new_names <- paste0(values, "_qol")
# Compute the data frame with summaries of the super groups
super_df <- original_df |>
collapse::fungroup()
# If no fast percentage (means original_df is the full data frame)
if (!fast_pct){
result_list <- super_df |>
matrix_summarise(values,
NULL,
super_df[[weight]],
NULL,
list_of_statistics,
monitor_df)
# Split results and monitor
super_df <- result_list[[1]]
# Catch new variable names
values <- super_df |>
dropp("sum_wgt") |>
inverse(group_vars)
}
# If fast percentage (means original_df is already summarised)
else{
values <- values[grepl("_sum$", values)]
# Simple Summarise
super_df <- original_df |>
collapse::fsummarise(across(values, collapse::fsum))
}
# Generate new variable names for super_group totals
new_names <- paste0(values, "_qol")
old_names <- values
super_df <- super_df |>
collapse::frename(stats::setNames(old_names, new_names)) |>
keep(new_names)
# Join super_df to summarized data frame
summary_df[[".temp_key"]] <- 1
super_df[[".temp_key"]] <- 1
joined_df <- merge(summary_df, super_df,
by = ".temp_key",
allow.cartesian = TRUE)
# Return finished data frame
joined_df <- joined_df |>
calculate_percentages(values, "pct_total", ".temp_key") |>
dropp(".temp_key")
monitor_df <- monitor_df |> monitor_end()
list(joined_df, monitor_df)
}
#' Compute Total Percentages
#'
#' @description
#' Calculate the percentages based on the overall totals, which builds upon
#' a data frame which already carries the summarised overall totals.
#'
#' @param summary_df Already summed up result data frame to build up upon.
#' @param total_df Already summed up result data frame to build up upon.
#' @param statistics User provided statistics.
#' @param values he values for which to compute percentages.
#' @param last_group_var The last variable of grouping variables.
#' @param pct_name Name of percentages.
#' @param monitor_df Data frame which stores the monitoring values.
#'
#' @return
#' Returns a data frame with added percentages, if the statistic was specified.
#'
#' @noRd
compute_total_percentages_short <- function(summary_df,
total_df,
statistics,
values,
last_group_var,
pct_name = "pct_total",
monitor_df){
if (!pct_name %in% statistics){
return(list(summary_df, monitor_df))
}
monitor_df <- monitor_df |> monitor_start(pct_name, paste0("Calc(", paste(last_group_var, collapse = " + "), ")"))
# Join super_df to summarized data frame
summary_df[[".temp_key"]] <- 1
total_df[[".temp_key"]] <- 1
joined_df <- merge(summary_df, total_df,
by = ".temp_key",
allow.cartesian = TRUE)
# Return finished data frame
if (pct_name == "pct_total"){
joined_df <- joined_df |>
calculate_percentages(values, pct_name, ".temp_key") |>
dropp(".temp_key")
}
else{
# For group percentages with depth 1
joined_df <- joined_df |>
calculate_percentages(values, pct_name, last_group_var) |>
dropp(".temp_key")
}
monitor_df <- monitor_df |> monitor_end()
list(joined_df, monitor_df)
}
#' Compute Percentages
#'
#' @description
#' The main process of calculating percentages.
#'
#' @param joined_df Data frame with joined super group or overall totals.
#' @param values he values for which to compute percentages.
#' @param last_group_var The last variable of grouping variables.
#' @param pct_name Name of percentages.
#'
#' @return
#' Returns a data frame with added percentages.
#'
#' @noRd
calculate_percentages <- function(joined_df, values, pct_name, last_group_var){
# Specify numerators and denominators
numerator <- values
if (!all(values %in% names(joined_df))){
# In case variable names have the statistic extension at the end
numerator <- paste0(values, "_sum")
}
denominator <- paste0(values, "_qol")
new_values <- paste0(gsub("_sum$", "", values), "_", pct_name)
# Compute percentages for every variable
for (i in seq_along(numerator)) {
current_num <- numerator[i]
current_den <- denominator[i]
current_new_var <- new_values[i]
# Evaluate percentage and set division by 0 to NA
result <- joined_df[[current_num]] * 100 / joined_df[[current_den]]
result[is.nan(result)] <- NA
joined_df[[current_new_var]] <- result
}
joined_df |>
dropp(denominator)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.