R/count.R

Defines functions process_single_count_target process_summaries.count_layer

#' @noRd
#' @export
process_summaries.count_layer <- function(x, ...) {

  if (env_get(x, "is_built_nest", default = FALSE)) {
    refresh_nest(x)
  }

  # Subset the local built_target based on where
  # Catch errors
  evalq({
    tryCatch({
      # Check 'kept_levels' and stop if they're not in the target dataset
      #Logic to check for keep_levels
      # If this is not a built nest
      if (!("tplyr_layer" %in% class(env_parent()))) {
        keep_levels_logic <- expr(!is.null(levels_to_keep))
        # If this is a built nest and we're begining to process
      } else if ("tplyr_layer" %in% class(env_parent()) && length(target_var) == 2) {
        keep_levels_logic <- expr(!is.null(levels_to_keep) && quo_is_symbol(target_var[[1]]))
        # If this is a built nest and we are processing the "sub" layers
      } else {
        keep_levels_logic <- expr(FALSE)
      }

      # Check that all values in 'keep levels' are present in the data
      if (eval_tidy(keep_levels_logic)) {
        if (is.factor(target[[as_name(tail(target_var, 1)[[1]])]])) {
          target_levels <- levels(target[[as_name(tail(target_var, 1)[[1]])]])
        } else {
          target_levels <- unique(target[[as_name(tail(target_var, 1)[[1]])]])
        }
        kept_levels_found <- unlist(levels_to_keep) %in% target_levels
        assert_that(all(kept_levels_found),
                    msg = paste0("level passed to `kept_levels` not found: ",
                                 paste0(levels_to_keep[!kept_levels_found],
                                        collapse = "",
                                        sep = " ")))
      }

      # Do this here to make sure that defaults are available everywhere else
      # Downstream
      if (is.null(include_total_row)) include_total_row <- FALSE
      if (is.null(total_row_label)) total_row_label <- "Total"
      if (is.null(include_missing_subjects_row)) include_missing_subjects_row <- FALSE
      if (is.null(missing_subjects_row_label)) missing_subjects_row_label <- "Missing"

      # Save this for the denominator where, but only if it hasn't been saved yet.
      if (is.null(built_target_pre_where)) built_target_pre_where <- built_target

      built_target <- built_target %>%
        filter(!!where) %>%
        filter(!!kept_levels)

      ## Drop levels if target var is factor and kept levels used
      if (eval_tidy(keep_levels_logic) &&
          is.factor(built_target[[as_name(tail(target_var, 1)[[1]])]])) {
        # Pull out the levels that weren't in keep levels.
        target_levels <- levels(built_target[[as_name(tail(target_var, 1)[[1]])]])
        drop_levels_ind <- !(target_levels %in% levels_to_keep)
        drop_these_levels <- target_levels[drop_levels_ind]
        # Use forcats to remove the levels that weren't in the "keep levels"
        built_target <- built_target %>%
          mutate(!!tail(target_var,1)[[1]] := fct_drop(!!tail(target_var,1)[[1]], only = drop_these_levels))
      }

    }, error = function(e) {
      abort(paste0("group_count `where` condition `",
                   as_label(where),
                   "` is invalid. Filter error:\n", e))
    })

    if (!quo_is_symbol(target_var[[1]]) && as_name(target_var[[1]]) %in% names(target)) {
      warning(paste0("The first target variable has been coerced into a symbol.",
                     " You should pass variable names unquoted."), immediate. = TRUE)

      target_var[[1]] <- quo(!!sym(as_name(target_var[[1]])))
    }

    if (length(target_var) == 2 && !quo_is_symbol(target_var[[2]]) &&
        as_name(target_var[[2]]) %in% names(target)) {
      warning(paste0("The second target variable has been coerced into a symbol.",
                     "You should pass variable names unquoted."), immediate. = TRUE)

      target_var[[2]] <- quo(!!sym(as_name(target_var[[2]])))
    }

  }, envir = x)

  rename_missing_values(x)

  # Preprocssing in the case of two target_variables
  if (length(env_get(x, "target_var")) > 2) abort("Only up too two target_variables can be used in a count_layer")

  else if (length(env_get(x, "target_var")) == 2) {

    # Change treat_var to factor so all combinations appear in nest
    factor_treat_var(x)

    # If the nest_sort_index isn't null, reset it
    # This happens if the layer is reloaded
    if (!is.null(env_get(x, "nest_sort_index", default = NULL))) env_bind(x, nest_sort_index = NULL)

    process_nested_count_target(x)

  } else {

    process_count_denoms(x)

    process_single_count_target(x)

  }

  prepare_format_metadata(x)

  # Trigger any derivation of additional statistics
  map(x$stats, process_statistic_data)

  x
}

#' @param x A count layer with a single target_var
#'
#' This function uses dplyr to filter out the where call, pull out the distinct
#' rows if applicable, and tallies the different target_var values.
#'
#' If include_total_row is true a row will be added with a total row labeled
#' with total_row_label.
#'
#' Complete is used to complete the combinations of by, treat_var, and target_var
#'
#' @noRd
process_single_count_target <- function(x) {
  evalq({

    # The current environment should be the layer itself
    process_count_n(current_env())

    if (include_total_row) {
      process_count_total_row(current_env())

      # Used to temporarily check formats
      if (is.null(format_strings)) tmp_fmt <- gather_defaults.count_layer(current_env())
      if (count_missings && !(is.null(denom_ignore) || length(denom_ignore) == 0) &&
          (("pct" %in% total_count_format$vars || "distinct_pct" %in% total_count_format$vars) ||
           # Logic if no total_count format
           (is.null(total_count_format) && is.null(format_strings) && ("pct" %in% tmp_fmt$n_counts$vars || "distinct_pct" %in% tmp_fmt$n_counts$vars)) ||
           (is.null(total_count_format) && ("pct" %in% count_fmt$n_counts$vars || "distinct_pct" %in% count_fmt$n_counts$vars))
          )
      ) {
        warning("Your total row is ignoring certain values. The 'pct' in this row may not be 100%",
                immediate. = TRUE)
      }
    }

    if (include_missing_subjects_row) {
      process_missing_subjects_row(current_env())
    }

    if (is.null(count_row_prefix)) count_row_prefix <- ""

    # If a denoms variable is factor then it should be character for the denoms calculations
    denoms_df_prep <- denoms_df %>%
      mutate(
        across(dplyr::where(is.factor), ~as.character(.))
      )

    # Nested counts might have summary var come through as numeric
    if ('summary_var' %in% map_chr(denoms_by, as_name) && is.numeric(denoms_df_prep$summary_var)) {
      denoms_df_prep$summary_var <- as.character(denoms_df_prep$summary_var)
    }

    # But if a summary_stat variable is factor, then the denoms needs to match this
    # This happens if sorting was triggered for the variable as a factor
    # fct_cols will be a named logical vector of the variable names, where TRUE
    # is the summary_stat variables that are factors
    fct_cols <- map_lgl(summary_stat, is.factor)

    if (any(fct_cols)) {
      # From the bool vector of fct_cols, grab the names of the ones that
      # are TRUE
      # Create a regular expression like var1|var2|var3
      fct_cols_ns <- paste(names(fct_cols[fct_cols]), collapse="|")

      # Reset each factor variable to have the appropriate levels for the denom
      # so that 0 filling can happen appropriately later on
      denoms_df_prep <- denoms_df_prep %>%
        mutate(
          across(matches(fct_cols_ns), ~ factor(., levels=levels(summary_stat[[cur_column()]])))
        )

      rm(fct_cols_ns)
    }

    # rbind tables together
    numeric_data <- bind_rows(summary_stat, total_stat, missing_subjects_stat) %>%
      rename("summary_var" = !!target_var[[1]]) %>%
      group_by(!!!denoms_by) %>%
      do(get_denom_total(., denoms_by, denoms_df_prep, "n")) %>%
      mutate(summary_var = prefix_count_row(summary_var, count_row_prefix)) %>%
      ungroup()

    rm(denoms_df_prep, fct_cols)

  }, envir = x)
}

#' Process the n count data and put into summary_stat
#'
#' @param x Count layer
#' @noRd
process_count_n <- function(x) {

  evalq({

    if (is.null(denoms_by)) denoms_by <- c(treat_var, cols)
    denoms_by_ <- map(denoms_by, function(x) {
      if (as_name(x) == "summary_var") quo(!!target_var[[1]])
      else x
    })

    summary_stat <- built_target %>%
      mutate(
        across(
          .cols = any_of(map_chr(c(denoms_by, target_var, by), ~as_name(.))),
          .fns = function(x) if (is.factor(x)) x else as.factor(x)
        )
      ) %>%
      # Group by variables including target variables and count them
      group_by(!!treat_var, !!!by, !!!target_var, !!!cols) %>%
      summarize(
        n = n(),
        distinct_n = n_distinct(!!!distinct_by, !!treat_var, !!!target_var)
      ) %>%
      mutate(
        n = as.double(n),
        distinct_n = as.double(distinct_n)
      ) %>%
      ungroup()

    # If there is a missing_count_string, but its not in the dataset
    if (!is.null(missing_count_string) &&

        !((any(unname(unlist(missing_count_list)) %in% unique(built_target[, as_name(target_var[[1]])]))) ||
          any(is.na(built_target[, as_name(target_var[[1]])])))) {
      # This adds the missing string as a factor to the tallies. This is needed
      # to make sure the missing row is added even if there are no missing values.
      summary_stat <- summary_stat %>%
        mutate(!!target_var[[1]] := fct_expand(.data[[as_name(target_var[[1]])]],
                                               names(missing_count_list)))
    }

    # Need to mark this for nested counts
    if (!exists('outer_')) outer_ <- FALSE

    complete_levels <- summary_stat %>%
      complete_and_limit(treat_var, by, cols, target_var, limit_data_by,
                         .fill = list(n = 0, total = 0, distinct_n = 0, distinct_total = 0),
                         outer=outer_)

    summary_stat <- complete_levels %>%
      # Change the treat_var and first target_var to characters to resolve any
      # issues if there are total rows and the original column is numeric
      mutate(!!treat_var := as.character(!!treat_var)) %>%
      mutate(!!as_name(target_var[[1]]) := as.character(!!target_var[[1]])) %>%
      group_by(!!!denoms_by_) %>%
      ungroup()

    rm(denoms_by_)
    # If there is no values in summary_stat, which can happen depending on where. Return nothing
    if (nrow(summary_stat) == 0) return()
  }, envir = x)

}


#' Get Logical vector that is used to remove the treat_var and cols
#'
#' In total row and missing subject counts, denoms_by needs to be stripped of
#' cols and treat_var variables, otherwise it will error out in the group_by
#'
#' @param denoms_by The layer denoms by
#' @param treat_var table treat var
#' @param cols tables cols vars
#'
#' @return list of quosures
#' @noRd
get_needed_denoms_by <- function(denoms_by, treat_var, cols) {
  map_lgl(denoms_by, function(x, treat_var, cols) {
    all(as_name(x) != as_name(treat_var),
        as_name(x) != map_chr(cols, as_name))
  }, treat_var, cols)
}

#' Process the amounts for a total row
#'
#' @param x A Count layer
#' @noRd
process_count_total_row <- function(x) {
  evalq({

    # Check if denoms_by wasn't passed and by was passed.
    if (exists("include_total_row") && include_total_row &&
        identical(denoms_by, c(treat_var, cols)) && any(map_lgl(by, quo_is_symbol)) > 0) {
      warning("A total row was added in addition to non-text by variables, but
no denoms_by variable was set. This may cause unexpected results. If you wish to
change this behavior, use `set_denoms_by()`.", immediate. = TRUE)
    }

    # Logical vector that is used to remove the treat_var and cols
    needed_denoms_by <- get_needed_denoms_by(denoms_by, treat_var, cols)

    #Create an expression to evaluate filter
    if (!count_missings) {
      filter_logic <- expr(!(!!target_var[[1]] %in% names(missing_count_list)))
    } else {
      filter_logic <- expr(TRUE)
    }

    # create a data.frame to create total counts
    total_stat <- summary_stat %>%
      #Filter out any ignored denoms
      filter(!!filter_logic) %>%
      # Use distinct if this is a distinct total row
      # Group by all column variables
      group_by(!!treat_var, !!!cols, !!!denoms_by[needed_denoms_by]) %>%
      summarize(
        n = sum(n),
        distinct_n = sum(distinct_n)
      ) %>%
      ungroup() %>%
      # Create a variable to label the totals when it is merged in.
      mutate(!!as_name(target_var[[1]]) := total_row_label) %>%
      # Create variables to carry forward 'by'. Only pull out the ones that
      # aren't symbols
      group_by(!!!extract_character_from_quo(by)) %>%
      # ungroup right away to make sure the complete works
      ungroup()
  }, envir = x)
}

#' Process the amounts for a missing subjects row
#'
#' @param x A Count layer
#' @noRd
process_missing_subjects_row <- function(x) {
  evalq({

    # Logical vector that is used to remove the treat_var and cols
    needed_denoms_by <- get_needed_denoms_by(denoms_by, treat_var, cols)

    # Create the merge variables to join the header_n data
    mrg_vars <- map_chr(c(pop_treat_var, cols, denoms_by[needed_denoms_by]), as_name)
    names(mrg_vars)[1] <- as_name(treat_var)
    # create a data.frame to create total counts
    missing_subjects_stat <- built_target %>%
      # Use distinct if this is a distinct total row
      # Group by all column variables
      distinct(!!treat_var, !!!cols, !!!by, !!!distinct_by) %>%
      ungroup() %>%
      count(!!treat_var, !!!cols, !!!by, name="n_present") %>%
      # complete based on missing groupings
      complete(!!treat_var, !!!cols, !!!by, fill = list(n_present = 0)) %>%
      left_join(
        header_n %>% rename(header_tots = n), by = mrg_vars
      ) %>%
      # Create a variable to label the totals when it is merged in.
      mutate(
        !!as_name(target_var[[1]]) := missing_subjects_row_label,
        distinct_n = header_tots - n_present
      ) %>%
      # Create variables to carry forward 'by'. Only pull out the ones that
      # aren't symbols
      group_by(!!!extract_character_from_quo(by)) %>%
      # ungroup right away to make sure the complete works
      ungroup() %>%
      select(-c(n_present, header_tots))
  }, envir = x)
}

#' Prepare metadata for table
#'
#' @param x count_layer object
#' @noRd
prepare_format_metadata.count_layer <- function(x) {
  evalq({

    # Get formatting metadata prepared
    if (is.null(format_strings)) {
      format_strings <- gather_defaults(environment())
    } else if (!'n_counts' %in% names(format_strings)) {
      format_strings[['n_counts']] <- gather_defaults(environment())[['n_counts']]
    }


    # If there is both n & distinct, or pct and distinct_pct there has to be a
    # distinct_by
    # If both distinct and n
    if (((("distinct_n" %in% map(format_strings$n_counts$vars, as_name) &
           "n" %in% map(format_strings$n_counts$vars, as_name)) |
          # or both distinct_pct and pct
          ("distinct_pct" %in% map(format_strings$n_counts$vars, as_name) &
           "pct" %in% map(format_strings$n_counts$vars, as_name))) &
         # AND distinct_by is null
         is.null(distinct_by))) {
      stop("You can't use distinct and non-distinct parameters without specifying a distinct_by")
    }

    # If distinct_by isn't there, change distinct and distinct_pct
    if (is.null(distinct_by) & "distinct_n" %in% map(format_strings$n_counts$vars, as_name)) {
      distinct_ind <- which(map(format_strings$n_counts$vars, as_name) %in% "distinct_n")
      format_strings$n_counts$vars[[distinct_ind]] <- expr(n)
    }
    if (is.null(distinct_by) & "distinct_pct" %in% map(format_strings$n_counts$vars, as_name)) {
      distinct_ind <- which(map(format_strings$n_counts$vars, as_name) %in% "distinct_pct")
      format_strings$n_counts$vars[[distinct_ind]] <- expr(pct)
    }

    # Pull max character length from counts. Should be at least 1
    n_width <- max(c(nchar(numeric_data$n), 1L), na.rm = TRUE)

    # If a layer_width flag is present, edit the formatting string to display the maximum
    # character length
    if (str_detect(format_strings[['n_counts']]$format_string, "a|A")) {
      # Replace 'a' with appropriate 'x'
      replaced_string <- str_replace(format_strings[['n_counts']]$format_string, "a",
                                     paste(rep("x", n_width), collapse = ""))
      # Replace 'A' with appropriate 'X'
      replaced_string <- str_replace(replaced_string, "A",
                                     paste(rep("X", n_width), collapse = ""))

      # Make a new f_str and replace the old one
      format_strings[['n_counts']] <- f_str(replaced_string, !!!format_strings$n_counts$vars)
    }
    max_length <- format_strings[['n_counts']]$size
  }, envir = x)
}

#' @noRd
#' @export
process_formatting.count_layer <- function(x, ...) {
  evalq({

    # Calculate the indentation length. This is needed if there are missing
    #values in a nested count layer. Length is sent to string construction and
    #used to split the string.
    indentation_length <- ifelse(is.null(indentation), 0, nchar(encodeString(indentation)))

    formatted_data <- numeric_data %>%
      filter_numeric(numeric_cutoff,
                     numeric_cutoff_stat,
                     numeric_cutoff_column,
                     treat_var) %>%
      # Mutate value based on if there is a distinct_by
      mutate(n = {
        construct_count_string(.n = n, .total = total,
                               .distinct_n = distinct_n,
                               .distinct_total = distinct_total,
                               count_fmt = format_strings[['n_counts']],
                               max_layer_length = max_layer_length,
                               max_n_width = max_n_width,
                               missing_string = missing_string,
                               missing_f_str = missing_count_string,
                               summary_var = summary_var,
                               indentation_length = indentation_length,
                               total_count_format = total_count_format,
                               missing_subjects_count_format = missing_subjects_count_format,
                               total_row_label = total_row_label,
                               missing_subjects_row_label = missing_subjects_row_label,
                               has_missing_count = has_missing_count)
      }) %>%
      # Pivot table
      pivot_wider(id_cols = c(match_exact(by), "summary_var"),
                  names_from = c(!!treat_var, match_exact(cols)), values_from = n,
                  names_prefix = "var1_") %>%
      # Replace the by variables and target variable names with `row_label<n>`
      replace_by_string_names(quos(!!!by, summary_var))

    if (is_built_nest) {
      # I had trouble doing this in a 'tidy' way so I just did it here.
      # First column is always the outer target variable.
      # Last row label is always the inner target variable
      row_labels <- vars_select(names(formatted_data), starts_with("row_label"))
      # Replace the missing 'outer' with the original target
      # The indexing looks weird but the idea is to get rid of the matrix with the '[, 1]'
      formatted_data[is.na(formatted_data[[1]]), 1] <- formatted_data[is.na(formatted_data[[1]]),
                                                                      tail(row_labels, 1)]
    }

    if (!is_empty(stats)) {
      # Process the statistical data formatting
      formatted_stats_data <- map(stats, process_statistic_formatting) %>%
        reduce(full_join, by = c('summary_var', match_exact(c(by, head(target_var, -1))))) %>%
        # Replace the by variables and target variable names with `row_label<n>`
        replace_by_string_names(quos(!!!by, summary_var))

      formatted_data <- full_join(formatted_data, formatted_stats_data,
                                  by = vars_select(names(formatted_data), starts_with("row_label")))
    }

    # Attach the row identifier
    formatted_data <- assign_row_id(formatted_data, 'c')
  }, envir = x)

  add_order_columns(x)

  env_get(x, "formatted_data")

}

#' Format n counts for display in count_layer
#'
#' left padding = (maximum_n_width - this_n_width)
#' right padding = (maximum_layer_width - this_layer_width[after left padding])
#'
#' @param .n Vector of counts for each cell
#' @param .total  Vector of totals. Should be the same length as .n and be the
#'   denominator that column is based off of.
#' @param count_fmt The f_str object the strings are formatted around.
#' @param max_layer_length The maximum layer length of the whole table
#' @param max_n_width The maximum length of the actual numeric counts
#' @param .distinct_n Vector of distinct counts
#' @param .distinct_total Vector of total counts for distinct
#' @param missing_string The value of the string used to note missing. Usually NA
#' @param missing_f_str The f_str object used to display missing values
#' @param summary_var The summary_var values that contain the values of the
#'   target variable.
#' @param indentation_length If this is a nested count layer. The row prefixes
#'   must be removed
#' @param total_count_format f_str for total counts
#' @param missing_subjects_count_format f_str for missing subjects
#' @param total_row_label Label string for total rows
#' @param missing_subjects_row_label Label string for missing subjects
#' @param has_missing_count Boolean for if missing counts are present
#'
#' @return A tibble replacing the original counts
#' @noRd
construct_count_string <- function(.n, .total, .distinct_n = NULL, .distinct_total = NULL,
                                   count_fmt = NULL, max_layer_length, max_n_width, missing_string,
                                   missing_f_str, summary_var, indentation_length, total_count_format,
                                   missing_subjects_count_format, total_row_label, missing_subjects_row_label,
                                   has_missing_count) {

  ## Added this for processing formatting in nested count layers where this won't be processed yet
  if (is.null(max_layer_length)) max_layer_length <- 0
  if (is.null(max_n_width)) max_n_width <- 0
  missing_rows <- FALSE
  total_rows <- FALSE
  missing_subject_rows <- FALSE

  # Add in the missing format if its null and there are missing counts
  if (has_missing_count && is.null(missing_f_str)) {
    missing_f_str <- count_fmt
  }

  if (!is.null(missing_f_str)) {

    # This subsets the indentation length for nested count layers. The 'outer'
    # values will be cut off but they will never be "missing" so that shouldn't
    # be an issue.
    summary_var <- str_sub(summary_var, indentation_length)

    missing_rows <- summary_var %in% missing_string
    missing_vars_ord <- map_chr(missing_f_str$vars, as_name)
  }

  ## Pull out string information for total rows
  if (!is.null(total_count_format)) {
    total_rows <- summary_var %in% total_row_label
    total_vars_ord <- map_chr(total_count_format$vars, as_name)
  }

  ## Pull out string information for missing subject rows
  if (!is.null(missing_subjects_count_format)) {
    missing_subject_rows <- summary_var %in% missing_subjects_row_label
    missing_subject_vars_ord <- map_chr(missing_subjects_count_format$vars, as_name)
  }

  vars_ord <- map_chr(count_fmt$vars, as_name)

  # str_all is a list that contains character vectors for each parameter that might be calculated
  str_all <- vector("list", 5)
  # Append the repl_str to be passed to do.call
  str_all[1] <- count_fmt$repl_str
  # Iterate over every variable
  rows_ <- !missing_rows & !total_rows & !missing_subject_rows
  for (i in seq_along(vars_ord)) {
    str_all[[i + 1]] <-  count_string_switch_help(vars_ord[i], count_fmt,
                                                  .n[rows_],
                                                  .total[rows_],
                                                  .distinct_n[rows_],
                                                  .distinct_total[rows_],
                                                  vars_ord)
  }

  # Logic for missing
  # Same logic as above, just add for missing
  missing_str_all <- vector("list", 5)
  missing_str_all[1] <- missing_f_str$repl_str
  for (i in seq_along(missing_vars_ord)) {
    missing_str_all[[i + 1]] <- count_string_switch_help(missing_vars_ord[i],
                                                         missing_f_str,
                                                         .n[missing_rows],
                                                         .total[missing_rows],
                                                         .distinct_n[missing_rows],
                                                         .distinct_total[missing_rows],
                                                         missing_vars_ord)
  }

  total_str_all <- vector("list", 5)
  total_str_all[1] <- total_count_format$repl_str
  for (i in seq_along(total_vars_ord)) {
    total_str_all[[i + 1]] <- count_string_switch_help(total_vars_ord[i],
                                                       total_count_format,
                                                       .n[total_rows],
                                                       .total[total_rows],
                                                       .distinct_n[total_rows],
                                                       .distinct_total[total_rows],
                                                       total_vars_ord)
  }

  missing_subs_str_all <- vector("list", 5)
  missing_subs_str_all[1] <- missing_subjects_count_format$repl_str
  for (i in seq_along(missing_subject_vars_ord)) {
    missing_subs_str_all[[i + 1]] <- count_string_switch_help(missing_subject_vars_ord[i],
                                                              missing_subjects_count_format,
                                                              .n[missing_subject_rows],
                                                              .total[missing_subject_rows],
                                                              .distinct_n[missing_subject_rows],
                                                              .distinct_total[missing_subject_rows],
                                                              missing_subject_vars_ord)
  }

  # Put the vector strings together. Only include parts of str_all that aren't null
  # nm is non-missing, m is missing, t is total, ms is missing subjects
  string_nm <- do.call(sprintf, str_all[!map_lgl(str_all, is.null)])
  if (!is.null(missing_vars_ord)) string_m <-  do.call(sprintf, missing_str_all[!map_lgl(missing_str_all, is.null)])
  if (!is.null(total_vars_ord)) string_t <- do.call(sprintf, total_str_all[!map_lgl(total_str_all, is.null)])
  if (!is.null(missing_subject_vars_ord)) string_ms <- do.call(sprintf, missing_subs_str_all[!map_lgl(missing_subs_str_all, is.null)])
  # string_ is the final string to return. Merge the missing, non-missing, and others together
  string_ <- character(sum(length(string_nm), length(string_m), length(string_t), length(string_ms)))
  string_[rows_] <- string_nm
  string_[total_rows] <-   string_t
  string_[missing_rows] <-  string_m
  string_[missing_subject_rows] <-  string_ms
  # Left pad set to 0 meaning it won't pad to the left at all
  # right pad is set to the maximum n count in the table
  string_ <- pad_formatted_data(string_, 0, max_n_width)

  string_
}

#' Switch statement used in processing
#'
#' @param x Current parameter to format
#' @param count_fmt f_str object used to format
#' @param .n values used in 'n'
#' @param .total values used in pct calculations
#' @param .distinct_n values used in 'distinct_n'
#' @param vars_ord values used in distinct pct
#'
#' @noRd
count_string_switch_help <- function(x, count_fmt, .n, .total,
                                     .distinct_n, .distinct_total, vars_ord){

  switch(x,
         "n" = map_chr(.n, num_fmt, which(vars_ord == "n"), fmt = count_fmt),
         "pct" = {
           # Makea vector of ratios between n and total. Replace na values with 0
           pcts <- replace(.n/.total, is.na(.n/.total), 0)
           # Make a vector of percentages
           map_chr(pcts*100, num_fmt, which(vars_ord == "pct"), fmt = count_fmt)
         },
         "distinct_n" =  map_chr(.distinct_n, num_fmt, which(vars_ord == "distinct_n"), fmt = count_fmt),
         "distinct_pct" = {
           # Same as pct
           pcts <- replace(.distinct_n/.distinct_total, is.na(.distinct_n/.distinct_total), 0)

           map_chr(pcts*100, num_fmt, which(vars_ord == "distinct_pct"), fmt = count_fmt)
         },
         "total" = {
           map_chr(.total, num_fmt, which(vars_ord == "total"), fmt = count_fmt)
         },
         "distinct_total" = {
           map_chr(.distinct_total, num_fmt, which(vars_ord == "distinct_total"), fmt = count_fmt)
         }
  )


}

#' @param x Count Layer
#'
#' When nesting a count layer in some cases a treatment group will not apear in one of the
#' groups so this will turn the variable into a factor to force it to complete in the
#' complete logic
#'
#' @noRd
factor_treat_var <- function(x) {
  evalq({

    built_target[, as_name(treat_var)] <- as.factor(unlist(built_target[, as_name(treat_var)]))

  }, envir = env_parent(x))
}


#' Prefix a row with a specifed character
#'
#' @param row_i The row to prefix
#' @param count_row_prefix The prefix
#'
#' @return The modified row
#' @noRd
prefix_count_row <- function(row_i, count_row_prefix) {

  paste0(count_row_prefix, row_i)

}

#' @noRd
process_count_denoms <- function(x) {

  evalq({

    # This used in case there is a character passed to the layer
    layer_params <- c(target_var, treat_var, by, cols)
    # Logical vector indicating if the param appears in the target dataset.
    param_apears <- map_lgl(layer_params, function(x) {
      as_name(x) %in% names(target)
    })

    # Raise errors if a denom is ignored but there isn't a missing count string
    if (!is.null(denom_ignore) && is.null(missing_count_string)) {
      abort("A value(s) were set with 'denom_ignore' but no missing count was set. Your percentages/totals may not have meaning.")
    }

    # Logic to determine how to subset target for denominators.
    if (is.null(denom_where)) {
      # If a pop_data was passed change the denom_where to the pop_data_where
      if (!isTRUE(try(identical(pop_data, target)))) {
        denom_where <- quo(TRUE)
      } else {
        # Otherwise make denom_where equal to table where
        denom_where <- where
      }
    }

    # Because the missing strings haven't replaced the missing strings, it has to happen here.
    # Expand denoms contains the
    if (!is.null(missing_count_list)) {
      expand_denoms <- names(missing_count_list) %in% unlist(denom_ignore)
      denom_ignore <- c(denom_ignore, unname(missing_count_list[expand_denoms]))
    }


    # Subset the local built_target based on where
    # Catch errors
    tryCatch({
      denom_target <- built_target_pre_where %>%
        filter(!!denom_where) %>%
        filter(!(!!target_var[[1]] %in% unlist(denom_ignore)))
    }, error = function(e) {
      abort(paste0("group_count `where` condition `",
                   as_label(denom_where),
                   "` is invalid. Filter error:\n", e))
    })

    # For distinct counts, we want to defer back to the
    # population dataset. Trigger this by identifying that
    # the population dataset was overridden
    if (!isTRUE(try(identical(pop_data, target)))) {
      # If the denom_where doesn't match the where AND the denom_where isn't true
      # than the user passed a custom denom_where
      if (deparse(denom_where) != deparse(where) && !isTRUE(quo_get_expr(denom_where))) {
        warning(paste0("A `denom_where` has been set with a pop_data. The `denom_where` has been ignored.",
                       "You should use `set_pop_where` instead of `set_denom_where`.", sep = "\n"),
                immediate. = TRUE)
      }
    }

    denoms_df_n <- denom_target %>%
      group_by(!!!layer_params[param_apears]) %>%
      summarize(
        n = n()
      ) %>%
      ungroup()

    # If user specified treatment var as a denom by then remove it
    # and if inside a nested layer, rename summary_var in the denoms_by
    # for building this table
    if (is.null(denoms_by)) denoms_by <- c(treat_var, cols)
    dist_grp <- denoms_by
    which_is_treatvar <- which(
      map_lgl(denoms_by, ~ as_name(.) %in% c(as_name(pop_treat_var), as_name(treat_var)))
    )
    if (length(which_is_treatvar) > 0) {
      dist_grp <- dist_grp[-which_is_treatvar]
    }
    is_svar <- map_lgl(dist_grp, ~as_name(.) == "summary_var")
    if (any(is_svar)) {
      dist_grp[[which(is_svar)]] <- layer_params[[1]]
    }

    denoms_df_dist <- built_pop_data %>%
      filter(!!denom_where) %>%
      group_by(!!pop_treat_var, !!!dist_grp) %>%
      summarize(
        distinct_n = n_distinct(!!!distinct_by, !!pop_treat_var)
      ) %>%
      ungroup()

    # Create merge variables to get the denoms dataframe merged correctly
    by_join <- map_chr(append(dist_grp, pop_treat_var, after=0), as_name)
    names(by_join) <- map_chr(append(dist_grp, treat_var, after=0), as_name)


    denoms_df <- denoms_df_n %>%
      left_join(denoms_df_dist, by = by_join)

    if (as_name(target_var[[1]]) %in% names(target)) {
      denoms_df <- denoms_df %>%
        rename("summary_var" := !!target_var[[1]])
    }

    rm(by_join, denoms_df_n, denoms_df_dist, dist_grp, is_svar, which_is_treatvar)

  }, envir = x)

}

rename_missing_values <- function(x) {
  evalq({
    # Rename missing values
    if (!is.null(missing_count_list)) {
      missing_count_list_ <- missing_count_list
      # If the target variable isn't a character or a factor. Coerse it as a
      # character. This can happen if the target var is numeric
      if (!(class(built_target[, as_name(target_var[[1]])][[1]]) %in% c("factor", "character"))) {
        built_target <- built_target %>%
          mutate(!!target_var[[1]] := as.character(!!target_var[[1]]))
      }
      # Collapse the factors that were missing.
      for (i in seq_along(missing_count_list)) {

        # Logic if the missing_count_list contains an implicit NA
        if (any(is.nan(missing_count_list[[i]]))) {
          ## Repalce the NA in the missing_count list with an explicit value
          missing_count_list_[[i]] <- ifelse(missing_count_list[[i]] == "NaN", "(Missing_NAN)", as.character(missing_count_list[[i]]))
          # Replace the implicit values in built_target
          built_target <- built_target %>%
            mutate(!!target_var[[1]] := fct_expand(!!target_var[[1]], "(Missing_NAN)")) %>%
            mutate(!!target_var[[1]] := ifelse(is.nan(!!target_var[[1]]), "(Missing_NAN)", as.character(!!target_var[[1]])))

        } else if (any(is.na(missing_count_list[[i]]))) {
          ## Repalce the NA in the missing_count list with an explicit value
          missing_count_list_[[i]] <- ifelse(is.na(as.character(missing_count_list[[i]])) , "(Missing)", as.character(missing_count_list[[i]]))
          # Replace the implicit values in built_target
          built_target <- built_target %>%
            mutate(!!target_var[[1]] := fct_expand(!!target_var[[1]], "(Missing)")) %>%
            mutate(!!target_var[[1]] := fct_na_value_to_level(!!target_var[[1]], level="(Missing)"))

        }
        built_target <- built_target %>%
          mutate(
            # Warnings suppressed here. They can happen if something is called missing
            # That isn't in the data, that isn't something to warn about in this context
            !!target_var[[1]] := suppressWarnings(fct_collapse(!!target_var[[1]], !!names(missing_count_list)[i] := missing_count_list_[[i]]))
          )
      }
    }
  }, envir = x)
}

filter_numeric <- function(.data,
                           numeric_cutoff,
                           numeric_cutoff_stat,
                           numeric_cutoff_column,
                           treat_var,
                           by = NULL) {

  if (is.null(numeric_cutoff)) {
    return(.data)
  }

  vals <- .data %>%
    {if (is.null(numeric_cutoff_column)) . else filter(., !!treat_var == numeric_cutoff_column)} %>%
    mutate(
      pct = n/total,
      distinct_pct = distinct_n/distinct_total
    ) %>%
    filter(!!sym(numeric_cutoff_stat) >= !!numeric_cutoff) %>%
    extract2("summary_var")

  .data %>%
    filter(summary_var %in% vals)


}

Try the Tplyr package in your browser

Any scripts or data that you put into this service are public.

Tplyr documentation built on May 29, 2024, 10:37 a.m.