R/format.R
In ggeffects: Create Tidy Data Frames of Marginal Effects for 'ggplot' from Model Outputs

Documented in format.ggcomparisons format.ggeffects

#' @param variable_labels Logical, if `TRUE` variable labels are used as column
#' headers. If `FALSE`, variable names are used.
#' @param value_labels Logical, if `TRUE`, value labels are used as values in
#' the table output. If `FALSE`, the numeric values or factor levels are used.
#' @param row_header_separator Character, separator between the different
#' subgroups in the table output.
#' @param collapse_ci Logical, if `TRUE`, the columns with predicted values and
#' confidence intervals are collapsed into one column, e.g. `Predicted (95% CI)`.
#' @param collapse_p Logical, if `TRUE`, the columns with predicted values and
#' p-values are collapsed into one column, where significant p-values are
#' indicated as asterisks.
#' @param combine_levels Logical, if `TRUE`, the levels of the first comparison
#' of each focal term against the second are combined into one column. This is
#' useful when comparing multiple focal terms, e.g. `education = low-high` and
#' `gender = male-female` are combined into `first = low-male` and
#' `second = high-female`.
#' @param n Number of rows to print per subgroup. If `NULL`, a default number
#' of rows is printed, depending on the number of subgroups.
#' @param collapse_tables Logical, if `TRUE`, all tables are combined into one.
#' The tables are not split by further focal terms, but rather are added as
#' columns. Only works when there is more than one focal term.
#'
#' @rdname print
#' @export
format.ggeffects <- function(x,
                             variable_labels = FALSE,
                             value_labels = FALSE,
                             group_name = FALSE,
                             row_header_separator = ", ",
                             digits = 2,
                             collapse_ci = FALSE,
                             collapse_tables = FALSE,
                             n,
                             ...) {
  # we need to determine how many rows to print. this requires the original
  # data frame including attributes, that's why this code comes first
  nrow_to_print <- .nrows_to_print(x, n)
  focal_terms <- attributes(x)$terms
  ci_level <- attributes(x)$ci.lvl
  response_name <- attributes(x)$response.name
  # default name, used later
  if (is.null(response_name)) {
    response_name <- "Response Level"
  }

  # fix terms for survival models
  a1 <- attr(x, "fitfun", exact = TRUE)
  a2 <- attr(x, "y.title", exact = TRUE)

  if (!is.null(a1) && !is.null(a2) && a1 == "coxph" && a2 != "Risk Score" && !"time" %in% focal_terms) {
    focal_terms <- c("time", focal_terms)
  }

  # format confidence intervals
  dots <- list(...)
  if (is.null(dots$ci_brackets)) {
    dots$ci_brackets <- getOption("ggeffects_ci_brackets", c("", ""))
  }

  # set default for collapse_ci and collapse_tables
  collapse_ci <- getOption("ggeffects_collapse_ci", collapse_ci)
  collapse_tables <- getOption("ggeffects_collapse_tables", collapse_tables)

  # use value labels as values for focal term
  if (isTRUE(value_labels)) {
    labs <- get_x_labels(x, case = NULL)
    vals <- x$x

    if (!is.null(labs)) {
      x$x <- format(labs)
      labs <- format(sprintf("[%g]", vals))
      x$x <- paste(labs, x$x)
    }
  }

  # use variable labels or variable names as column header?
  if (isTRUE(variable_labels)) {
    x_label <- attributes(x)$x.title
    predicted_label <- attributes(x)$title
  } else {
    x_label <- focal_terms[1]
    predicted_label <- "Predicted"
  }

  # check if we have groups, facets, panels or response levels
  has_groups <- .obj_has_name(x, "group") && length(unique(x$group)) > 1
  has_facets <- .obj_has_name(x, "facet") && length(unique(x$facet)) > 1
  has_panel <- .obj_has_name(x, "panel") && length(unique(x$panel)) > 1
  has_response <- .obj_has_name(x, "response.level") && length(unique(x$response.level)) > 1

  # check which columns we have - we want to sort by "subgroups"
  sort_columns <- c("response.level", "group", "facet", "panel")[c(has_response, has_groups, has_facets, has_panel)]

  # response needs to be factor
  if (has_response) {
    x$response.level <- factor(x$response.level, levels = unique(x$response.level))
  }

  if (length(sort_columns)) {
    insight::check_if_installed("datawizard")
    x <- datawizard::data_arrange(x, sort_columns)
  }

  # format column names, to make it work with insight-formatting-functions
  x$CI <- ifelse(is.null(ci_level), 95, ci_level)
  colnames(x)[colnames(x) == "conf.low"] <- "CI_low"
  colnames(x)[colnames(x) == "conf.high"] <- "CI_high"

  # sanity check - if we don't have CI columns, remove CI level as well
  if (!any(c("CI_low", "CI_high") %in% colnames(x))) {
    x$CI <- NULL
  }

  # round numeric values and format data frame
  x$x <- insight::format_value(
    x$x,
    digits = digits,
    protect_integers = TRUE,
    ...
  )

  ft_args <- list(
    x,
    zap_small = TRUE,
    digits = digits,
    ci_digits = digits
  )
  x <- do.call(insight::format_table, c(ft_args, dots))

  x$std.error <- NULL
  row_header_labels <- NULL

  # when we have multiple focal terms, we create proper subheadings here
  if (length(sort_columns)) {
    # add variable name to group levels?
    if (isTRUE(group_name) && isFALSE(collapse_tables)) {
      for (i in sort_columns) {
        prefix <- switch(
          i,
          response.level = response_name,
          group = ifelse(length(focal_terms) > 1, focal_terms[2], ""),
          facet = ifelse(length(focal_terms) > 2, focal_terms[3], ""),
          panel = ifelse(length(focal_terms) > 3, focal_terms[4], ""),
          ""
        )
        x[[i]] <- paste(prefix, x[[i]], sep = ": ")
      }
    }

    # create labels, based on values from the different sort-columns ("subgroups")
    row_header_labels <- apply(x[sort_columns], 1, paste, collapse = row_header_separator)
  }

  colnames(x)[1] <- x_label
  colnames(x)[2] <- predicted_label

  x$groups <- row_header_labels

  # clean-up for subset data frames, e.g. ggeffects[c(1:2, 4:6)]
  invalid_rows <- !nzchar(x$Predicted)
  if (any(invalid_rows)) {
    x <- x[!invalid_rows, ]
  }

  # split by groups
  if (is.null(x$groups) || length(unique(x$groups)) <= 1) {
    x <- x[.get_sample_rows(x, n = nrow_to_print), , drop = FALSE]
  } else {
    # coerce to factor, so that `split()` preserves correct order
    f <- factor(x$groups, levels = unique(x$groups))
    # split by groups, apply row selection (filtering), and combine data frame
    tmp <- lapply(split(x, f), function(i) {
      i[.get_sample_rows(i, n = nrow_to_print), , drop = FALSE]
    })
    # create data frame w/o rownames
    x <- as.data.frame(do.call(rbind, tmp))
    # if user wants just one table, we need to preserve the group, facet and
    # panel columns, but rename those to the name of the respective focal terms
    if (isTRUE(collapse_tables)) {
      insight::check_if_installed("datawizard")
      # first focal term is main term, we don't want to touch it here
      focal_terms <- focal_terms[-1]
      # if we have ordinal models and alike, we have a response level, which
      # we need to include as first value of "focal_terms"
      if (has_response) {
        focal_terms <- c(response_name, focal_terms)
      }
      x <- datawizard::data_rename(x, sort_columns, focal_terms)
      x <- datawizard::data_relocate(x, focal_terms, after = 1)
      # we need to remove "groups", else table will be separated again
      x$groups <- NULL
      # remove repeating elements in focal term columns
      for (i in focal_terms) {
        for (j in nrow(x):2) {
          if (x[[i]][j] == x[[i]][j - 1]) x[[i]][j] <- ""
        }
      }
    }
  }

  # clean-up
  x[c("response.level", "group", "facet", "panel")] <- NULL

  # collapse CI?
  x <- .collapse_ci(x, collapse_ci, ci_brackets = dots$ci_brackets)

  rownames(x) <- NULL
  x
}


#' @rdname print
#' @export
format.ggcomparisons <- function(x,
                                 collapse_ci = FALSE,
                                 collapse_p = FALSE,
                                 combine_levels = FALSE,
                                 ...) {
  ci <- attributes(x)$ci_level
  by_factor <- attributes(x)$by_factor
  estimate_name <- attributes(x)$estimate_name
  datagrid <- attributes(x)$datagrid

  out <- insight::standardize_names(x)
  attr(out, "ci") <- ci
  # format confidence intervals
  dots <- list(...)
  if (is.null(dots$ci_brackets)) {
    dots$ci_brackets <- getOption("ggeffects_ci_brackets", c("", ""))
  }
  # zap small values by default
  if (is.null(dots$zap_small)) {
    dots$zap_small <- TRUE
  }
  # set default for collapse_ci
  collapse_ci <- getOption("ggeffects_collapse_ci", collapse_ci)
  # set default for collapse_p
  collapse_p <- getOption("ggeffects_collapse_p", collapse_p)

  out <- do.call(insight::format_table, c(list(out), dots))
  # collapse p?
  out <- .collapse_p(out, collapse_p)
  # collapse CI?
  out <- .collapse_ci(out, collapse_ci, ci_brackets = dots$ci_brackets)
  # mix levels and re-combine?
  out <- .combine_level_pairs(out, combine_levels, datagrid, estimate_name, by_factor)
  out
}


# helper ----------------------------------------------------------------------

# helper to re-arrange column values. Each column no longer represents pairs of
# the same focal term, but the levels of the first comparison of each focal
# term against the second. e.g. instead "education = low-high" and
# "sex = male-female", it will become "first = low-male" and "second = high-female"
.combine_level_pairs <- function(x, combine_levels, datagrid, estimate_name, by_factor = NULL) {
  if (!combine_levels) {
    return(x)
  }
  insight::check_if_installed("datawizard")
  # retrieve columns that represent focal terms, but exclude group-by variable
  focal_terms <- colnames(x)[1:(which(colnames(x) == estimate_name) - 1)]
  focal_terms <- setdiff(focal_terms, by_factor)
  # check if some of the levels contain a hyphen, which is used to separate
  # the levels of the first and second comparison. If so, we need to temporatily
  # replace the hyphen with another character, so that we can split the levels
  needs_preparation <- focal_terms[vapply(datagrid[focal_terms], function(i) {
    any(grepl("-", i, fixed = TRUE))
  }, logical(1))]
  if (length(needs_preparation)) {
    for (i in needs_preparation) {
      unique_values <- as.character(unique(datagrid[[i]]))
      for (j in unique_values) {
        j_new <- gsub("-", "#_#", j, fixed = TRUE)
        x[[i]] <- gsub(j, j_new, x[[i]], fixed = TRUE)
      }
    }
  }
  # separate columns, so we have one column for each level of the focal terms
  out <- datawizard::data_separate(
    x,
    focal_terms,
    guess_columns = "mode",
    separator = "-",
    append = FALSE
  )
  # check if we need to replace special characters back to hyphens
  if (length(needs_preparation)) {
    for (i in needs_preparation) {
      # if yes, we need to find the new columnsm which are no longer named
      # "a" or "b", but "a_1", "a_2" etc.
      split_columns <- grep(paste0(i, "_", "\\d"), colnames(out), value = TRUE)
      for (j in split_columns) {
        # add back hypen
        out[[j]] <- gsub("#_#", "-", out[[j]], fixed = TRUE)
      }
    }
  }
  # unite split columns again. we need to do this for each split-part separately
  split_parts <- unique(gsub("(.*)_(\\d)", "\\2", grep("_\\d$", colnames(out), value = TRUE)))
  for (i in split_parts) {
    # find columns that belong to the same focal term
    unite_columns <- grep(
      paste0("(", paste(focal_terms, collapse = "|"), ")_", i),
      colnames(out),
      value = TRUE
    )
    out <- datawizard::data_relocate(
      datawizard::data_unite(
        out,
        select = unite_columns,
        new_column = paste("Pair", i),
        append = FALSE,
        separator = "-"
      ),
      select = "^Pair",
      regex = TRUE
    )
  }
  # finished...
  out
}


.collapse_ci <- function(x, collapse_ci, ci_brackets) {
  # collapse CI?
  ci_column <- grep("\\d{2}% CI", colnames(x))
  if (collapse_ci && length(ci_column)) {
    # make sure we don't replace with empty string
    if (!any(nzchar(ci_brackets))) {
      ci_brackets <- c("(", ")")
    }
    # remove brackets/parentheses
    x[, ci_column] <- gsub("(\\(|\\)|\\[|\\])", "", x[, ci_column])
    x[, ci_column] <- format(paste0(ci_brackets[1], trimws(x[, ci_column]), ci_brackets[2]), justify = "right")
    # paste CI to predicted values
    x[, ci_column - 1] <- paste0(x[, ci_column - 1], " ", x[, ci_column])
    # reassign column name
    colnames(x)[ci_column - 1] <- paste0(colnames(x)[ci_column - 1], " (", colnames(x)[ci_column], ")")
    # remove CI column
    x[, ci_column] <- NULL
  }
  x
}


.collapse_p <- function(x, collapse_p) {
  # collapse p?
  p_column <- which(colnames(x) == "p")
  if (collapse_p && length(p_column)) {
    # find CI column - we may insert p-values before that column. If we don't
    # have CI columns, e.g. because user also used "collapse_ci = TRUE", we
    # simply put it before estimated values
    ci_column <- grep("\\d{2}% CI", colnames(x))
    if (length(ci_column)) {
      insert_column <- ci_column - 1
    } else {
      insert_column <- p_column - 1
    }
    # make sure "< .001" is correctly handled
    x$p[x$p == "< .001"] <- "0.0001"
    # format p-values, we only want significance stars
    p_values <- format(insight::format_p(as.numeric(x$p), stars = TRUE, stars_only = TRUE))
    # paste po-values to predicted values
    x[, insert_column] <- paste0(x[, insert_column], p_values)
    # remove p column
    x[p_column] <- NULL
  }
  x
}


.nrows_to_print <- function(x, n) {
  # if we have groups, show n rows per group
  .n <- 1

  # remember if we have a factor
  x_is_factor <- identical(attr(x, "x.is.factor"), "1") && is.factor(x$x)

  # we do not simply count rows, but rather the number of combinations
  # when we have facets / groups / response.levels. These are separated
  # with own heading, making the output probably too long. Thus, we
  # decide on how many rows per "subheading" to be printed also based on
  # the number of combinations of groups, facets and response level
  has_groups <- .obj_has_name(x, "group") && length(unique(x$group)) > 1
  has_facets <- .obj_has_name(x, "facet") && length(unique(x$facet)) > 1
  has_panel <- .obj_has_name(x, "panel") && length(unique(x$panel)) > 1
  has_response <- .obj_has_name(x, "response.level") && length(unique(x$response.level)) > 1

  # here comes the top-secret formula that calculates the perfect number of rows
  if (has_groups) {
    .n <- .n_distinct(x$group)
  }
  if (has_facets) {
    .n <- .n * .n_distinct(x$facet)
  }
  if (has_panel) {
    .n <- .n * .n_distinct(x$panel)
  }
  if (has_response) {
    .n <- .n * .n_distinct(x$response.level)
  }

  # make sure that by default not too many rows are printed. The larger ".n" is
  # (i.e. the more subheadings we have, see code above), the fewer rows we want
  # per subheading. For factors, however, we want to show all levels

  if (missing(n) && !x_is_factor) {
    n <- if (.n >= 6) {
      4
    } else if (.n >= 4 && .n < 6) {
      5
    } else if (.n >= 2 && .n < 4) {
      6
    } else {
      8
    }
  } else if (x_is_factor) {
    n <- Inf
  }
  n
}
Any scripts or data that you put into this service are public.
ggeffects documentation built on Sept. 12, 2024, 7:41 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ggeffects
Create Tidy Data Frames of Marginal Effects for 'ggplot' from Model Outputs

R/format.R
In ggeffects: Create Tidy Data Frames of Marginal Effects for 'ggplot' from Model Outputs

Defines functions .nrows_to_print .collapse_p .collapse_ci .combine_level_pairs format.ggcomparisons format.ggeffects

Documented in format.ggcomparisons format.ggeffects

Try the ggeffects package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ggeffects Create Tidy Data Frames of Marginal Effects for 'ggplot' from Model Outputs

R/format.R In ggeffects: Create Tidy Data Frames of Marginal Effects for 'ggplot' from Model Outputs

Defines functions .nrows_to_print .collapse_p .collapse_ci .combine_level_pairs format.ggcomparisons format.ggeffects

Documented in format.ggcomparisons format.ggeffects

Try the ggeffects package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ggeffects
Create Tidy Data Frames of Marginal Effects for 'ggplot' from Model Outputs

R/format.R
In ggeffects: Create Tidy Data Frames of Marginal Effects for 'ggplot' from Model Outputs