R/tdcmm_class.R

Defines functions visualize.tdcmm visualize model.tdcmm model is_tdcmm new_tdcmm

Documented in is_tdcmm model new_tdcmm visualize

# Class description ----

#' `tdcmm` class
#'
#' @description The tdcmm class is a specialized type of data frame that is a
#' subclass of [tbl_df][tibble::tbl_df], also known as a "tibble". It is
#' designed to be used with `tidycomm` functions to provide additional
#' information and context to the output tibbles generated by these functions.
#' This subclass specifically augments output based on statistical tests, by
#' including the relevant model object(s) estimated during the analysis, as
#' well as any performed assumption checks.
#'
#' `tdcmm` objects in `tidycomm` are further subclassed with individual classes
#' handling visualization and printing per "output" type.
#'
#' @name tdcmm-class
#' @aliases tdcmm tdcmm-class
#' @keywords internal
NULL

# Constructors ----

#' `tdcmm` output constructor
#'
#' @description
#' Creates a new `tdcmm` class output object.
#'
#' The `tdcmm` class is a subclass of a [`tbl_df`][tibble::tbl_df],
#' also know as a "tibble", used for augmenting output tibbles of `tidycomm`
#' functions with additional information. For output based on statistical tests,
#' the model object(s) estimated and any performed assumption checks.
#'
#' `tdcmm` objects in `tidycomm` are further subclassed with individual classes
#' handling visualization and printing per "output" type.
#'
#' @param x A [tibble][tibble::tibble-package].
#' @param func Function name called that produced this model.
#' @param data The [tibble] that served as input to the function.
#' @param model A list of model object(s) used in preparation of the output.
#'   Defaults to `NULL`. A single model should be wrapped in a list of length
#'   `1`.
#' @param checks A list of assumption check object(s) used in preparation of the
#'   output. Defaults to `NULL`.
#' @param params A named list of parameters originally passed to the call.
#'   Defaults to an empty list.
#' @keywords internal
new_tdcmm <- function(x, func, data, model = NULL, checks = NULL, params = list()) {
  stopifnot(tibble::is_tibble(x))
  stopifnot(func != "")
  stopifnot(tibble::is_tibble(data))
  stopifnot(is.list(model) | is.null(model))
  stopifnot(is.list(checks) | is.null(checks))
  stopifnot(is.list(params))

  structure(
    x,
    class = c("tdcmm", class(x)),
    func = func,
    data = data,
    params = params,
    model = model,
    checks = checks
  )
}

# Test ----

#' @describeIn new_tdcmm Test for class `tdcmm`
is_tdcmm <- function(x) {
  inherits(x, "tdcmm")
}


# Accessors ----

#' Access model(s) used to estimate output
#'
#' Returns model objects used to estimate `tdcmm` output.
#'
#' @param x `tdcmm` output
#' @param ... other arguments
#'
#' @returns A model object or a list of model objects
#'
#' @export
#' @keywords internal
model <- function(x, ...) {
  UseMethod("model")
}

#' @export
model.tdcmm <- function(x, ...) {
  model <- attr(x, "model")
  if (is.null(model)) {
    x_string <- deparse(substitute(x))
    x_string <- ifelse(x_string == ".",
                       "The provided object",
                       glue("'{x_string}'"))
    warning(glue("{x_string} does not contain any model."),
            call. = FALSE)
  }
  if (length(model) == 1) model[[1]] else model
}

#' Visualize tidycomm output
#'
#' Returns [ggplot2] visualization appropriate to respective `tdcmm` model
#' (see list below). Returns `NULL` (and a warning) if no visualization has
#' been implemented for the particular model.
#'
#' - [describe()]: horizontal box plot depicting a box from Q25 to Q75, a thick
#' line for Mdn, and two whiskers to Min/Max respectively;
#' no additional arguments
#' - [describe_cat()]: horizontal bar plot depicting number of occurrences;
#' no additional arguments
#' - [tab_frequencies()]: either a histogram (if 1 variable is given) or
#' multiple histograms wrapped, 5+ variables issue a warning about readability;
#' no additional arguments
#' - [tab_percentiles()]: quantile plot
#' - [crosstab()]: horizontal stacked bar plot, either absolute or relative
#' (depending on the `percentages` argument in [crosstab()])
#' - [t_test()]: plot with points and appended 95% confidence intervals;
#' no additional arguments
#' - [unianova()]: plot with points and appended 95% confidence intervals;
#' no additional arguments
#' - [correlate()]: plot as scatter; for more than 2 variables, a correlogram
#' is plotted (just like for [to_correlation_matrix()]); use the `which`
#' parameter to select how points are visualized:
#'   - "jitter" adds a bit of random noise to each point to better reflect
#'   categorical values
#'   - "alpha" depicts points slightly transparent so that multiple points in
#'   the same position are more easily visible
#' - [correlate()]: for partial correlation, a scatter plot with some jitter is
#' plotted using the residuals between the control variable and (a) the
#' dependent as well as (b) the independent variable; no additional arguments
#' - [to_correlation_matrix()]: plot as correlogram building on
#' [GGally::ggpairs()] with jittered scatter plots in lower half, histograms as
#' diagonals, and correlation coefficients with 95% confidence intervals in
#' upper half
#' - [regress()]: plot regression results as scatter (without jitter) and an
#' additional depicted model line with including its 95% confidence intervals;
#' alternatively, visual check inspection helpers can be plotted through the
#' `which` parameter which can be set to yield one of the following:
#'   - "jitter" (default): plots a scatter plot with jitter per independent
#'   variable and adds a linear regression line with 95% confidence intervals
#'   to it; keep in mind that if you have, say, three independent variables,
#'   this visualization shows you three plots with one linear regression for
#'   each, so that the three models (i.e., the three colored lines) reflect
#'   only the particular combination of one independent and the dependent
#'   variable
#'   - "alpha" (default): almost like `jitter` but instead of jitter it plots
#'   scatter plots with some transparency so that multiple data points in the
#'   same position appear as darker
#'   - "correlogram": like [to_correlation_matrix()], a correlogram between
#'   independent variables are produced to help determine independent errors
#'   and multicollinearity
#'   - "residualsfitted" or "resfit": a residuals-versus-fitted plot is useful
#'   to determine distributions; for a normal distribution the colored line
#'   should ideally fit on the dashed line
#'   - "pp": a (normal) probability-probability plot helps checking for
#'   multicollinearity whereby the data (here mostly the center data from
#'   within the IQR) should ideally align with the dashed line
#'   - "qq": a (normal) quantile-quantile plot helps checking for
#'   multicollinearity but focuses more on outliers; the data should align with
#'   the dashed line
#'   - "scalelocation" or "scaloc": a scale-location (sometimes also called a
#'   spread-location) plot checks whether residuals are spread equally to help
#'   check for homoscedasticity; ideally, the colored line is horizontal and
#'   the data spreads more or less randomly
#'   - "residualsleverage" or "reslev": a residuals-versus-leverage plot allows
#'   to check for influential outliers affecting the final model more than the
#'   rest of the data; ideally, no data is far off compared to the bulk of the
#'   the data and thus shows high Cook's distance to the rest; the colored line
#'   helps to identify the bulk of the data and the five most-distant outliers
#'   are labelled with their case number (i.e., the row number in the dataset);
#'   note that 5 is arbitrary here, meaning that they might not be too far off
#'   or there might be more than 5 noteworthy outliers in this model; interpret
#'   with care
#'
#' Note that the returned [ggplot2] object can be modified easily by appending
#' or overwriting individual geom's or scale's. See the examples below and the
#' documentation of [ggplot2].
#'
#' @param x `tdcmm` output
#' @param ... other arguments
#' @param .design a list to style the visualization; by default and good practice
#' use one of the ready-made design functions' returns (e.g., `design_lmu()`,
#' `design_grey()`); you could, however, also provide your own list here which
#' has to be a list with 9 keys: `main_color_1`, a vector of 12 `main_colors`, a
#' corresponding `main_contrast_1` (the color of text to write on top of the
#' main color) and a corresponding `main_contrasts`, the `main_size` (for
#' lines), a `comparison_linetype`, `comparison_color`, and `comparison_size`
#' for all lines that act as comparative lines, and a [ggplot2] `theme`
#'
#' @return A [ggplot2] object
#'
#' @family visualize
#'
#' @examples
#' \dontrun{
#' WoJ %>%
#'   describe() %>%
#'   visualize()
#'
#' fbposts %>%
#'   describe_cat() %>%
#'   visualize()
#'
#' WoJ %>%
#'   tab_frequencies(trust_parliament) %>%
#'   visualize()
#' fbposts %>%
#'   tab_frequencies(pop_elite, pop_people, pop_othering) %>%
#'   visualize()
#'
#' WoJ %>%
#'   crosstab(reach, employment) %>%
#'   visualize()
#'
#' fbposts %>%
#'   crosstab(coder_id, type, percentages = TRUE) %>%
#'   visualize()
#'
#' WoJ %>%
#'   t_test(temp_contract, autonomy_selection, autonomy_emphasis) %>%
#'   visualize()
#'
#' WoJ %>%
#'   unianova(country, autonomy_selection, autonomy_emphasis) %>%
#'   visualize()
#'
#' fbposts %>%
#'   correlate(pop_elite, pop_people) %>%
#'   visualize()
#'
#' fbposts %>%
#'   correlate(pop_elite, pop_people, with = pop_othering) %>%
#'   visualize()
#'
#' fbposts %>%
#'   correlate(pop_elite, pop_people) %>%
#'   visualize("alpha")
#'
#' WoJ %>%
#'   correlate(autonomy_selection, ethics_1, partial = work_experience) %>%
#'   visualize()
#'
#' WoJ %>%
#'   correlate(ethics_1, ethics_2, ethics_3, ethics_4) %>%
#'   to_correlation_matrix() %>%
#'   visualize()
#'
#' r <- WoJ %>% regress(autonomy_selection, temp_contract, work_experience, ethics_2)
#' r %>% visualize() # same as r %>% visualize("jitter")
#' r %>% visualize("alpha")
#' r %>% visualize("correlogram")
#' r %>% visualize("resfit")
#' r %>% visualize("pp")
#' r %>% visualize("qq")
#' r %>% visualize("scaloc")
#' r %>% visualize("reslev")
#'
#' # To overwrite a certain scale or geom, just append as you would with ggplot2
#' fbposts %>%
#'   describe_cat() %>%
#'   visualize() +
#'     ggplot2::scale_fill_grey()
#'}
#'
#' @export
visualize <- function(x, ..., .design = design_lmu()) {
  UseMethod("visualize")
}

#' @export
visualize.tdcmm <- function(x, ..., .design = design_lmu()) {
  return(warn_about_missing_visualization(x))
}
joon-e/tidycomm documentation built on May 11, 2024, 9:07 a.m.