R/effects.R

Defines functions get_betas get_stars get_ci get_gini get_etasq get_npmi get_correlation adjust_p tidy_lm_levels .effect_correlations effect_metrics_items_cor_items effect_metrics_items_cor effect_metrics_items_grouped_items effect_metrics_items_grouped effect_metrics_items effect_metrics_one_cor effect_metrics_one_grouped effect_metrics_one effect_counts_items_cor_items effect_counts_items_cor effect_counts_items_grouped_items effect_counts_items_grouped effect_counts_items effect_counts_one_cor effect_counts_one_grouped effect_counts_one effect_metrics effect_counts

Documented in adjust_p .effect_correlations effect_counts effect_counts_items effect_counts_items_cor effect_counts_items_cor_items effect_counts_items_grouped effect_counts_items_grouped_items effect_counts_one effect_counts_one_cor effect_counts_one_grouped effect_metrics effect_metrics_items effect_metrics_items_cor effect_metrics_items_cor_items effect_metrics_items_grouped effect_metrics_items_grouped_items effect_metrics_one effect_metrics_one_cor effect_metrics_one_grouped get_betas get_ci get_correlation get_etasq get_gini get_npmi get_stars tidy_lm_levels

#' Output effect sizes and test statistics for count data
#'
#' @description
#' The type of effect size depends on the number of selected columns:
#' - One categorical column: see \link{effect_counts_one}
#' - Multiple categorical columns: see \link{effect_counts_items}
#'
#' Cross tabulations:
#'
#' - One categorical column and one grouping column: see \link{effect_counts_one_grouped}
#' - Multiple categorical columns and one grouping column: see \link{effect_counts_items_grouped} (not yet implemented)
#' - Multiple categorical columns and multiple grouping columns: \link{effect_counts_items_grouped_items} (not yet implemented)
#'
#' By default, if you provide two column selections, the second column is treated as categorical.
#' Setting the metric-parameter to TRUE will call the appropriate functions for correlation analysis:
#'
#' - One categorical column and one metric column: see \link{effect_counts_one_cor} (not yet implemented)
#' - Multiple categorical columns and one metric column: see \link{effect_counts_items_cor} (not yet implemented)
#' - Multiple categorical columns and multiple metric columns:\link{effect_counts_items_cor_items} (not yet implemented)
#'
#' `r lifecycle::badge("experimental")`
#'
#' @param data A data frame.
#' @param cols A tidy column selection,
#'             e.g. a single column (without quotes)
#'             or multiple columns selected by methods such as starts_with().
#' @param cross Optional, a grouping column. The column name without quotes.
#' @param metric When crossing variables, the cross column parameter can contain categorical or metric values.
#'            By default, the cross column selection is treated as categorical data.
#'            Set metric to TRUE, to treat it as metric and calculate correlations.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Other parameters passed to the appropriate effect function.
#' @return A volker tibble.
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_counts(data, sd_gender, adopter)
#'
#' @export
effect_counts <- function(data, cols, cross = NULL, metric = FALSE, clean = TRUE, ...) {
  # Check
  check_is_dataframe(data)

  # 2. Clean
  if (clean) {
    data <- data_clean(data, clean)
  }

  # Find columns
  cols_eval <- tidyselect::eval_select(expr = enquo(cols), data = data)
  cross_eval <- tidyselect::eval_select(expr = enquo(cross), data = data)

  is_items <- length(cols_eval) > 1
  is_grouped <- length(cross_eval) == 1
  is_multi <- length(cross_eval) > 1
  is_metric <- metric != FALSE

  # Single variables
  if (!is_items && !is_grouped && !is_multi) {
    effect_counts_one(data, {{ cols }}, ...)
  }
  else if (!is_items && is_grouped && !is_metric) {
    effect_counts_one_grouped(data, {{ cols }}, {{ cross }}, ...)
  }
  else if (!is_items && is_grouped && is_metric) {
    effect_counts_one_cor(data, {{ cols }}, {{ cross }}, ...)
  }

  # Items
  else if (is_items && !is_grouped && !is_multi) {
    effect_counts_items(data, {{ cols }} , ...)
  }
  else if (is_items && is_grouped && !is_metric) {
    effect_counts_items_grouped(data, {{ cols }}, {{ cross }},  ...)
  }
  else if (is_items && is_grouped && is_metric) {
    effect_counts_items_cor(data, {{ cols }}, {{ cross }},  ...)
  }
  else if (is_items && !is_grouped && is_multi && !is_metric) {
    effect_counts_items_grouped_items(data, {{ cols }}, {{ cross }},  ...)
  }
  # Not found
  else {
    stop("Check your parameters: the column selection is not yet supported by volker functions.")
  }
}

#' Output effect sizes and test statistics for metric data
#'
#' @description
#' The calculations depend on the number of selected columns:
#'
#' - One metric column: see \link{effect_metrics_one}
#' - Multiple metric columns: see \link{effect_metrics_items}
#'
#' Group comparisons:
#'
#' - One metric column and one grouping column: see \link{effect_metrics_one_grouped}
#' - Multiple metric columns and one grouping column: see \link{effect_metrics_items_grouped}
#' - Multiple metric columns and multiple grouping columns: not yet implemented
#'
#' By default, if you provide two column selections, the second column is treated as categorical.
#' Setting the metric-parameter to TRUE will call the appropriate functions for correlation analysis:
#'
#' - Two metric columns: see \link{effect_metrics_one_cor}
#' - Multiple metric columns and one metric column: see \link{effect_metrics_items_cor}
#' - Two metric column selections: see \link{effect_metrics_items_cor_items}
#'
#' `r lifecycle::badge("experimental")`
#'
#' @param data A data frame.
#' @param cols A tidy column selection,
#'             e.g. a single column (without quotes)
#'             or multiple columns selected by methods such as starts_with().
#' @param cross Optional, a grouping column (without quotes).
#' @param metric When crossing variables, the cross column parameter can contain categorical or metric values.
#'            By default, the cross column selection is treated as categorical data.
#'            Set metric to TRUE, to treat it as metric and calculate correlations.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Other parameters passed to the appropriate effect function.
#' @return A volker tibble.
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_metrics(data, sd_age, sd_gender)
#'
#' @export
effect_metrics <- function(data, cols, cross = NULL, metric = FALSE, clean = TRUE, ...) {
  # Check
  check_is_dataframe(data)

  # 2. Clean
  if (clean) {
    data <- data_clean(data, clean)
  }

  # Find columns
  cols_eval <- tidyselect::eval_select(expr = enquo(cols), data = data)
  cross_eval <- tidyselect::eval_select(expr = enquo(cross), data = data)
  is_items <- length(cols_eval) > 1
  is_grouped <- length(cross_eval)== 1
  is_multi <- length(cross_eval) > 1
  is_metric <- metric != FALSE

  # Single variables
  if (!is_items && !is_grouped && !is_multi) {
    effect_metrics_one(data, {{ cols }}, ...)
  }
  else if (!is_items && is_grouped && !is_metric) {
    effect_metrics_one_grouped(data, {{ cols }}, {{ cross }}, ...)
  }
  else if (!is_items && is_grouped && is_metric) {
    effect_metrics_one_cor(data, {{ cols }}, {{ cross }}, ...)
  }

  # Items
  else if (is_items && !is_grouped && !is_multi) {
    effect_metrics_items(data, {{ cols }} , ...)
  }
  else if (is_items && is_grouped && !is_metric) {
    effect_metrics_items_grouped(data, {{ cols }}, {{ cross }},  ...)
  }
  else if (is_items && is_grouped && is_metric) {
    effect_metrics_items_cor(data, {{ cols }}, {{ cross }},  ...)
  }
  else if (is_items && !is_grouped && is_multi && is_metric) {
    effect_metrics_items_cor_items(data, {{ cols }}, {{ cross }},  ...)
  }
  # Not found
  else {
    stop("Check your parameters: the column selection is not yet supported by volker functions.")
  }

}

#' Test homogeneity of category shares
#'
#' Performs a goodness-of-fit test and calculates the Gini coefficient.
#' The goodness-of-fit-test is calculated using \code{stats::\link[stats:chisq.test]{chisq.test}}.
#'
#' @keywords internal
#'
#' @param data A tibble.
#' @param col The column holding factor values.
#' @param clean Prepare data by \link{data_clean}
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_counts}.
#' @return A volker tibble with the following statistical measures:
#'  - **Gini coefficient**: Gini coefficient, measuring inequality.
#'  - **n**: Number of cases the calculation is based on.
#'  - **Chi-squared**: Chi-Squared test statistic.
#'  - **p**: p-value for the statistical test.
#'  - **stars**: Significance stars based on p-value (*, **, ***).
#'
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' data |>
#'   filter(sd_gender != "diverse") |>
#'   effect_counts_one(sd_gender)
#'
#' @export
#' @importFrom rlang .data
effect_counts_one <- function(data, col, clean = TRUE, ...) {

  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ col }}, cols.categorical = {{ col }}, clean = clean)

  # 2. Chi-squared test
  counts <- data %>%
    dplyr::count({{ col }}) %>%
    dplyr::pull(.data$n)

  fit <- stats::chisq.test(counts)

  # 3. Result
  result <- list(
    "Gini coefficient" = sprintf("%.2f", get_gini(counts)),
    "n" = as.character(sum(counts)),
    "Chi-squared" = sprintf("%.2f", round(fit$statistic, 2)),
    "p" = sprintf("%.3f", round(fit$p.value, 3)),
    "stars" = get_stars(fit$p.value)
  ) %>%
  tibble::enframe(
    name = "Statistic",
    value = "Value"
    )

  result <- .attr_transfer(result, data, "missings")
  .to_vlkr_tab(result, caption = fit$method)
}


#' Output test statistics and effect size for contingency tables
#'
#' Chi squared is calculated using \code{stats::\link[stats:chisq.test]{chisq.test}}.
#' If any cell contains less than 5 observations, the exact-parameter is set.
#'
#' Phi is derived from the Chi squared value by \code{sqrt(fit$statistic / n)}.
#' Cramer's V is derived by \code{sqrt(phi / (min(dim(contingency)[1], dim(contingency)[2]) - 1))}.
#' Cramer's V is set to 1.0 for diagonal contingency matrices, indicating perfect association.
#'
#' @keywords internal
#'
#' @param data A tibble.
#' @param col The column holding factor values.
#' @param cross The column holding groups to compare.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_counts}.
#' @return A volker list with two volker tibbles.
#'   The first tibble contains npmi values for each combinations:
#'   - n Number the combination occurs.
#'   - p_x Marginal share of the first category.
#'   - p_y Marginal share of the second category.
#'   - p_xy Share of the combination based on all combinations.
#'   - ratio The ratio of p_xy to (p_x * p_y).
#'   - pmi Pointwise Mutual information, derived from the ratio.
#'   - npmi Normalized Pointwise Mutual Information, derived from the pmi.
#'
#'   The second tibble contains effect sizes based on the cross table:
#'
#'  - Cramer's V: Effect size measuring the association between the two variables.
#'  - n: Number of cases the calculation is based on.
#'  - Chi-squared: Chi-Squared test statistic.
#'                 If expected values are below 5 in at least one cell, an exact Fisher test is conducted.
#'  - df: Degrees of freedo of the chi-squared test. Empty for the exact Fisher test.
#'  - p: p-value of the chi-squared test.
#'  - stars: Significance stars based on p-value (*, **, ***).
#'
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_counts_one_grouped(data, adopter, sd_gender)
#'
#' @importFrom rlang .data
#' @export
effect_counts_one_grouped <- function(data, col, cross, clean = TRUE, ...) {

  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ col }}, {{ cross }}, cols.categorical = c({{ col }}, {{ cross }}), clean = clean)

  # 2. NPMI
  result_items <- get_npmi(data, {{ col }}, {{ cross }}, ...)
  result_items$p_y_if_x <- NULL
  result_items$p_x_if_y <- NULL


  # 3. Cramer's V
  result_stats <- get_correlation(
    dplyr::pull(data, {{ col }}),
    dplyr::pull(data, {{ cross }}),
    method = "cramer"
  )

  result_stats$p = sprintf("%.3f", round(result_stats$p, 3))
  result_stats$stars = get_stars(result_stats$p)

  result_stats <- tibble::enframe(
    result_stats,
    name = "Statistic",
    value = "Value"
  )

  # 4. Assemble results
  result <- c(
    list(.to_vlkr_tab(result_items, digits=2)),
    list(.to_vlkr_tab(result_stats, digits=2))
  )

  result <- .attr_transfer(result, data, "missings")
  .to_vlkr_list(result)

}

#' Output test statistics and effect size from a logistic regression of one metric predictor
#'
#' \strong{Not yet implemented. The future will come.}
#'
#' @keywords internal
#'
#' @param data A tibble.
#' @param col The column holding factor values.
#' @param cross The column holding metric values.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_counts}.
#' @return A volker tibble.
#' @importFrom rlang .data
effect_counts_one_cor <- function(data, col, cross, clean = TRUE, labels = TRUE, ...) {
  warning("Not implemented yet. The future will come.", noBreaks. = TRUE)
}


#' Test homogeneity of category shares for multiple items
#'
#' Performs a goodness-of-fit test and calculates the Gini coefficient for each item.
#' The goodness-of-fit-test is calculated using  \code{stats::\link[stats:chisq.test]{chisq.test}}.
#'
#' @keywords internal
#'
#' @param data A tibble containing item measures.
#' @param cols Tidyselect item variables (e.g. starts_with...).
#' @param adjust Performing multiple significance tests inflates the alpha error.
#'               Thus, p values need to be adjusted according to the number of tests.
#'               Set a method supported by  \code{stats::\link[stats:p.adjust]{p.adjust}},
#'               e.g. "fdr" (the default) or "bonferroni". Disable adjustment with FALSE.
#' @param labels If TRUE (default) extracts labels from the attributes, see \link{codebook}.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_counts}.
#' @return A volker tibble with the following statistical measures:
#'
#'  - **Gini coefficient**: Gini coefficient, measuring inequality.
#'  - **n**: Number of cases the calculation is based on.
#'  - **Chi-squared**: Chi-Squared test statistic.
#'  - **p**: p-value for the statistical test.
#'  - **stars**: Significance stars based on p-value (*, **, ***).
#'
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_counts_items(data, starts_with("cg_adoption_adv"))
#'
#' @export
#' @importFrom rlang .data
effect_counts_items <- function(data, cols, adjust = "fdr", labels = TRUE, clean = TRUE, ...) {
  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ cols }}, cols.categorical = {{ cols }}, clean = clean)

  # 2. Count
  counts <- data %>%
    labs_clear({{ cols }}) %>%
    tidyr::pivot_longer(
      {{ cols }},
      names_to = "item",
      values_to = ".value",
      values_drop_na = TRUE
    ) %>%
    dplyr::count(.data$item, .data$.value) %>%
    dplyr::group_by(.data$item) %>%
    dplyr::reframe(n = list(.data$n)) %>%
    tibble::deframe()

  # 3. Chi-square goodness-of-fit test for each item
  result <- purrr::imap(
    counts,
    \(.x, .y) {
    chi <- stats::chisq.test(.x)

      list(
        "item" = .y,
        "Gini coefficient" = get_gini(.x),
        "n" = sum(.x),
        "Chi-squared" = chi$statistic,
        "p" = chi$p.value
      )
   }) %>%
    dplyr::bind_rows()

  # Adjust
  result <- adjust_p(result, "p", method = adjust)

  # Round
  result <- .attr_setcolumn(
    result, tidyselect::all_of(c("Gini coefficient", "Chi-squared")),
    "round", 2
  )

  # 3. Get variable caption from the attributes
  if (labels) {
    result <- labs_replace(result, "item", codebook(data, {{ cols }}), col_from="item_name", col_to="item_label")
    prefix <- get_prefix(result$item)
    result <- dplyr::mutate(result, item = trim_prefix(.data$item, prefix))
  }

  # 4. Rename first column
  if (prefix != "") {
    colnames(result)[1] <- prefix
  } else {
    result <- dplyr::rename(result, Item = tidyselect::all_of("item"))
  }

  # 5. Result
  result <- .attr_transfer(result, data, "missings")
  .to_vlkr_tab(result)
}


#' Effect size and test for comparing multiple variables by a grouping variable
#'
#' @keywords internal
#'
#' @param data A tibble containing item measures and grouping variable.
#' @param cols Tidyselect item variables (e.g. starts_with...).
#' @param cross The column holding groups to compare.
#' @param method The output metrics, currently only `cramer` is supported.
#' @param adjust Performing multiple significance tests inflates the alpha error.
#'               Thus, p values need to be adjusted according to the number of tests.
#'               Set a method supported by  \code{stats::\link[stats:p.adjust]{p.adjust}},
#'               e.g. "fdr" (the default) or "bonferroni". Disable adjustment with FALSE.
#' @param labels If TRUE (default) extracts labels from the attributes, see \link{codebook}.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_counts}.
#' @return A volker tibble.
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_counts_items_grouped(
#'   data, starts_with("cg_adoption_adv"),  sd_gender
#' )
#'
#' @export
#' @importFrom rlang .data
effect_counts_items_grouped <- function(data, cols, cross, method = "cramer", adjust = "fdr", labels = TRUE, clean = TRUE, ...) {
  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ cols }}, {{ cross }}, cols.categorical = c({{ cols }}, {{ cross }}), clean = clean)

  check_is_param(method, c("cramer", "npmi"))


  # 2. Calculate correlations
  result <- .effect_correlations(data, {{ cols }}, {{ cross }}, method = method, adjust = adjust, labels = labels)

  # 3. Labels
  prefix1 <- get_prefix(result$item1)

  if (labels) {
    prefix2 <- get_title(data, {{ cross }})
  } else {
    prefix2 <- rlang::as_string(rlang::ensym(cross))
  }

  result <- result %>%
    dplyr::mutate(item1 = trim_prefix(.data$item1, prefix1)) %>%
    dplyr::select(-tidyselect::all_of("item2"))

  # Rename first column
  if (prefix1 != "") {
    colnames(result)[1] <- paste0(prefix1, ": Correlation with ", prefix2)
  }

  result <- .attr_transfer(result, data, "missings")
  .to_vlkr_tab(result, digits = 2)
}

#' Effect size and test for comparing multiple variables by multiple grouping variables
#'
#' @keywords internal
#'
#' @param data A tibble containing item measures and grouping variable.
#' @param cols Tidyselect item variables (e.g. starts_with...).
#' @param cross The columns holding groups to compare.
#' @param method The output metrics: cramer = Cramer's V, pmi = Pointwise Mutual Information, npmi = Normalized PMI.
#' @param adjust Performing multiple significance tests inflates the alpha error.
#'               Thus, p values need to be adjusted according to the number of tests.
#'               Set a method supported by  \code{stats::\link[stats:p.adjust]{p.adjust}},
#'               e.g. "fdr" (the default) or "bonferroni". Disable adjustment with FALSE.
#' @param labels If TRUE (default) extracts labels from the attributes, see \link{codebook}.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_counts}.
#' @return A volker tibble.
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_counts(
#'   data,
#'   starts_with("cg_adoption_adv"),
#'   starts_with("use_")
#')
#'
#' @export
#' @importFrom rlang .data
effect_counts_items_grouped_items <- function(data, cols, cross, method = "cramer", adjust = "fdr", category = NULL, labels = TRUE, clean = TRUE, ...) {
  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ cols }}, {{ cross }}, cols.categorical = c({{ cols }}, {{ cross }}), clean = clean)
  check_is_param(method, c("cramer", "npmi"))

  # 2. Calculate correlations
  result <- .effect_correlations(data, {{ cols }}, {{ cross }}, method = method, adjust = adjust, category = category, labels = labels)

  # 3. Labels
  prefix1 <- get_prefix(result$item1)
  prefix2 <- get_prefix(result$item2)

  result <- result %>%
    dplyr::mutate(item1 = trim_prefix(.data$item1, prefix1)) |>
    dplyr::mutate(item2 = trim_prefix(.data$item2, prefix2))

  if ((prefix1 == prefix2) && (prefix1 != "")) {
    prefix1 <- paste0("Item 1: ", trim_label(prefix1))
    prefix2 <- paste0("Item 2: ", trim_label(prefix2))
  }

  # Rename first column
  if (prefix1 != "") {
    colnames(result)[1] <- prefix1
  }

  # Rename second column
  if (prefix2 != "") {
    colnames(result)[2] <- prefix2
  }

  result <- .attr_transfer(result, data, "missings")
  .to_vlkr_tab(result, digits = 2)
}

#' Correlate the values in multiple items with one metric column and output effect sizes and tests
#'
#' \strong{Not yet implemented. The future will come.}
#'
#' @keywords internal
#'
#' @param data A tibble containing item measures.
#' @param cols Tidyselect item variables (e.g. starts_with...).
#' @param cross The metric column.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_counts}.
#' @return A volker tibble.
#' @importFrom rlang .data
effect_counts_items_cor <- function(data, cols, cross, clean = TRUE, ...) {
  warning("Not implemented yet. The future will come.", noBreaks. = TRUE)
}


#' Correlate the values in multiple items with multiple metric columns and output effect sizes and tests
#'
#' \strong{Not yet implemented. The future will come.}
#'
#' @keywords internal
#'
#' @param data A tibble containing item measures.
#' @param cols Tidyselect item variables (e.g. starts_with...).
#' @param cross The metric target columns.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_counts}.
#' @return A volker tibble.
#' @importFrom rlang .data
effect_counts_items_cor_items <- function(data, cols, cross, clean = TRUE, ...) {
  warning("Not implemented yet. The future will come.", noBreaks. = TRUE)
}


#' Test whether a distribution is normal
#'
#' The test is calculated using \code{stats::\link[stats:shapiro.test]{shapiro.test}}.
#'
#' @keywords internal
#'
#' @param data A tibble.
#' @param col The column holding metric values.
#' @param clean Prepare data by \link{data_clean}.
#' @param labels If TRUE (default) extracts labels from the attributes, see \link{codebook}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_metrics}.
#' @return A volker list object with the following statistical measures:
#'
#'  - **skewness**: Measure of asymmetry in the distribution. A value of 0 indicates perfect symmetry.
#'  - **kurtosis**: Measure of the "tailedness" of the distribution.
#'  - **W**: W-statistic from the Shapiro-Wilk normality test.
#'  - **p**: p-value for the statistical test.
#'  - **stars**: Significance stars based on p-value (*, **, ***).
#'  - **normality**: Interpretation of normality based on Shapiro-Wilk test.
#'
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_metrics_one(data, sd_age)
#'
#' @export
#' @importFrom rlang .data
effect_metrics_one <- function(data, col, labels = TRUE, clean = TRUE, ... ) {
  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ col }}, cols.numeric = {{ col }}, clean = clean)

  # 2. Normality test
  stats <- dplyr::select(data, av = {{ col }})
  stats_shapiro <- stats::shapiro.test(stats$av)

  stats_shapiro <- list(
    "W" = sprintf("%.2f", round(stats_shapiro$statistic, 2)),
    "p" = sprintf("%.3f", round(stats_shapiro$p.value, 3)),
    "stars" = get_stars(stats_shapiro$p.value),
    "normality" = ifelse(stats_shapiro$p.value > 0.05, "normal", "not normal")
  ) |>
    tibble::enframe(
      name = "Shapiro-Wilk normality test",
      value = "Value"
    )

  # 3. Skewness and kurtosis
  stats_skew <- psych::describe(stats$av)
  stats <- list(
    "skewness" = sprintf("%.2f", round(stats_skew$skew, 2)),
    "kurtosis" = sprintf("%.2f", round(stats_skew$kurtosis, 2))
  ) |>
    tibble::enframe(
      name = "Metric",
      value = "Value"
    )

  # 4. Get item label from the attributes
  if (labels) {
    label <- get_title(data, {{ col }})
    stats <- dplyr::rename(stats, {{ label }} := "Metric")
  }

  # 5. Results
  result <- c(
    list(.to_vlkr_tab(stats, digits=2)),
    list(.to_vlkr_tab(stats_shapiro, digits=2))
  )

  result <- .attr_transfer(result, data, "missings")
  .to_vlkr_list(result)
}

#' Output a regression table with estimates and macro statistics
#'
#' The regression output comes from \code{stats::\link[stats:lm]{lm}}.
#' T-test is performed using \code{stats::\link[stats:t.test]{t.test}}.
#' Normality check is performed using
#' \code{stats::\link[stats:shapiro.test]{shapiro.test}}.
#' Equality of variances across groups is assessed using \code{car::\link[car:leveneTest]{leveneTest}}.
#' Cohen's d is calculated using \code{effectsize::\link[effectsize:cohens_d]{cohens_d}}.
#'
#' @keywords internal
#'
#' @param data A tibble.
#' @param col The column holding metric values.
#' @param cross The column holding groups to compare.
#' @param method A character vector of methods, e.g. c("t.test","lm").
#'              Supported methods are t.test (only valid if the cross column contains two levels)
#'              and lm (regression results).
#' @param adjust Performing multiple significance tests inflates the alpha error.
#'               Thus, p values need to be adjusted according to the number of tests.
#'               Set a method supported by  \code{stats::\link[stats:p.adjust]{p.adjust}},
#'               e.g. "fdr" (the default) or "bonferroni". Disable adjustment with FALSE.
#' @param labels If TRUE (default) extracts labels from the attributes, see \link{codebook}.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_metrics}.
#' @return A volker list object containing volker tables with the requested statistics.
#'
#' Regression table:
#' - **estimate**: Regression coefficient (unstandardized).
#' - **ci low / ci high**: lower and upper bound of the 95% confidence interval.
#' - **se**: Standard error of the estimate.
#' - **t**: t-statistic.
#' - **p**: p-value for the statistical test.
#' - **stars**: Significance stars based on p-value (*, **, ***).
#'
#' Macro statistics:
#' - **Adjusted R-squared**: Adjusted coefficient of determination.
#' - **F**: F-statistic for the overall significance of the model.
#' - **df**: Degrees of freedom for the model.
#' - **residual df**: Residual degrees of freedom.
#' - **p**: p-value for the statistical test.
#' - **stars**: Significance stars based on p-value (*, **, ***).
#'
#' If `method = t.test`:
#' ### Shapiro-Wilk test (normality check):
#' - **W**: W-statistic from the Shapiro-Wilk normality test.
#' - **p**: p-value for the test.
#' - **normality**: Interpretation of the Shapiro-Wilk test.
#'
#' ### Levene test (equality of variances):
#' - **F**: F-statistic from the Levene test for equality of variances between groups.
#' - **p**: p-value for Levene's test.
#' - **variances**: Interpretation of the Levene test.
#'
#' ### Cohen's d (effect size):
#' - **d**: Standardized mean difference between the two groups.
#' - **ci low / ci high**: Lower and upper bounds of the 95% confidence interval.
#'
#' ### t-test
#' - **method**: Type of t-test performed (e.g., "Two Sample t-test").
#' - **difference**: Observed difference between group means.
#' - **ci low / ci high**: Lower and upper bounds of the 95% confidence interval.
#' - **se**: Estimated standard error of the difference.
#' - **df**: Degrees of freedom used in the t-test.
#' - **t**: t-statistic.
#' - **p**: p-value for the t-test.
#' - **stars**: Significance stars based on p-value (`*`, `**`, `***`).
#'
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_metrics_one_grouped(data, sd_age, sd_gender)
#'
#' @export
#' @importFrom rlang .data
effect_metrics_one_grouped <- function(data, col, cross, method = "lm", adjust = "fdr", labels = TRUE, clean = TRUE, ...) {
  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ col }}, {{ cross }}, cols.categorical = {{ cross }}, cols.numeric = {{ col }}, clean = clean)

  check_is_param(method, c("lm", "t.test"))

  # 2. Calculate
  result <- list()
  lm_data <- dplyr::select(data, av = {{ col }}, uv = {{ cross }})

  # t.test
  if ("t.test" %in% method) {

    if (length(unique(lm_data$uv)) != 2) {
      stop("Check your parameters: the t.test method is only allowed for comparing two groups.")
    }

    stats_shapiro <- stats::shapiro.test(lm_data$av)
    stats_levene <- car::leveneTest(lm_data$av, group = lm_data$uv)
    stats_varequal = stats_levene[["Pr(>F)"]][1] > 0.05
    stats_cohen <- effectsize::cohens_d(lm_data$av, lm_data$uv, pooled_sd = stats_varequal, paired=FALSE)
    stats_t <- stats::t.test(lm_data$av ~ lm_data$uv, var.equal = stats_varequal)

    stats_t <- list(
      "Shapiro-Wilk normality test" = list(
        "W" = sprintf("%.2f", stats_shapiro$statistic),
        "p" = sprintf("%.3f", stats_shapiro$p.value),
        "stars" = get_stars(stats_shapiro$p.value),
        "normality" = ifelse(stats_shapiro$p.value > 0.05, "normal", "not normal")
      ),
      "Levene test" = list(
        "F" = sprintf("%.2f", stats_levene[["F value"]][1]),
        "p" = sprintf("%.3f", stats_levene[["Pr(>F)"]][1]),
        "stars" = get_stars(stats_levene[["Pr(>F)"]][1]),
        "variances" = ifelse(stats_varequal, "equal", "not equal")
      ),
      "Cohen's d" = list(
        "d" = sprintf("%.1f",round(stats_cohen$Cohens_d, 1)),
        "ci low" = sprintf("%.1f",round(stats_cohen$CI_low, 1)),
        "ci high" = sprintf("%.1f",round(stats_cohen$CI_high, 1))
      ),
      "t-Test" = list(
        "method" = stats_t$method,
        "difference" = sprintf("%.2f", round(stats_t$estimate[1] - stats_t$estimate[2], 2)),
        "ci low" = sprintf("%.2f", round(stats_t$conf.int[1], 2)),
        "ci high" = sprintf("%.2f",round(stats_t$conf.int[2], 2)),
        "se" = sprintf("%.2f",round(stats_t$stderr,2)),
        "df" = round(stats_t$parameter,2),
        "t" = sprintf("%.2f",round(stats_t$statistic,2)),
        "p" = sprintf("%.3f",round(stats_t$p.value,3)),
        "stars" = get_stars(stats_t$p.value)
      )
    ) %>%
      tibble::enframe(
        name = "Test",
        value = "Results"
        )

    stats_t <- stats_t |>
      tidyr::unnest_longer(
        tidyselect::all_of("Results"),
        indices_to="Statistic",
        values_to="Value",
        transform=as.character
      ) |>
      dplyr::select("Test","Statistic","Value")

    result <- c(result, list(.to_vlkr_tab(stats_t)))
  }

  # Regression model
  # TODO: should we omit the intercept from adjusting p values?
  else if ("lm" %in% method) {
    fit <- stats::lm(av ~ uv, data = lm_data)

    # Regression parameters
    lm_params <- tidy_lm_levels(fit)

    lm_params <- lm_params |>
      dplyr::mutate(
        Term = .data$term,
        estimate = .data$estimate,
        "ci low" = .data$conf.low,
        "ci high" = .data$conf.high,
        "se" = .data$std.error,
        t = .data$statistic,
        p = .data$p.value
      )

    # Adjust and round
    lm_params <- lm_params %>%
      adjust_p("p", method = adjust) %>%
      .attr_setcolumn(
        tidyselect::all_of(c("estimate", "ci low", "ci high", "se", "t")),
        "round", 2
      )

    lm_params <- lm_params %>%
      dplyr::select(tidyselect::all_of(c(
        "Term","estimate","ci low","ci high","se","t","p","stars"
      )))

    # Regression model statistics
    lm_model <- broom::glance(fit) |>
      dplyr::mutate(stars = get_stars(.data$p.value)) |>
      data_round("p.value", 3) %>%
      dplyr::mutate(dplyr::across(tidyselect::any_of(c("n", "df", "df.residual")), function(x) as.character(x))) |>
      data_round(tidyselect::where(is.numeric), 2) %>%
      tidyr::pivot_longer(
        tidyselect::everything(),
        names_to="Statistic",
        values_to="Value"
      ) |>
      labs_replace("Statistic", tibble::tibble(
        value_name=c(
          "adj.r.squared", "statistic", "df", "df.residual",
          "p.value", "stars"
        ),
        value_label=c(
          "Adjusted R-squared","F", "df", "residual df",
          "p", "stars"
        )
      ), na.missing = TRUE) |>
      stats::na.omit() |>
      dplyr::arrange(.data$Statistic)


    result <- c(
      result,
      list(.to_vlkr_tab(lm_params)),
      list(.to_vlkr_tab(lm_model))
    )
  }

  result <- .attr_transfer(result, data, "missings")
  .to_vlkr_list(result)
}


#' Test whether the correlation is different from zero
#'
#' The correlation is calculated using \code{stats::\link[stats:cor.test]{cor.test}}.
#'
#' @keywords internal
#'
#' @param data A tibble.
#' @param col The column holding metric values.
#' @param cross The column holding metric values to correlate.
#' @param method The output metrics, TRUE or pearson = Pearson's R, spearman = Spearman's rho.
#' @param adjust Performing multiple significance tests inflates the alpha error.
#'               Thus, p values need to be adjusted according to the number of tests.
#'               Set a method supported by  \code{stats::\link[stats:p.adjust]{p.adjust}},
#'               e.g. "fdr" (the default) or "bonferroni". Disable adjustment with FALSE.
#' @param labels If TRUE (default) extracts labels from the attributes, see \link{codebook}.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_metrics}.
#' @return A volker table containing the requested statistics.
#'
#' If `method = "pearson"`:
#' - **R-squared**: Coefficient of determination.
#' - **n**: Number of cases the calculation is based on.
#' - **Pearson's r**: Correlation coefficient.
#' - **ci low / ci high**: Lower and upper bounds of the 95% confidence interval.
#' - **df**: Degrees of freedom.
#' - **t**: t-statistic.
#' - **p**: p-value for the statistical test, indicating whether the correlation differs from zero.
#' - **stars**: Significance stars based on the p-value (*, **, ***).
#'
#' If `method = "spearman"`:
#' - **Spearman's rho** is displayed instead of Pearson's r.
#' - **S-statistic** is used instead of the t-statistic.
#'
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_metrics_one_cor(data, sd_age, use_private, metric = TRUE)
#'
#' @export
#' @importFrom rlang .data
effect_metrics_one_cor <- function(data, col, cross, method = "pearson", adjust = "fdr", labels = TRUE, clean = TRUE, ...) {
  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ col }}, {{ cross }}, cols.numeric = c({{ col }}, {{ cross }}), clean = clean)

  check_is_param(method, c("pearson", "spearman"))

  # 2. Calculate correlation
  result <- .effect_correlations(data, {{ col }}, {{ cross}}, adjust = adjust, method = method, labels = labels)

  # 3. Labeling
  # Remove common prefix
  prefix <- get_prefix(c(result$item1, result$item2))
  result <- dplyr::mutate(result, item1 = trim_prefix(.data$item1, prefix))
  result <- dplyr::mutate(result, item2 = trim_prefix(.data$item2, prefix))

  if (prefix == "") {
    prefix <- "Item"
  }

  result <- result %>%
    dplyr::rename("Item 1" = tidyselect::all_of("item1")) |>
    dplyr::rename("Item 2" = tidyselect::all_of("item2"))

  if (prefix == "") {
    title <- NULL
  }

  result <- result |>
    data_round("p", 3) %>%
    dplyr::mutate(dplyr::across(tidyselect::any_of(c("n", "df", "df.residual")), function(x) as.character(x))) |>
    data_round(tidyselect::where(is.numeric), 2) %>%

    #dplyr::mutate(dplyr::across(tidyselect::everything(), \(x) as.character(x))) |>
    tidyr::pivot_longer(
      cols = -tidyselect::all_of(c("Item 1", "Item 2")),
      names_to ="Statistic"
    ) |>
    dplyr::select(-tidyselect::all_of(c("Item 1", "Item 2")))

  result <- .attr_transfer(result, data, "missings")
  .to_vlkr_tab(result, digits= 2, caption=title)
}

#' Test whether a distribution is normal for each item
#'
#' The test is calculated using \code{stats::\link[stats:shapiro.test]{shapiro.test}}.
#'
#' @keywords internal
#'
#' @param data A tibble containing item measures.
#' @param cols The column holding metric values.
#' @param adjust Performing multiple significance tests inflates the alpha error.
#'               Thus, p values need to be adjusted according to the number of tests.
#'               Set a method supported by  \code{stats::\link[stats:p.adjust]{p.adjust}},
#'               e.g. "fdr" (the default) or "bonferroni". Disable adjustment with FALSE.
#' @param labels If TRUE (default) extracts labels from the attributes, see \link{codebook}.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_metrics}.
#' @return A volker table containing itemwise statistics:
#'
#' - **skewness**: Measure of asymmetry in the distribution. A value of 0 indicates perfect symmetry.
#' - **kurtosis**: Measure of the "tailedness" of the distribution.
#' - **W**: W-statistic from the Shapiro-Wilk normality test.
#' - **p**: p-value for the statistical test.
#' - **stars**: Significance stars based on p-value (*, **, ***).
#' - **normality**: Interpretation of normality based on Shapiro-Wilk test.
#'
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_metrics_items(data, starts_with("cg_adoption"))
#'
#'
#' @importFrom rlang .data
#' @export
effect_metrics_items <- function(data, cols, adjust = "fdr", labels = TRUE, clean = TRUE, ...) {
  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ cols }}, cols.numeric = {{ cols }}, clean = clean)

  # 2. Calculate
  data <- dplyr::select(data, {{ cols }})

  result <- purrr::imap(
    data,
    \(.x, .y) {
      shapiro <- stats::shapiro.test(.x)
      stats <- psych::describe(.x)

      list(
        "Item" = .y,
        "skewness" = stats$skew,
        "kurtosis" = stats$kurt,
        "W" = shapiro$statistic,
        "p" = shapiro$p.value
      )

    }
  ) %>%
    dplyr::bind_rows()

  # Adjust and round
  result <- result %>%
    adjust_p("p", method = adjust) %>%
    .attr_setcolumn(
      tidyselect::all_of(c("skewness", "kurtosis", "W")),
      "round", 2
    )

  result$normality = ifelse(result$p > 0.05, "normal", "not normal")

  # 3. Labels
  if (labels) {
    result <- labs_replace(
      result, "Item",
      codebook(data, {{ cols }}),
      "item_name", "item_label"
    )
  }

  prefix <- get_prefix(result$Item)
  result <- dplyr::mutate(
    result, Item = trim_prefix(.data$Item, prefix)
  )

  # Rename first column
  if (prefix != "") {
    colnames(result)[1] <- prefix
  }

  result <- .attr_transfer(result, data, "missings")
  .to_vlkr_tab(result)
}

#' Compare groups for each item by calculating F-statistics and effect sizes
#'
#'
#' The models are fitted using \code{stats::\link[stats:lm]{lm}}.
#' ANOVA of type II is computed for each fitted model using \code{car::\link[car:Anova]{Anova}}.
#' Eta Squared is calculated for each ANOVA result
#' using \code{effectsize::\link[effectsize:eta_squared]{eta_squared}}.
#'
#' @keywords internal
#'
#' @param data A tibble containing item measures.
#' @param cols Tidyselect item variables (e.g. starts_with...).
#' @param cross The column holding groups to compare.
#' @param labels If TRUE (default) extracts labels from the attributes, see \link{codebook}.
#' @param adjust Performing multiple significance tests inflates the alpha error.
#'               Thus, p values need to be adjusted according to the number of tests.
#'               Set a method supported by  \code{stats::\link[stats:p.adjust]{p.adjust}},
#'               e.g. "fdr" (the default) or "bonferroni". Disable adjustment with FALSE.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_metrics}.
#' @return A volker tibble with the following statistical measures:
#'
#'  - **Eta-squared**: Effect size indicating the proportion of variance in the dependent variable explained by the predictor.
#'  - **Eta**: Root of Eta-squared, a standardized effect size.
#'  - **n**: Number of cases the calculation is based on.
#'  - **F**: F-statistic from the linear model.
#'  - **p**: p-value for the statistical test.
#'  - **stars**: Significance stars based on p-value (*, **, ***).
#'
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_metrics(data, starts_with("cg_adoption_"), adopter)
#'
#' @export
#' @importFrom rlang .data
effect_metrics_items_grouped <- function(data, cols, cross, adjust = "fdr", labels = TRUE, clean = TRUE, ...) {
  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ cols }}, {{ cross }}, cols.categorical = {{ cross }}, cols.numeric = {{ cols }}, clean = clean)

  # 2. Pivot longer
  lm_data <- data %>%
    dplyr::rename(uv = {{ cross }}) %>%
    tidyr::pivot_longer(
      cols = {{ cols }},
      names_to = "item",
      values_to = "value"
    ) %>%
    dplyr::group_by(.data$item) %>%
    tidyr::nest() %>%
    tibble::deframe()

  # 3. Linear model per item
  result <- purrr::imap(
    lm_data,
    \(.x, .y) {

    model <- stats::lm(value ~ uv, data = .x)
    summ <- summary(model)
    eta_sq <- get_etasq(model)

    list(
      "item" = .y,
      "Eta-squared" = eta_sq$Eta2,
      "Eta" = sqrt(eta_sq$Eta2),
      "n" = nrow(.x),
      "F" = summ$fstatistic[1],
      "p" = summ$coefficients[2, 4]
    )
  }) %>%
    dplyr::bind_rows()

  # Adjust and round
  result <- result %>%
    adjust_p("p", method = adjust) %>%
    .attr_setcolumn(
      tidyselect::all_of(c("Eta-squared", "Eta", "F")),
      "round", 2
    )

  # 5. Labels
  if (labels) {
     result <- labs_replace(
       result, "item",
       codebook(data, {{ cols }}),
       "item_name", "item_label"
     )
   }

  prefix <- get_prefix(result$item)
  result <- dplyr::mutate(result, item = trim_prefix(.data$item, prefix))

  # Rename first column
  if (prefix != "") {
    colnames(result)[1] <- prefix
  } else {
    result <- dplyr::rename(result, Item = tidyselect::all_of("item"))
  }

   result <- .attr_transfer(result, data, "missings")
   .to_vlkr_tab(result)
}

#' Compare groups for each item with multiple target items by calculating F-statistics and effect sizes
#'
#' \strong{Not yet implemented. The future will come.}
#'
#' @keywords internal
#'
#' @param data A tibble containing item measures.
#' @param cols Tidyselect item variables (e.g. starts_with...).
#' @param cross The grouping items.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_counts}.
#' @return A volker tibble.
#' @importFrom rlang .data
effect_metrics_items_grouped_items <- function(data, cols, cross, clean = TRUE, ...) {
  warning("Not implemented yet. The future will come.", noBreaks. = TRUE)
}

#' Output correlation coefficients for items and one metric variable
#'
#' The correlation is calculated using \code{stats::\link[stats:cor.test]{cor.test}}.
#'
#' @keywords internal
#'
#' @param data A tibble containing item measures.
#' @param cols Tidyselect item variables (e.g. starts_with...).
#' @param cross The column holding metric values to correlate.
#' @param method The output metrics, pearson = Pearson's R, spearman = Spearman's rho.
#' @param adjust Performing multiple significance tests inflates the alpha error.
#'               Thus, p values need to be adjusted according to the number of tests.
#'               Set a method supported by  \code{stats::\link[stats:p.adjust]{p.adjust}},
#'               e.g. "fdr" (the default) or "bonferroni". Disable adjustment with FALSE.
#' @param labels If TRUE (default) extracts labels from the attributes, see \link{codebook}.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_metrics}.
#' @return A volker table containing itemwise correlations:
#'
#' If `method = "pearson"`:
#' - **R-squared**: Coefficient of determination.
#' - **n**: Number of cases the calculation is based on.
#' - **Pearson's r**: Correlation coefficient.
#' - **ci low / ci high**: Lower and upper bounds of the 95% confidence interval.
#' - **df**: Degrees of freedom.
#' - **t**: t-statistic.
#' - **p**: p-value for the statistical test, indicating whether the correlation differs from zero.
#' - **stars**: Significance stars based on the p-value (*, **, ***).
#'
#' If `method = "spearman"`:
#' - **Spearman's rho** is displayed instead of Pearson's r.
#' - **S-statistic** is used instead of the t-statistic.
#'
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_metrics_items_cor(
#'   data, starts_with("cg_adoption_adv"), sd_age
#' )
#'
#' @export
#' @importFrom rlang .data
effect_metrics_items_cor <- function(data, cols, cross, method = "pearson", adjust = "fdr", labels = TRUE, clean = TRUE, ...) {
  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ cols }}, {{ cross }}, cols.numeric = c({{ cols }}, {{ cross }}), clean = clean)

  check_is_param(method, c("pearson", "spearman"))

  # 2. Calculate correlations
  result <- .effect_correlations(data, {{ cols }}, {{ cross }}, method = method, adjust = adjust, labels = labels)

  # 3. Labels
  prefix1 <- get_prefix(result$item1)

  if (labels) {
    prefix2 <- get_title(data, {{ cross }})
  } else {
    prefix2 <- rlang::as_string(rlang::ensym(cross))
  }

  result <- result %>%
    dplyr::mutate(item1 = trim_prefix(.data$item1, prefix1)) %>%
    dplyr::select(-tidyselect::all_of("item2"))

  # Rename first column
  if (prefix1 != "") {
    colnames(result)[1] <- paste0(prefix1, ": Correlation with ", prefix2)
  }

  result <- .attr_transfer(result, data, "missings")
  .to_vlkr_tab(result, digits = 2)
}


#' Output correlation coefficients for multiple items
#'
#' The correlation is calculated using \code{stats::\link[stats:cor.test]{cor.test}}.
#'
#' @keywords internal
#'
#' @param data A tibble containing item measures.
#' @param cols Tidyselect item variables (e.g. starts_with...).
#' @param cross Tidyselect item variables (e.g. starts_with...).
#' @param method The output metrics, pearson = Pearson's R, spearman = Spearman's rho.
#' @param adjust Performing multiple significance tests inflates the alpha error.
#'               Thus, p values need to be adjusted according to the number of tests.
#'               Set a method supported by  \code{stats::\link[stats:p.adjust]{p.adjust}},
#'               e.g. "fdr" (the default) or "bonferroni". Disable adjustment with FALSE.
#' @param labels If TRUE (default) extracts labels from the attributes, see \link{codebook}.
#' @param clean Prepare data by \link{data_clean}.
#' @param ... Placeholder to allow calling the method with unused parameters from \link{effect_metrics}.
#' @return A volker table containing correlations.
#'
#' If `method = "pearson"`:
#' - **R-squared**: Coefficient of determination.
#' - **n**: Number of cases the calculation is based on.
#' - **Pearson's r**: Correlation coefficient.
#' - **ci low / ci high**: Lower and upper bounds of the 95% confidence interval.
#' - **df**: Degrees of freedom.
#' - **t**: t-statistic.
#' - **p**: p-value for the statistical test, indicating whether the correlation differs from zero.
#' - **stars**: Significance stars based on the p-value (*, **, ***).
#'
#' If `method = "spearman"`:
#' - **Spearman's rho** is displayed instead of Pearson's r.
#' - **S-statistic** is used instead of the t-statistic.
#'
#' @examples
#' library(volker)
#' data <- volker::chatgpt
#'
#' effect_metrics_items_cor_items(
#'   data,
#'   starts_with("cg_adoption_adv"),
#'   starts_with("use"),
#'   metric = TRUE
#' )
#'
#' @export
#' @importFrom rlang .data
effect_metrics_items_cor_items <- function(data, cols, cross, method = "pearson", adjust = "fdr", labels = TRUE, clean = TRUE, ...) {
  # 1. Checks, clean, remove missings
  data <- data_prepare(data, {{ cols }}, {{ cross }}, cols.numeric = c({{ cols }}, {{ cross }}), clean = clean)
  check_is_param(method, c("pearson", "spearman"))

  # 2. Calculate correlations
  result <- .effect_correlations(data, {{ cols }}, {{ cross }}, method = method, adjust = adjust, labels = labels)

  # 3. Labels
  prefix1 <- get_prefix(result$item1)
  prefix2 <- get_prefix(result$item2)

  result <- result %>%
    dplyr::mutate(item1 = trim_prefix(.data$item1, prefix1)) |>
    dplyr::mutate(item2 = trim_prefix(.data$item2, prefix2))

  if ((prefix1 == prefix2) && (prefix1 != "")) {
    prefix1 <- paste0("Item 1: ", prefix1)
    prefix2 <- paste0("Item 2: ", prefix2)
  }

  # Rename first column
  if (prefix1 != "") {
    colnames(result)[1] <- prefix1
  }

  # Rename second column
  if (prefix2 != "") {
    colnames(result)[2] <- prefix2
  }

  result <- .attr_transfer(result, data, "missings")
  .to_vlkr_tab(result, digits = 2)
}


#' Calculate correlation and cooccurrence coefficients and test whether they are different from zero
#'
#' This function is used to calculate coefficients for all pairwise items
#' by calling `get_correlation()` on each combination of the items in the `cols`- by `cross`-parameter.
#'
#' @keywords internal
#'
#' @param data A tibble.
#' @param cols The columns holding metric values.
#' @param cross The columns holding metric values to correlate.
#' @param method The output metrics, pearson = Pearson's R, spearman = Spearman's rho,
#'               cramer = Cramer's V, npmi = Normalized Pointwise Mutual Information.
#'               The reported R square value is simply the square of Spearman's rho respective Pearson's r.
#' @param category Calculating NPMI for multiple items requires a focus category.
#'                By default, for logical column types, only TRUE values are counted.
#'                For other column types, the first category is counted.
#'                Accepts both character and numeric values to override default counting behavior.
#' @param test Boolean, whether to perform significance tests (default = TRUE).
#' @param adjust Performing multiple significance tests inflates the alpha error.
#'               Thus, p values need to be adjusted according to the number of tests.
#'               Set a method supported by  \code{stats::\link[stats:p.adjust]{p.adjust}},
#'               e.g. "fdr" (the default) or "bonferroni". Disable adjustment with FALSE.
#' @param labels If TRUE (default) extracts labels from the attributes, see \link{codebook}.
#' @return A tibble with correlation results.
#' @importFrom rlang .data
.effect_correlations <- function(data, cols, cross, method = "pearson", category = NULL, test = TRUE, adjust = "fdr", labels = TRUE) {

  cols_eval <- tidyselect::eval_select(expr = enquo(cols), data = data)
  cross_eval <- tidyselect::eval_select(expr = enquo(cross), data = data)

  # Check method
  check_is_param(method, c("spearman", "pearson", "cramer", "npmi"))

  # Category
  if (is.null(category) & (method == "npmi")) {

    if ("TRUE" %in% as.character(data[[cols_eval[1]]])) {
      category <- "TRUE"
    }
    else if (is.factor(data[[cols_eval[1]]])) {
      category <- levels(data[[cols_eval[1]]])[1]
    }
    else  {
      category <- unique(as.character(data[[cols_eval[1]]]))[1]
    }

    # TODO: Recode as in plot_counts_items_grouped() to reduce number of tests
    # result <- result %>%
    #  mutate(.category = (.data$.value_name %in% base_category) | (.data$.value_label %in% base_category))

  }

  # Calculate
  result <- expand.grid(
    x = cols_eval, y = cross_eval,
    stringsAsFactors = FALSE
  ) %>%
    dplyr::mutate(
      .test = purrr::pmap(
        list(.data$x, .data$y),
        \(x, y) get_correlation(data[[x]], data[[y]], method = method, category = category, test = test)
      )
    )

  result <- result %>%
    dplyr::mutate(
      x_name = names(.data$x),
      y_name = names(.data$y)
    ) %>%
    tidyr::unnest_wider(".test") |>
    dplyr::select(-tidyselect::any_of(c("x", "y")))

  result <- dplyr::select(result, item1 = "x_name", item2 = "y_name", tidyselect::everything())
  result <- dplyr::arrange(result, .data$item1, .data$item2)

  # Adjust and round
  # TODO: omit adjusting for test for self-correlation (item matrices)?
  if (method == "npmi") {
    if (test) {
      result <- adjust_p(result, "fisher_p",stars = "fisher_stars", method = adjust)
    }
    result <- dplyr::mutate(result, npmi = round(.data$npmi, 2))
    attr(result, "focus") <- category
  }
  else if (test) {
    result <- adjust_p(result, "p", method = adjust)
  }


  # Get variable caption from the attributes
  if (labels) {
    result <- labs_replace(result, "item1", codebook(data, {{ cols }}), col_from="item_name", col_to="item_label")
    result <- labs_replace(result, "item2", codebook(data, {{ cross }}), col_from="item_name", col_to="item_label" )
  }

  # Baseline
  attr(result, "cases") <- nrow(data)

  result
}


#' Tidy lm results, replace categorical parameter names by their levels and add the reference level
#'
#' @keywords internal
#'
#' @param fit Result of a \code{\link[stats:lm]{lm}} call.
#' @author Created with the help of ChatGPT.
#' @return A tibble with regression parameters.
tidy_lm_levels <- function(fit) {
  lm_tidy <- broom::tidy(fit, conf.int = TRUE)
  lm_tidy <- dplyr::left_join(lm_tidy, get_betas(fit), by = "term")

  lm_data <- fit$model

  # Work through each factor in the model frame
  for (var in names(lm_data)) {
    if (is.character(lm_data[[var]])) {
      lm_data[[var]] <- as.factor(lm_data[[var]])
    }
    if (is.factor(lm_data[[var]])) {
      levels <- levels(lm_data[[var]])

      ref_level <- levels[1]
      ref_first <- paste0(var, levels[2])
      ref_index <- which(lm_tidy$term == ref_first)

      # Rename the coefficients in tidy_data
      for (level in levels[-1]) {
        old_name <- paste0(var, level)
        new_name <- paste0(level)
        lm_tidy$term <- sub(paste0("^\\Q", var, level,"\\E"), new_name, lm_tidy$term)
      }

      # Create reference level row, assuming the first level is the reference
      ref_row <- data.frame(term = paste0(ref_level, " (Reference)"))

      # Insert the ref_row at ref_index in lm_tidy
      lm_tidy <- dplyr::bind_rows(lm_tidy[1:ref_index-1,,drop = FALSE], ref_row, lm_tidy[ref_index:nrow(lm_tidy),,drop = FALSE])
    }
  }

  lm_tidy
}

#' Adjust p-values from multiple tests and optionally annotate significance stars
#'
#' @keywords internal
#'
#' @param df A data frame or tibble containing the column to adjust.
#' @param col A tidyselect expression specifying the p-value column to adjust.
#' @param method Character string specifying the p-value adjustment method.
#'   See \code{\link[stats]{p.adjust}} for available methods. Disable adjustment with FALSE.
#' @param digits Integer; number of decimal places for rounding.
#' @param stars Logical or character; if \code{TRUE}, add a "stars" column
#'              with significance symbols (e.g., `"***"`, `"**"`, `"*"`)
#'              based on the adjusted p-values.
#'              If set to a character value it determines the new column name.
#'
#' @return A modified data frame with:
#'   \itemize{
#'     \item Adjusted p-values in the selected column.
#'     \item (Optionally) a new column \code{stars} containing significance symbols.
#'     \item An attribute \code{"adjust"} on the data frame storing the method.
#'     \item A \code{"round"} attribute on the p-value column.
#'   }
adjust_p <- function(df, col, method = "fdr", digits = 3, stars = TRUE) {

  col_eval <- tidyselect::eval_select(expr = enquo(col), data = df)
  col_name <- names(col_eval)

  if (length(col_name) != 1) {
    stop("`col` must select exactly one column.")
  }


  # Adjust p-values
  if (nrow(df) > 1) {
    method <- ifelse(method == "none", FALSE, method)

    if (method != FALSE) {
      df[[col_name]] <- stats::p.adjust(df[[col_name]], method = method)
      attr(df, "adjust") <-  method
    }
  }

  # Set 'round' attribute on p-value column
  df <- .attr_setcolumn(df, !!sym(col_name), "round", digits)


  # Optionally add significance stars
  if (!isFALSE(stars)) {
    if (!is.character(stars)) {
      stars <- "stars"
    }
    df[[stars]] <- get_stars(df[[col_name]])
  }

  return(df)
}


#' Calculate correlation between two vectors
#'
#' @keywords internal
#'
#' @param x First vector
#' @param y Second vector
#' @param method One of "spearman" or "pearson" for metric vectors.
#'               For catecorical vectors, use "cramer" or "npmi".
#' @param category A vector of values to focus. Necessary for the npmi method only.
#' @param test Boolean; whether to perform significance tests.
#' @return The result of cor.test() for metric vectors, chisq.test for Cramer's V.
get_correlation <- function(x, y, method, category = NULL, test = TRUE) {

  result <- list()

  # TODO: round in print function, not here
  if (method == "npmi") {

    if (is.null(category)) {
      stop("The npmi method needs a focus category, check your parameters.")
    }

    df <- data.frame(x = x, y = y)
    result <- get_npmi(df, !!sym("x"), !!sym("y"), category = category, adjust = FALSE, test = test)
    result$p_y_if_x <- NULL
    result$p_x_if_y <- NULL
    result <- as.list(result[1, -c(1,2) , drop = TRUE])
  }
  else if (method == "cramer") {
    contingency <- table(as.character(x), as.character(y))
    n <- sum(contingency)
    sr <- rowSums(contingency)
    sc <- colSums(contingency)
    E <- outer(sr, sc) / sum(contingency)
    exact <- any(E < 5)
    isdiag = sum(diag(contingency)) == n

    if ((nrow(contingency) < 2) | (ncol(contingency) < 2)) {
      fit <- list(statistic = NA, parameter = NA, p.value = NA)
    } else {
      fit <- stats::chisq.test(contingency, simulate.p.value = exact)
    }


    cells <- min(dim(contingency)[1], dim(contingency)[2]) - 1
    cramer_v <- round(sqrt( (fit$statistic / n) / cells), 2)
    cramer_v <- ifelse(isdiag, 1.0, cramer_v)

    result <- list(
      "Cramer's V" = round(cramer_v, 2),
      "Chi-squared" = sprintf("%.2f", round(fit$statistic, 2)),
      "n" = as.character(n),
      "df" = as.character(fit$parameter),
      "p"= fit$p.value
    )
  }

  else if (method == "spearman") {
    fit <- stats::cor.test(x, y, method = method, exact = FALSE)

    result = list(
      "Spearman's rho" = round(as.numeric(fit$estimate),2),
      "R-squared" = round(as.numeric(fit$estimate^2),2),
      n = length(x),
      s = sprintf("%.2f", round(fit$statistic,2)),
      p = fit$p.value
    )

  }
  else {
    fit <- stats::cor.test(x, y, method = method, exact = TRUE)

    result = list(
      "Pearson's r" = round(as.numeric(fit$estimate),2),
      "R-squared" = round(as.numeric(fit$estimate^2),2),
      n = sum(!is.na(x) & !is.na(y)),
      "ci low" = round(as.numeric(fit$conf.int[1]),2),
      "ci high" = round(as.numeric(fit$conf.int[2]),2),
      df = as.numeric(fit$parameter),
      t = ifelse(all(x == y), "Inf", sprintf("%.2f", round(as.numeric(fit$statistic),2))),
      p = fit$p.value
    )
  }

  return (result)
}

#' Calculate nmpi
#'
#' @keywords internal
#'
#' @param data A tibble.
#' @param col The column holding factor values.
#' @param cross The column to correlate.
#' @param category A vector of values to focus. If not null, all other values will be removed from the result.
#' @param smoothing Add pseudocount. Calculate the pseudocount based on the number of trials
#'        to apply Laplace's rule of succession.
#' @param adjust Performing multiple significance tests inflates the alpha error.
#'               Thus, p values need to be adjusted according to the number of tests.
#'               Set a method supported by  \code{stats::\link[stats:p.adjust]{p.adjust}},
#'               e.g. "fdr" (the default) or "bonferroni". Disable adjustment with FALSE.
#' @param test Boolean; whether to perform significance tests (default TRUE).
#' @param ... Placeholder to allow calling the method with unused parameters from \link{tab_counts}.
#' @return A volker tibble.
#' @importFrom rlang .data
get_npmi <- function(data, col, cross, category = NULL, smoothing = 0, adjust = "fdr", test = TRUE, ...) {

  cols_eval <- tidyselect::eval_select(expr = enquo(col), data = data)
  cross_eval <- tidyselect::eval_select(expr = enquo(cross), data = data)

  col_name <- names(cols_eval)[1]
  cross_name <- names(cross_eval)[1]

  # Calculate marginal probabilities
  result <- data %>%
    dplyr::count(.data[[col_name]], .data[[cross_name]]) %>%
    #tidyr::complete({{ col }}, {{ cross }}, fill=list(n=0)) |>

    dplyr::group_by(!!sym(col_name)) %>%
    dplyr::mutate(.total_x = sum(.data$n)) %>%
    dplyr::ungroup() %>%
    dplyr::group_by(!!sym(cross_name)) %>%
    dplyr::mutate(.total_y = sum(.data$n)) %>%
    dplyr::ungroup()

  # Calculate joint probablities
  result <- result %>%
    dplyr::mutate(
      .total = sum(.data$n),
      p_x = (.data$.total_x + smoothing) / (.data$.total + (dplyr::n_distinct(!!sym(col_name)) * smoothing)),
      p_y = (.data$.total_y + smoothing) / (.data$.total + (dplyr::n_distinct(!!sym(cross_name)) * smoothing)),
      p_xy = (.data$n + smoothing) / (.data$.total + dplyr::n_distinct(!!sym(col_name)) * smoothing + dplyr::n_distinct(!!sym(cross_name)) *  smoothing),
      p_x_if_y = .data$n / .data$.total_y,
      p_y_if_x = .data$n / .data$.total_x,

      ratio = .data$p_xy / (.data$p_x * .data$p_y),
      pmi = dplyr::case_when(
        .data$p_xy == 0 ~ -Inf,
        TRUE ~ log2(.data$ratio)
      ),
      npmi = dplyr::case_when(
        .data$p_xy == 0 ~ -1,
        TRUE ~ .data$pmi / -log2(.data$p_xy)
      )
    )

  # Filter out categories
  if (!is.null(category)) {
    result <- result[as.character(result[[1]]) %in% category,, drop = FALSE]
    result <- result[as.character(result[[2]]) %in% category,, drop = FALSE]
  }

  # Add exact fisher test
  if (test & (nrow(result) > 0)) {
    result <- result %>%
      dplyr::rowwise() %>%
      dplyr::mutate(
        fisher_p = {
          a <- .data$n
          b <- .data$.total_x - .data$n
          c <- .data$.total_y - .data$n
          d <- .data$.total - a - b - c
          tbl <- matrix(c(a, b, c, d), nrow = 2)
          suppressWarnings(stats::fisher.test(tbl)$p.value)
        }) %>%
      dplyr::ungroup()

    # Adjust and round
    result <- result %>%
      adjust_p("fisher_p", method = adjust, stars = "fisher_stars")
  }
  else if (test & (nrow(result) == 0)) {
    result$fisher_p <- NA
    result$fisher_stars <- NA
  }

  result <- dplyr::select(result, -tidyselect::all_of(c(".total",".total_x", ".total_y")))

  result
}


#' Calculate Eta squared
#'
#' @keywords internal
#'
#' @param fit A model
#' @return A data frame with at least the column Eta2
get_etasq <- function(fit) {
  if(round(sum(fit$residuals),20) == 0) {
    result <- data.frame("Eta2"=0)
  } else  {
    result <- effectsize::eta_squared(car::Anova(fit, type = 2), verbose = FALSE)
  }
  result
}

#' Calculate the Gini coefficient
#'
#' @keywords internal
#'
#' @param x A vector of counts or other values
#' @return The gini coefficient
get_gini <- function(x) {

  x <- sort(x)
  n <- length(x)

  gini <- sum(x * c(1:n))
  gini <- 2 * gini/sum(x) - (n + 1)
  gini <- gini/n

  return(gini)
}


#' Calculate ci values to be used for error bars on a plot
#'
#' @keywords internal
#'
#' @param x A numeric vector.
#' @param conf The confidence level.
#' @return A named list with values for y, ymin, and ymax.
get_ci <- function(x, conf = 0.95) {
  n <- length(x)
  m <- mean(x)
  se <- stats::sd(x) / sqrt(n)
  error_margin <- stats::qt(conf + (1 - conf) / 2, df = n - 1) * se
  return(c(y = m, ymin = m - error_margin, ymax = m + error_margin))
}

#' Get significance stars from p values
#'
#' @keywords internal
#'
#' @param x A vector of p values.
#' @return A character vector with significance stars.
get_stars <- function(x) {
  sapply(x, function(p) {
    if (is.na(p)) {
      return(NA)
    } else if (p < 0.001) {
      return("***")
    } else if (p < 0.01) {
      return("**")
    } else if (p < 0.05) {
      return("*")
    } else if (p < 0.1) {
      return(".")
    } else {
      return("")
    }
  })
}

#' Calculate standardized betas
#'
#' @keywords internal
#'
#' @param fit A model fitted with lm()
#' @return A data frame with a row for each term
get_betas <- function(fit) {

  # Raw betas (exclude intercept)
  betas <- stats::coef(fit)[-1]

  # Response SD
  model_data <- stats::model.frame(fit)
  y_sd <- stats::sd(model_data[[1]], na.rm = TRUE)

  # Predictor SDs
  mm <- stats::model.matrix(fit)[, -1, drop = FALSE]
  x_sds <- apply(mm, 2, stats::sd, na.rm = TRUE)


  # Compute standardized betas
  beta_std <- betas * (x_sds / y_sd)

  tibble::tibble(
    term = names(beta_std),
    beta_std = beta_std
  )
}

Try the volker package in your browser

Any scripts or data that you put into this service are public.

volker documentation built on Nov. 5, 2025, 5:21 p.m.