# The script contains functions for comparing of multiple EDA objects and computing difference effect sizes.
# Testing function -----
#' Compare EDA objects.
#'
#' @description compare two or more EDA objects with a range of statistical tests.
#' @param ... EDA objects, at least two, created by \code{\link{eda}}.
#' @param type type of statistic test. For two numeric EDA objects: t test ('t_test', \code{\link[stats]{t.test}}),
#' paired t test ('paired_t_test', \code{\link[stats]{t.test}}), sign test ('sign_test', \code{\link[DescTools]{SignTest}}),
#' Mann-Whitney test ('mann_whitney_test', \code{\link[stats]{t.test}})
#' or paired Wilcoxon test ('wilcoxon_test', \code{\link[stats]{wilcox.test}}).
#' Multiple numeric-type EDA objects can be compared with one-way ANOVA ('anova', \code{\link[stats]{aov}}),
#' repeated measure one-way ANOVA ('rm_anova', \code{\link[stats]{aov}}), Kruskal-Wallis test
#' ('kruskal_test', \code{\link[stats]{kruskal.test}}) or Friedman test for repeated measurements
#' ('friedman_test', \code{\link[stats]{friedman.test}}). For factor-type EDA objects, Chi-squared test
#' may be applied ('chisq_test', \code{\link[stats]{chisq.test}}).
#' @param exact logical, should exact values for Chi-squared. Mann-Whitney and Wilcoxon test be returned?
#' @param ci logical, should confidence intervals for the test effect size be returned?
#' @param boot_method indicates how the bootstrap confidence intervals are calculated.
#' Can be any of 'percentile', 'bca', or 'normality', defaults to 'percentile'.
#' @details EDA object type is coerced to factor or numeric, as appropriate for the requested analysis.
#' The default effect or location sizes returned as estimates with along with the test results include difference in mean for t tests,
#' difference in (pseudo-) median for sign, Mann-Whitney and Wilcoxon tests, eta-squared (% explained variance)
#' for one-way ANOVA and repeated measure one-way ANOVA, eta-square for Kruskal-Wallis test (calculated by
#' \code{\link[rstatix]{kruskal_effsize}}) and Kendall W for Friedman test (\code{\link[rstatix]{friedman_effsize}}).
#' For Chi-squared test, Cramer's V is returned (\code{\link[rcompanion]{cramerV}}).
#' @return an eTest object with the test name, test statistic value and
#' the default effect/location size with 95/% confidence intervals.
#' @export
test <- function(...,
type = c('t_test', 'paired_t_test', 'sign_test', 'mann_whitney_test', 'wilcoxon_test',
'anova', 'rm_anova', 'kruskal_test', 'friedman_test', 'chisq_test'),
exact = TRUE,
ci = TRUE,
boot_method = c('percentile', 'bca', 'normality')) {
## entry control
stopifnot(is.logical(exact),
is.logical(ci))
type <- match.arg(type[1],
c('t_test', 'paired_t_test', 'sign_test',
'mann_whitney_test', 'wilcoxon_test',
'anova', 'rm_anova', 'kruskal_test',
'friedman_test', 'chisq_test'))
boot_method <- match.arg(boot_method[1],
c('percentile', 'bca', 'normality'))
boot_method <- switch(boot_method,
percentile = 'perc',
bca = 'bca',
normality = 'norm')
inp_list <- rlang::list2(...)
classes <- purrr::map_lgl(inp_list, is_eda)
if(any(!classes)) stop('EDA objects are required.', call. = FALSE)
if(length(inp_list) < 2) stop('Please provide at least two EDA objects.', call. = FALSE)
## factor testing
if(type == 'chisq_test') {
test_res <- exda:::chi_tester(..., test_mtx = FALSE, coerce = TRUE)
eff <- rcompanion::cramerV(exda:::chi_tester(..., test_mtx = TRUE, coerce = TRUE),
ci = ci,
type = boot_method)
return(dplyr::mutate(test_res,
estimate_name = 'V',
estimate = if(ci) eff[1, 1] else eff[1],
lower_ci = if(ci) eff[1, 2] else NA,
upper_ci = if(ci) eff[1, 3] else NA))
}
## numeric testing
paired <- type %in% c('paired_t_test', 'wilcoxon_test', 'rm_anova', 'friedman_test')
num_input <- exda:::convert_eda(!!!inp_list, paired = paired)
if(paired) {
n <- nrow(dplyr::filter(num_input,
complete.cases(num_input),
!duplicated(id)))
n <- n * length(inp_list)
} else {
n <- nrow(dplyr::filter(num_input,
complete.cases(num_input)))
}
## two-sample
if(type %in% c('t_test', 'paired_t_test', 'sign_test', 'mann_whitney_test', 'wilcoxon_test')) {
if(length(levels(num_input$group)) > 2) {
warning('More than two EDA objects provided. The first two will be compared.', call. = FALSE)
}
test_fun <- switch(type,
t_test = function(x, y) stats::t.test(x, y, paired = FALSE),
paired_t_test = function(x, y) stats::t.test(x, y, paired = TRUE),
sign_test = function(x, y) DescTools::SignTest(x, y),
mann_whitney_test = function(x, y) stats::wilcox.test(x, y,
paired = FALSE,
conf.int = TRUE,
exact = exact),
wilcoxon_test = function(x, y) stats::wilcox.test(x, y,
paired = TRUE,
conf.int = TRUE,
exact = exact))
test_res <- test_fun(as_numeric(inp_list[[1]]$value),
as_numeric(inp_list[[2]]$value))
test <- switch(type,
t_test = 'unpaired two-sided T test',
paired_t_test = 'paired two-sided T test',
sign_test = 'sign test',
mann_whitney_test = 'Mann-Whitney test',
wilcoxon_test = 'paired Wilcoxon test')
stat_name <- switch(type,
t_test = 't',
paired_t_test = 't',
sign_test = 'S',
mann_whitney_test = 'W',
wilcoxon_test = 'V')
df1 <- switch(type,
t_test = test_res[['parameter']],
paired_t_test = test_res[['parameter']],
sign_test = NA,
mann_whitney_test = NA,
wilcoxon_test = NA)
estimate_name <- switch(type,
t_test = 'mean difference',
paired_t_test = 'mean difference',
sign_test = 'median difference',
mann_whitney_test = 'median difference',
wilcoxon_test = 'median difference')
estimate <- switch(type,
t_test = test_res[['estimate']][1] - test_res[['estimate']][2],
paired_t_test = test_res[['estimate']][1],
sign_test = test_res[['estimate']][1],
mann_whitney_test = test_res[['estimate']][1],
wilcoxon_test = test_res[['estimate']][1])
return(exda::etest(test = test,
stat_name = stat_name,
stat = test_res[['statistic']],
df1 = df1,
estimate_name = estimate_name,
estimate = estimate,
lower_ci = if(ci) test_res[['conf.int']][1] else NA,
upper_ci = if(ci) test_res[['conf.int']][2] else NA,
p_value = test_res[['p.value']],
n = n))
}
if(type %in% c('anova', 'rm_anova')) {
tst_formula <- switch(type,
anova = variable ~ group,
rm_anova = variable ~ group + Error(id))
test <- switch(type,
anova = 'one-way ANOVA',
rm_anova = 'repeated-measure one-way ANOVA')
tst_result <- summary(stats::aov(formula = tst_formula,
data = num_input))
tst_result <- switch(type,
anova = tst_result[[1]],
rm_anova = tst_result[[2]][[1]])
tst_result <- tibble::as_tibble(tst_result)
tst_result <- dplyr::mutate(tst_result,
eta_sq = `Sum Sq`/sum(`Sum Sq`))
return(exda::etest(test = test,
stat_name = 'F',
stat = tst_result[['F value']][1],
df1 = tst_result[['Df']][1],
df2 = tst_result[['Df']][2],
estimate_name = 'eta squared',
estimate = tst_result[['eta_sq']][1],
p_value = tst_result[['Pr(>F)']][1],
n = n))
}
if(type %in% c('kruskal_test', 'friedman_test')) {
test <- switch(type,
kruskal_test = 'Kruskal-Wallis test',
friedman_test = 'Friedman test')
stat_name <- switch(type,
kruskal_test = 'rank sum',
friedman_test = 'chi squared')
test_res <- switch(type,
kruskal_test = stats::kruskal.test(formula = variable ~ group,
data = num_input),
friedman_test = stats::friedman.test(formula = variable ~ group | id,
data = num_input))
eff_name <- switch(type,
kruskal_test = 'eta squared',
friedman_test = 'W')
eff_res <- switch(type,
kruskal_test = rstatix::kruskal_effsize(formula = variable ~ group,
data = num_input,
ci = ci,
ci.type = boot_method),
friedman_test = rstatix::friedman_effsize(formula = variable ~ group | id,
data = num_input,
ci = ci,
ci.type = boot_method))
return(exda::etest(test = test,
stat_name = stat_name,
stat = test_res[['statistic']][1],
df1 = test_res[['parameter']][1],
estimate_name = eff_name,
estimate = eff_res[['effsize']][1],
lower_ci = if(ci) eff_res[['conf.low']][1] else NA,
upper_ci = if(ci) eff_res[['conf.high']][1] else NA,
p_value = test_res[['p.value']][1],
n = n))
}
}
# Effect size calculation ------
#' Effect size for difference between EDA objects.
#'
#' @description compare two or more EDA objects with a range of statistical tests.
#' @param ... EDA objects, at least two, created by \code{\link{eda}}.
#' @param type type of statistic the effect size. For two numeric EDA objects: independent variable Cohen's D
#' ('cohens_d', \code{\link[rstatix]{cohens_d}}), paired Cohen's D ('paired_cohen_d', \code{\link[rstatix]{cohen_d}}),
#' independent variable Wilcoxon r ('wilcoxon_r', \code{\link[rstatix]{wilcox_effsize}}) or paired Wilcoxon r
#' ('paired_wilcoxon_r', \code{\link[rstatix]{wilcox_effsize}}) are currently implemented. For two factor EDA objects:
#' Cohen's Kappa ('cohen_kappa', \code{\link[vcd]{Kappa}}) and Freeman's theta
#' ('freeman_theta', \code{\link[rcompanion]{freemanTheta}}) is implemented. For 2 or more factor EDAs: Cramer's V
#' ('cramer_v', \code{\link[DescTools]{CramerV}}) can be calculated. For two or more numeric EDAs: ANOVA-derived (partial)
#' eta-squared ('etasq' or 'partial_etasq', \code{\link[rstatix]{eta_squared}}) or
#' Kruskal-Wallis-derived eta-squared ('kruskal_etasq', \code{\link[rstatix]{kruskal_effsize}}) and
#' for aired data Kendall's W ('kendall_w', \code{\link[rstatix]{friedman_effsize}}) are currently available.
#' @param exact logical, should exact values for Chi-squared. Mann-Whitney and Wilcoxon test be returned?
#' @param ci logical, should confidence intervals for the test effect size be returned?
#' @param boot_method indicates how the bootstrap confidence intervals are calculated.
#' Can be any of 'percentile', 'bca', or 'normality', defaults to 'percentile'.
#' @details EDA object type is coerced to factor or numeric, as appropriate for the requested analysis. The default
#' statistic test results are returned as well: T test for Cohen's D, Mann-Whitney/Wilcoxon test for Wilcoxon r,
#' Chi-squared test results for Cohen's kappa, Cramer's V and Freeman's theta, one-way ANOVA for eta-squared and
#' partial eta-squared, Kruskal-Wallis test for Kruskal eta-squared and Friedman test for Kendall#s W. The p value
#' referst to the statistical test result.
#' @return an eTest object with the effect size statistic name as 'estimate' variable, effect size statistic value and
#' with 95/% confidence intervals.
#' @export
eff_size <- function(...,
type = c('cohen_d', 'paired_cohen_d', 'wilcoxon_r', 'paired_wilcoxon_r',
'cohen_kappa', 'freeman_theta', 'cramer_v', 'cohen_kappa',
'etasq', 'partial_etasq', 'kruskal_etasq', 'kendall_w'),
exact = TRUE,
ci = TRUE,
boot_method = c('percentile', 'bca', 'normality')) {
## entry check
stopifnot(is.logical(exact),
is.logical(ci))
type <- match.arg(type[1],
c('cohen_d', 'paired_cohen_d', 'wilcoxon_r', 'paired_wilcoxon_r',
'cohen_kappa', 'freeman_theta', 'cramer_v', 'cohen_kappa',
'etasq', 'partial_etasq', 'kruskal_etasq', 'kendall_w'))
boot_method <- match.arg(boot_method[1],
c('percentile', 'bca', 'normality'))
boot_method <- switch(boot_method,
percentile = 'perc',
bca = 'bca',
normality = 'norm')
inp_list <- rlang::list2(...)
classes <- purrr::map_lgl(inp_list, is_eda)
if(any(!classes)) stop('EDA objects are required.', call. = FALSE)
if(length(inp_list) < 2) stop('Please provide at least two EDA objects.', call. = FALSE)
if(type %in% c('cohen_d', 'paired_cohen_d', 'wilcoxon_r', 'paired_wilcoxon_r', 'cohen_kappa', 'freeman_theta')) {
if(length(inp_list) > 2) {
warning('More than two EDA objects provided. The first two will be used for analysis.', call. = FALSE)
}
inp_list <- inp_list[1:2]
}
## factor testing
if(type == 'cramer_v') {
return(exda::test(!!!inp_list,
type = 'chisq_test',
exact = exact,
ci = ci,
boot_method = boot_method))
}
if(type == 'cohen_kappa') {
if(length(inp_list[[1]]) != length(inp_list[[2]])) stop('EDA objects of the same length are required.', call. = FALSE)
test_res <- exda::test(!!!inp_list,
type = 'chisq_test',
exact = exact,
ci = ci,
boot_method = boot_method)
eff <- exda::correlation(inp_list[[1]],
y = inp_list[[2]],
type = 'kappa',
ci = ci)
return(dplyr::mutate(test_res,
estimate_name = 'kappa',
estimate = eff$estimate[1],
lower_ci = eff$lower_ci[1],
upper_ci = eff$upper_ci[1]))
}
if(type == 'freeman_theta') {
if(length(inp_list[[1]]) != length(inp_list[[2]])) stop('EDA objects of the same length are required.', call. = FALSE)
test_res <- exda::test(!!!inp_list,
type = 'chisq_test',
exact = exact,
ci = ci,
boot_method = boot_method)
eff <- rcompanion::freemanTheta(exda:::chi_tester(..., test_mtx = TRUE, coerce = TRUE),
ci = ci,
type = boot_method)
return(dplyr::mutate(test_res,
estimate_name = 'theta',
estimate = if(ci) eff[1, 1] else eff[1],
lower_ci = if(ci) eff[1, 2] else NA,
upper_ci = if(ci) eff[1, 3] else NA))
}
## numeric testing
paired <- type %in% c('paired_wilcoxon_r', 'paired_cohen_d', 'partial_etasq', 'kendall_w')
num_input <- exda:::convert_eda(!!!inp_list, paired = paired)
## two-sample
if(type %in% c('cohen_d', 'paired_cohen_d', 'wilcoxon_r', 'paired_wilcoxon_r')) {
## testing
tst_type <- switch(type,
cohen_d = 't_test',
paired_cohen_d = 'paired_t_test',
wilcoxon_r = 'mann_whitney_test',
paired_wilcoxon_r = 'wilcoxon_test')
test_res <- exda::test(!!!inp_list,
type = tst_type,
ci = FALSE,
boot_method = boot_method,
exact = exact)
## effect size
tst_formula <- variable ~ group
eff_size <- switch(type,
cohen_d = rstatix::cohens_d(data = num_input,
formula = tst_formula,
paired = FALSE,
var.equal = FALSE,
ci = ci,
conf.level = 0.95,
ci.type = boot_method),
paired_cohen_d = rstatix::cohens_d(data = num_input,
formula = tst_formula,
paired = TRUE,
var.equal = FALSE,
ci = ci,
conf.level = 0.95,
ci.type = boot_method),
wilcoxon_r = rstatix::wilcox_effsize(data = num_input,
formula = tst_formula,
paired = FALSE,
ci = ci,
onf.level = 0.95,
ci.type = boot_method),
paired_wilcoxon_r = rstatix::wilcox_effsize(data = num_input,
formula = tst_formula,
paired = TRUE,
ci = ci,
onf.level = 0.95,
ci.type = boot_method))
return(dplyr::mutate(test_res,
estimate_name = if(type %in% c('wilcoxon_r', 'paired_wilcoxon_r')) 'r' else 'd',
estimate = eff_size$effsize[1],
lower_ci = if(ci) eff_size$conf.low[1] else NA,
upper_ci = if(ci) eff_size$conf.low[2] else NA))
}
if(type %in% c('etasq', 'partial_etasq', 'kruskal_etasq', 'kendall_w')) {
tst_type <- switch(type,
etasq = 'anova',
partial_etasq = 'rm_anova',
kruskal_etasq = 'kruskal_test',
kendall_w = 'friedman_test')
return(exda::test(!!!inp_list,
type = tst_type,
exact = exact,
ci = ci,
boot_method = boot_method))
}
}
# Effect size plotting ------
#' Plot significance versus effect size.
#'
#' @description Plots effect size statistic and the adjusted significance for the given eTest object storing
#' results of a statistic test. Plot tag contains (range) n numbers of complete observations.
#' @param etest_object and eTest object, created e.g. with \code{\link{compare_variables}}.
#' @param point_alpha alpha of the plot points.
#' @param point_hjitter point jitter height.
#' @param point_wjitter point jitter width.
#' @param point_size size of the points in the plots.
#' @param point_color a pair of valid R colors: 1) for non-significant and 2) for significant effects.
#' @param plot_title text to be displayed in the plot title.
#' @param plot_subtitle text to be displayed in the plot subtitle.
#' @param cust_theme custom ggplot2 theme.
#' @param show_labels which points should be labeled? 'none' hides labels (default), 'all': labels all points,
#' 'signif': labels only the significant effects with the variable name.
#' @param txt_size size of the label text.
#' @param ... additional arguments passed to \code{\link[ggrepel]{geom_text_repel}}.
#' @return a ggplot point plot, significance is coded by the point fill color.
#' @export
plot_effect <- function(etest_object,
point_alpha = 0.5,
point_hjitter = 0,
point_wjitter = 0,
point_size = 2,
point_color = c('gray60', 'coral2'),
plot_title = NULL,
plot_subtitle = NULL,
cust_theme = ggplot2::theme_classic(),
show_labels = c('none', 'all', 'signif'),
txt_size = 2.75, ...) {
## entry control
if(!is_etest(etest_object)) stop('Please provide a valid, non-pub styled eTest object.', call. = FALSE)
stopifnot(any(class(cust_theme) == 'theme'))
show_labels <- match.arg(show_labels[1], c('none', 'all', 'signif'))
## plotting table and n numbers
etest_object <- dplyr::mutate(etest_object,
significant = ifelse(p_adjusted < 0.05, 'significant', 'ns'),
neg_log_p = -log10(p_adjusted))
if(show_labels == 'all') {
etest_object <- dplyr::mutate(etest_object,
plot_label = variable)
} else {
etest_object <- dplyr::mutate(etest_object,
plot_label = ifelse(significant == 'significant', variable, NA))
}
n_numbers <- etest_object$n
if(min(n_numbers) == max(n_numbers)) {
plot_tag <- paste('n =', n_numbers[1])
} else {
plot_tag <- paste0('n = ', min(n_numbers), ' - ', max(n_numbers))
}
## plotting
eff_plot <- ggplot2::ggplot(etest_object,
ggplot2::aes(x = .data[['estimate']],
y = .data[['neg_log_p']],
fill = .data[['significant']])) +
ggplot2::geom_point(shape = 21,
size = point_size,
alpha = point_alpha,
position = position_jitter(width = point_wjitter,
height = point_wjitter)) +
ggplot2::scale_fill_manual(values = point_color,
name = '') +
cust_theme +
ggplot2::labs(title = plot_title,
subtitle = plot_subtitle,
tag = plot_tag,
x = 'Effect size',
y = expression('-log'[10]*' p'))
if(show_labels != 'none') {
eff_plot <- eff_plot +
ggrepel::geom_text_repel(aes(label = .data[['plot_label']]),
size = txt_size, ...)
}
eff_plot
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.