Nothing
#' Estimate differences (unstandardized and Cohen's d)
#'
#' `estim_diff` determines point estimate, SD and SE, 95% Credibility Intervals,
#' and interval width, for both differences in raw means and Cohen's d's for
#' multiple sample sizes
#'
#' @param data Dataframe with the data to be analyzed
#' @param vars_of_interest Vector containing the names of the variables to be
#' compared on their means: `c("var1", "var2")`
#' @param sample_size The range of sample size to be used `min:max`
#' @param k The number of permutations to be used for each sample size. Defaults to 50
#' @param name The title of the dataset or variables to be displayed with the
#' figure. Defaults to `""`
#' @returns
#' * `tbl_select` returns a [tibble::tibble()] containing estimates of the
#' difference in raw means and of Cohen's d with associated SD, SE, 95% CI, and
#' width of the 95% CI (lower, upper) for five different sample sizes (starting
#' with the minimum sample size, then 1/5th parts of the total dataset).
#' * `fig_diff` returns a scatterplot for the difference in raw means, where for
#' the five different sample sizes, 10 out of the total number of HDCI's
#' computed are displayed (in light blue). The average estimate with credible
#' interval summarizing the total number of HDCIs for each sample size are
#' plotted in reddish purple
#' * `fig_nozero` returns a barplot where for each of the five sample sizes the
#' proportion of permutations not containing zero is displayed for the
#' difference in raw means
#' * `fig_cohens_d` returns a scatterplot for Cohen's d, where for the five
#' different sample sizes, 10 out of the total number of HDCI's computed are
#' displayed (in light blue). The average estimate with credible interval
#' summarizing the total number of HDCIs for each sample size are plotted in
#' reddish purple
#' * `fig_d_nozero` returns a barplot where for each of the five sample sizes the
#' proportion of permutations not containing zero is displayed for Cohen's d
#' * `tbl_total` returns a [tibble::tibble()] containing estimates of the difference
#' in raw means and of Cohen's d with associated SD, SE, 95% CI, and width of
#' the 95% CI (lower, upper) for all sample sizes, including the permutation
#' number.
#' @examples
#' data_feedback <- feedback
#' estim_diff(data_feedback,
#' c("mfg_learning", "mfg_application"), 20:71,
#' 10, "Feedback middle frontal gyrus")
#' @export
estim_diff <- function(data, vars_of_interest, sample_size, k = 50, name = ""){
# create a tibble to store the output in
output <- tibble::tibble()
output_total <- tibble::tibble()
# loop X number of times over the different sample sizes
for (j in 1:k) {
for (i in sample_size){
#return output as a vector
output_vector <- unlist(sample_diff(data, vars_of_interest,i))
# store vector output in the table
output[1,1] <- i
output[1,2] <- output_vector[1]
output[1,3] <- output_vector[2]
output[1,4] <- output_vector[3]
output[1,5] <- output_vector[4]
output[1,6] <- output_vector[5]
output[1,7] <- output_vector[6]
output[1,8] <- output_vector[7]
output[1,9] <- output_vector[8]
output[1,10] <- output_vector[9]
output[1,11] <- j
# add output to output_total tibble
output_total <- rbind(output_total, output)
}
}
colnames(output_total) <- c(
"N",
"estimate",
"variance",
"stdev",
"sterror",
"lower",
"upper",
"cohens_d",
"d_lower",
"d_upper",
"permutation"
)
# calculate overall intervals per sample size
overall_output <- output_total %>%
dplyr::mutate(nozero = (.data$lower > 0 &
.data$upper > 0) | (.data$lower < 0 & .data$upper < 0)) %>%
dplyr::mutate(d_nozero = (.data$d_lower > 0 &
.data$d_upper > 0) | (.data$d_lower < 0 & .data$d_upper < 0)) %>%
dplyr::group_by(.data$N) %>%
dplyr::summarise(
estimate = mean(.data$estimate, na.rm = TRUE),
variance = mean(.data$variance, na.rm = TRUE),
stdev = mean(.data$stdev, na.rm = TRUE),
sterror = mean(.data$sterror, na.rm = TRUE),
lower = mean(.data$lower, na.rm = TRUE),
upper = mean(.data$upper, na.rm = TRUE),
nozero = mean(.data$nozero, na.rm = TRUE),
cohens_d = mean(.data$cohens_d, na.rm = TRUE),
d_lower = mean(.data$d_lower, na.rm = TRUE),
d_upper = mean(.data$d_upper, na.rm = TRUE),
d_nozero = mean(.data$d_nozero, na.rm = TRUE),
permutation = 999999
) %>%
dplyr::ungroup()
# function to divide the total dataset by 5 and to filter the sample sizes
filt_sample <- function(sample_size, output_total) {
filt_sel <- round((sample_size[length(sample_size)] - sample_size[1])/5)
dplyr::filter(output_total, .data$N == .data$N[1] |
.data$N == (.data$N[1] + filt_sel) |
.data$N == (.data$N[1] + 2 * filt_sel) |
.data$N == (.data$N[1] + 3 * filt_sel) |
.data$N == .data$N[length(.data$N)] )
}
# select 10 random permutations for the 5 different sample sizes for every permutation for visualization
# (only when k >50 random, otherwise select the first 10 permutations)
output_selection <- filt_sample(sample_size, output_total)
output_selection <- if(k > 10) {
dplyr::filter(output_selection,
.data$permutation %in% sample(unique(.data$permutation),
size = 10,
replace = FALSE))
} else
{
dplyr::filter(output_selection,
.data$permutation %in% 1:10)
}
# select the 5 different sample sizes of the overall interval for visualization
overall_selection <- filt_sample(sample_size, overall_output)
# combine 10 datasets per sample size with overall per sample size
total_selection <- dplyr::bind_rows(output_selection, overall_selection)
# turn permutations and N into factors for visualisation
lvl_plot <- levels(factor(total_selection$permutation))
lvl_plot[lvl_plot == "999999"] <- "Overall"
total_selection$permutation <- factor(total_selection$permutation, labels=lvl_plot)
total_selection$N <- as.factor(total_selection$N)
overall_selection$N <- as.factor(overall_selection$N)
# plot figure for the differences
figure_diff <- ggplot2::ggplot(data = total_selection,
ggplot2::aes(x = .data$N,
y = .data$estimate,
colour = .data$permutation,
linetype = .data$permutation) ) +
ggplot2::theme_classic() +
ggplot2::geom_point(position = ggplot2::position_dodge(.8),
ggplot2::aes(x = .data$N,
y = .data$estimate,
colour = .data$permutation,
size = .data$permutation)) +
ggplot2::scale_size_manual(values = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4)) +
ggplot2::scale_linetype_manual(values = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6)) +
ggplot2::geom_errorbar(ggplot2::aes(ymin = .data$lower, ymax = .data$upper),
width = .1,
position = ggplot2::position_dodge(.8)) +
ggplot2::scale_color_manual(values = c("#56B4E9","#56B4E9","#56B4E9","#56B4E9","#56B4E9",
"#56B4E9","#56B4E9","#56B4E9","#56B4E9","#56B4E9","#CC79A7") ) +
ggplot2::labs(title = name) +
ggplot2::geom_hline(yintercept=0, linetype="dashed")
# plot proportion of non-zero values for selected samples
figure_nozero <- ggplot2::ggplot(data = overall_selection,
ggplot2::aes(x = .data$N,
y = .data$nozero) ) +
ggplot2::theme_classic() +
ggplot2::geom_col(color = "#000000",
fill = "#CC79A7",
width = 0.6) +
ggplot2::ylim(0,1) +
ggplot2::labs(title = name,
y = "Proportion not containing zero (raw differences)")
# plot figure for Cohen's d
figure_cohens_d <- ggplot2::ggplot(data = total_selection,
ggplot2::aes(x = .data$N,
y = .data$cohens_d,
colour = .data$permutation,
linetype = .data$permutation) ) +
ggplot2::theme_classic() +
ggplot2::geom_point(position = ggplot2::position_dodge(.8),
ggplot2::aes(x = .data$N,
y = .data$cohens_d,
colour = .data$permutation,
size = .data$permutation)) +
ggplot2::scale_size_manual(values = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4)) +
ggplot2::scale_linetype_manual(values = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6)) +
ggplot2::geom_errorbar(ggplot2::aes(ymin = .data$d_lower, ymax = .data$d_upper),
width = .1,
position = ggplot2::position_dodge(.8)) +
ggplot2::scale_color_manual(values = c("#56B4E9","#56B4E9","#56B4E9","#56B4E9","#56B4E9",
"#56B4E9","#56B4E9","#56B4E9","#56B4E9","#56B4E9","#CC79A7") ) +
ggplot2::labs(title = name, y = "Cohen's d") +
ggplot2::geom_hline(yintercept=0, linetype="dashed")
# plot proportion of non-zero values for selected samples
figure_d_nozero <- ggplot2::ggplot(data = overall_selection,
ggplot2::aes(x = .data$N,
y = .data$d_nozero) ) +
ggplot2::theme_classic() +
ggplot2::geom_col(color = "#000000",
fill = "#CC79A7",
width = 0.6) +
ggplot2::ylim(0,1) +
ggplot2::labs(title = name,
y = "Proportion not containing zero (Cohen's d)")
return(list(tbl_select = total_selection,
fig_diff = figure_diff,
fig_nozero = figure_nozero,
fig_cohens_d = figure_cohens_d,
fig_d_nozero = figure_d_nozero,
tbl_total = output_total))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.