Nothing
rnorm_mix <- function(n, mu1, sig1, mu2, sig2, gamma) {
x <- sample(c(1,0), n, replace = T, prob = c(gamma, 1 - gamma))
x <- ifelse(x,
rnorm(sum(x), mean = mu1, sd = sig1),
rnorm(sum(!x), mean = mu2, sd = sig2))
return(x)
}
#' @title Draw Samples from a Gaussian Mixture Distribution
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' Draws exemplary samples with a certain effect size for the sequential one-oway ANOVA or the sequential t-test, see Steinhilber et al. (2023) <doi:10.31234/osf.io/m64ne>
#' @param k_groups number of groups (levels of factor_A)
#' @param f Cohen's f. The simulated effect size.
#' @param max_n sample size for the groups (total sample size = max_n*k_groups)
#' @param counter_n number of times the function tries to find a possible parameter combination for the distribution. Default value is set to 100.
#' @param verbose `TRUE` or `FALSE.` Print out more information about the internal process of sampling the parameters
#' (the internal counter that was reached, some additional hints and the drawn parameters for the Gaussian Mixture distributions.)
#'
#' @return returns a data.frame with the columns y (observations) and x (factor_A).
#'
#' @export
#'
#' @example inst/examples/draw_sample_mixture.R
draw_sample_mixture <- function(
k_groups,
f,
max_n,
counter_n = 100,
verbose = FALSE
) {
gamma <- 0.5
sd <- rep(1, k_groups)
sample_ratio <- rep(1, k_groups)
if (!is.numeric(k_groups)) {stop("argument k_groups must be numeric.")}
if (k_groups <= 1) {stop("argument k_groups must be larger than 1.")}
if (!is.numeric(f)) {stop("argument f must be numeric.")}
if (f < 0) {stop("argument f must be equal to or larger than 0.")}
if (!is.numeric(max_n)) {stop("argument max_n must be numeric.")}
if (max_n <= 0) {stop("argument max_n must be larger than 1.")}
if (!is.numeric(counter_n)) {stop("argument counter_n must be numeric.")}
if (counter_n < 1) {stop("argument counter_n must be equal to or larger than 1.")}
sample_sizes_max <- max_n * sample_ratio
total_sample_size <- sum(max_n*sample_ratio)
if (total_sample_size <= k_groups) {stop("total sample size must be greater than k_groups. Try to increase max_n argument.")}
seq_step_increase <- sum(sample_ratio)
seq_steps <- seq((seq_step_increase), total_sample_size, seq_step_increase)
# seq_steps <- seq_steps[2:length(seq_steps)]
test <- TRUE
counter <- 0
while (any(test) & counter < counter_n) {
raw_means <- rnorm(k_groups)
pop_means = (raw_means - mean(raw_means)) / sd(raw_means) * sqrt(k_groups / (k_groups - 1)) * f
# draw factor randomly & calulate means
factor <- runif(k_groups, min = 0.8, max = 1)
mean1 = pop_means*2*factor
mean2 = pop_means*2*(1-factor)
test <- (sd^2 - 0.25*(mean1 - mean2)^2) <= 0
counter <- counter + 1
}
if (counter == counter_n) {message("Maximum of counter_n was reached.")}
if (verbose == TRUE) {message(paste(c("Internal counter reached = ", counter)))}
if (verbose == TRUE & f > 1.5) {message(paste(c("f values larger than 1.5 should not be used in this function.")))}
if (any(test)) {stop("Internal calulation failed: try to increase counter_n or decrease f and set verbose to TRUE.")}
# calculate variance
factor <- runif(k_groups, min = 0.8, max = 1)
variance_12 = 2*sd^2-0.5*(mean1-mean2)^2
sigma1 = sqrt(variance_12 * factor)
sigma2 = sqrt(variance_12 * (1-factor))
if (verbose) {print(glue::glue("{rep('\n',k_groups)}group{1:k_groups}:\nmean1 = {mean1}, mean2 = {mean2},\nsigma1 = {sigma1}, sigma2 = {sigma2}"))}
position <- 1
data <- matrix(double(total_sample_size*2), ncol = 2)
for (steps in seq_steps) {
y <- 1:k_groups %>%
# purrr::map(~EnvStats::rnormMix(n = sample_ratio[.x], mean1 = mean1[.x], sd1 = sd[.x], mean2 = mean2[.x], sd2 = sd[.x], p.mix = gamma)) %>%
purrr::map(~rnorm_mix(n = sample_ratio[.x], mu1 = mean1[.x], sig1 = sigma1[.x], mu2 = mean2[.x], sig2 = sigma2[.x], gamma = gamma)) %>%
unlist()
indice <- 1:100
x <- 1:k_groups %>%
purrr::map(~rep(indice[.x], sample_ratio[.x])) %>%
unlist()
data[position:steps, 1] <- y
data[position:steps, 2] <- x
position <- position + seq_step_increase
}
data.frame(y = data[, 1], x = as.factor(data[, 2]))
}
# data <- draw_sample_mixture(
# k_groups = 2,
# f = 0.4,
# max_n = 10,
# counter_n = 100,
# verbose = TRUE
# )
# k_groups = 4
# f = 0.4
# # f = 1.51
# max_n = 50
# counter_n = 100
# verbose = TRUE
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.