Nothing
#' @title AB_permutation_test
#'
#' @description Performs a regular permutations test for two conditions or phases (A and B).
#'
#' @param x factor vector to indicate conditions or phases (e.g., "A" and "B")
#' @param y numerical vector with the observed y-values
#' @param test_statistic character how to compute the test statistic c("A-B", "B-A", "*") *=two-sided
#' @param test_statistic_function character compute and compare "mean" or "median" for A and B
#' @param reps_max numerical maximum number of permutation replications (the theoretical number= n!)
#' @param no_duplicates boolean do a permutation test without duplicates (makes it much slower)
#' @param show_plot boolean show test plot of statistical test
#' @param show_plot_header character header of test plot
#'
#' @return List with the permutation test results:
#' observed_test_statistic = computed test statistic,
#' effect_size = computed effect size (similar to Cohen's d),
#' random_assignments,
#' p_randomization_AB = p value randomization AB test,
#' one_sided_p = one-sided p-value in case of B-A or A-B.
#'
#' @examples
#' pdt::AB_permutation_test(
#' as.factor(c(rep("A",20), rep("B",20))),
#' c(rnorm(20), rnorm(20)+2),
#' test_statistic="B-A",
#' test_statistic_function="mean",
#' reps_max=1000,
#' no_duplicates=FALSE,
#' show_plot=FALSE,
#' show_plot_header="")
#'
#' @export
AB_permutation_test <- function(x, y, test_statistic="*", test_statistic_function="mean", reps_max=2000, no_duplicates=FALSE, show_plot=FALSE, show_plot_header="") {
# do a permutation test without duplicates
# theoretical number of permutations = n! = factorial(length(y))
#return NA below this threshold
MIN_OBSERVATIONS <- 2
MAX_COUNT <- reps_max*100
rperm <- function(m, size, max_count=1000) { # Obtain m unique permutations of 1:size
# Returns a `size` by `m` matrix; each column is a permutation of 1:size.
# Function to add a new permutation of 1:size as a new column to the permutations matrix
newperm <- function(permutations, i, size, max_count=1000) {
count <- 0 # Protects against infinite loops
p <- sample(1:size)
is.new <- FALSE
# Generate another permutation column and check against previous ones.
while (count < max_count && !is.new) {
for (j in 1:i) {
if (!all(is.na(permutations[, j])) && all(permutations[, j] == p)) {
is.new <- FALSE
#message("not new")
p <- sample(1:size)
break
} else {
count <- count+1
}
}
if (j==i) {
is.new <- TRUE
permutations[, i] <- p
}
}
if (count>=max_count) warning("maxcount reached in generating permutations without duplicates")
permutations
}
# Obtain m unique permutations.
permutations <- matrix(nrow=size, ncol=m) #start with empty matrix filled with NA
for (i in 1:m) permutations <- newperm(permutations, i, size, max_count=max_count)
permutations
}
if (length(levels(x)) > 2) stop("More than two levels found, only two allowed")
if (!all(x == x[order(x)])) stop("No AB order found, only AB designs allowed")
A <- y[x==levels(x)[1]]
B <- y[x==levels(x)[2]]
sample_size_overall <- length(x)
if (all(is.na(A)) || all(is.na(B)) || length(A[!is.na(A)])<MIN_OBSERVATIONS || length(B[!is.na(B)])<MIN_OBSERVATIONS ||
(stats::sd(A, na.rm=TRUE)+stats::sd(B, na.rm=TRUE))==0) {
observed_test_statistic <- NA
effect_size <- NA
random_assignments <- c()
p_randomization_AB <- NA
} else {
#observed_test_statistic_sign is used for unknown test_statistic
if (test_statistic_function=="median") {
if (test_statistic=="B-A") {
observed_test_statistic <- stats::median(B, na.rm=TRUE)-stats::median(A, na.rm=TRUE)
} else if (test_statistic=="A-B") {
observed_test_statistic <- stats::median(A, na.rm=TRUE)-stats::median(B, na.rm=TRUE)
} else {
observed_test_statistic <- stats::median(A, na.rm=TRUE)-stats::median(B, na.rm=TRUE)
if (observed_test_statistic < 0) observed_test_statistic_sign <- -1 else observed_test_statistic_sign <- 1
observed_test_statistic <- observed_test_statistic_sign*observed_test_statistic
}
} else {
if (test_statistic=="B-A") {
observed_test_statistic <- mean(B, na.rm=TRUE)-mean(A, na.rm=TRUE)
} else if (test_statistic=="A-B") {
observed_test_statistic <- mean(A, na.rm=TRUE)-mean(B, na.rm=TRUE)
} else {
observed_test_statistic <- mean(A, na.rm=TRUE)-mean(B, na.rm=TRUE)
if (observed_test_statistic < 0) observed_test_statistic_sign <- -1 else observed_test_statistic_sign <- 1
observed_test_statistic <- observed_test_statistic_sign*observed_test_statistic
}
}
sd_pooled <- (stats::sd(A, na.rm=TRUE)+stats::sd(B, na.rm=TRUE))/2
effect_size <- observed_test_statistic/sd_pooled
if (factorial(sample_size_overall) < reps_max) reps <- factorial(sample_size_overall) else reps <- reps_max
#set.seed(seed = 14412)
if (no_duplicates) {
perm_matrix <- rperm(reps, size=sample_size_overall, max_count=MAX_COUNT) # Obtain m unique permutations; each column is a permutation of 1:size
random_assignments <- c()
for (i in 1:ncol(perm_matrix)) {
perm_indices_i <- perm_matrix[,i]
perm_indices_i_A <- perm_indices_i[1:length(A)]
perm_indices_i_B <- perm_indices_i[(length(A)+1):(length(A)+length(B))]
if (test_statistic_function=="median") {
if (test_statistic=="B-A") {
random_assignments_i <- stats::median(y[perm_indices_i_B], na.rm=TRUE)-stats::median(y[perm_indices_i_A], na.rm=TRUE)
} else if (test_statistic=="A-B") {
random_assignments_i <- stats::median(y[perm_indices_i_A], na.rm=TRUE)-stats::median(y[perm_indices_i_B], na.rm=TRUE)
} else {
random_assignments_i <- observed_test_statistic_sign*(stats::median(y[perm_indices_i_A], na.rm=TRUE)-stats::median(y[perm_indices_i_B], na.rm=TRUE))
}
} else {
if (test_statistic=="B-A") {
random_assignments_i <- mean(y[perm_indices_i_B], na.rm=TRUE)-mean(y[perm_indices_i_A], na.rm=TRUE)
} else if (test_statistic=="A-B") {
random_assignments_i <- mean(y[perm_indices_i_A], na.rm=TRUE)-mean(y[perm_indices_i_B], na.rm=TRUE)
} else {
random_assignments_i <- observed_test_statistic_sign*(mean(y[perm_indices_i_A], na.rm=TRUE)-mean(y[perm_indices_i_B], na.rm=TRUE))
}
}
random_assignments <- c(random_assignments, random_assignments_i)
}
} else {
random_assignments <- numeric(reps)
for (i in 1:reps) {
temp <- sample(y)
if (test_statistic_function=="median") {
if (test_statistic=="B-A") {
random_assignments[i] <- stats::median(temp[(length(A)+1):length(y)], na.rm=TRUE)-stats::median(temp[1:length(A)], na.rm=TRUE)
} else if (test_statistic=="A-B") {
random_assignments[i] <- stats::median(temp[1:length(A)], na.rm=TRUE)-stats::median(temp[(length(A)+1):length(y)], na.rm=TRUE)
} else {
random_assignments[i] <- observed_test_statistic_sign*(stats::median(temp[1:length(A)], na.rm=TRUE)-stats::median(temp[(length(A)+1):length(y)], na.rm=TRUE))
}
} else {
if (test_statistic=="B-A") {
random_assignments[i] <- mean(temp[(length(A)+1):length(y)], na.rm=TRUE)-mean(temp[1:length(A)], na.rm=TRUE)
} else if (test_statistic=="A-B") {
random_assignments[i] <- mean(temp[1:length(A)], na.rm=TRUE)-mean(temp[(length(A)+1):length(y)], na.rm=TRUE)
} else
random_assignments[i] <- observed_test_statistic_sign*(mean(temp[1:length(A)], na.rm=TRUE)-mean(temp[(length(A)+1):length(y)], na.rm=TRUE))
}
}
}
random_assignments[is.na(random_assignments)] <- NA
if (show_plot) {
graphics::hist(random_assignments, main=show_plot_header)
graphics::abline(v=observed_test_statistic, col="red")
}
# correct reps by number of NA's
p_randomization_AB <- sum(random_assignments>observed_test_statistic, na.rm=TRUE)/(reps-length(which(is.na(random_assignments))))
}
one_sided_p <- NA
if (test_statistic=="B-A" || test_statistic=="A-B") one_sided_p <- p_randomization_AB
list(
observed_test_statistic = observed_test_statistic,
effect_size = effect_size,
random_assignments = random_assignments,
p_randomization_AB = p_randomization_AB,
one_sided_p = one_sided_p
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.