Nothing
#' @title AB_permutation_distancing_test
#'
#' @description Performs a randomisation test for two phases (A and B) that corrects for dependency.
#' The correcting is done through stepwise resampling the time series while varying
#' the distance between observations. The required distance 0,1,2,3.. is determined based
#' on repeated dependency testing while stepwise increasing the distance.
#' The input x and y values should be equidistant (with NA's included) using insert_NA_and_try_to_shift.
#' The distance per cycles = k-1.
#' k_max should be max 25% of (non missing) observations.
#' If de_A_trend=TRUE, phase A will be first de_A_trended.
#' If de_B_trend=TRUE, phase B will be first de_B_trended.
#' If detrend_x_position = "first" : take predicted value for first valid observation.
#' If detrend_x_position = "center": take predicted value for center observation.
#' If detrend_x_position = "last" : take predicted value for last valid observation.
#' The p-value returned corresponds with the lowest Ljung-Box test (minimal) p-value found.
#' The statistic returned correspond with median chi-square k with p_box larger than alpha_p_box_test
#' or statistic_box smaller than max_statistic_p_box_test (i.e., not dependent).
#'
#' @param x factor vector to indicate conditions or phases (e.g., "A" and "B")
#' @param y numerical vector with the observed y-values
#' @param test_statistic character how to compute the test statistic c("A-B", "B-A", "*") *=two-sided
#' @param test_statistic_function character compute and compare "mean" or "median" for A and B
#' @param reps_max numerical maximum number of permutation replications (the theoretical number= n!)
#' @param k_max numerical maximum k value
#' @param alpha_p_box_test numerical see above
#' @param max_statistic_p_box_test numerical see above
#' @param no_duplicates boolean do a permutation test without duplicates (makes it much slower)
#' @param remove_NA_series_above_k boolean first clean the data by skipping repeated NA's
#' @param de_A_trend boolean de-trend A first (optional)
#' @param detrend_A_position character c("first", "center", "last"), see detrend_A
#' @param de_B_trend boolean de-trend B first (optional)
#' @param detrend_B_position character c("first", "center", "last"), see detrend_B
#' @param show_plot boolean show test plot of statistical test
#' @param show_plot_header character header of test plot
#'
#' @return List with the permutation distancing test results:
#' de_A_trend setting in call,
#' detrend_A_position in call,
#' de_B_trend setting in call,
#' detrend_B_position in call,
#' ar1 = vector of computed ar1 values per distancing step (0,1,2, etc),
#' p_box = vector of computed box-test p-values per distancing step,
#' statistic_box = vector of box-test statistics per distancing step,
#' observed_test_statistic = computed overall AB test statistic (before distancing),
#' effect_size_overall = computed overall effect size (before distancing),
#' p = vector of computed permutation test p-values per distancing step,
#' effect_size vector of computed permutation test effect-sizes per distancing step,
#' p_fitted = vector of lm-fitted line p-values through p_box,
#' k_max = k_max setting in call or computed based on the number of observations,
#' k_selected_based_on_Box_test = selected k values,
#' p_selected_based_on_Box_test = selected p-value,
#' effect_size_selected_based_on_Box_test = selected effect-size values.
#'
#' @examples
#' pdt::AB_permutation_distancing_test(
#' as.factor(c(rep("A",20), rep("B",20))),
#' c(rnorm(20), rnorm(20)+2),
#' test_statistic="B-A",
#' test_statistic_function="mean",
#' reps_max=1000,
#' k_max=NULL,
#' alpha_p_box_test=0.1,
#' max_statistic_p_box_test=2.7,
#' no_duplicates=FALSE,
#' remove_NA_series_above_k=TRUE,
#' de_A_trend=FALSE,
#' detrend_A_position="center",
#' de_B_trend=FALSE,
#' detrend_B_position="center",
#' show_plot=FALSE,
#' show_plot_header="")
#'
#' @export
AB_permutation_distancing_test <- function(x, y, test_statistic="*", test_statistic_function="mean", reps_max=2000,
k_max=NULL, alpha_p_box_test=0.1, max_statistic_p_box_test=2.7, no_duplicates=FALSE, remove_NA_series_above_k=TRUE,
de_A_trend=FALSE, detrend_A_position="center", de_B_trend=FALSE, detrend_B_position="center",
show_plot=FALSE, show_plot_header="") {
#if (de_A_trend && length(y[x==levels(x)[1]][!is.na(y[x==levels(x)[1]])]) < 20 ||
# length(y[x==levels(x)[2]][!is.na(y[x==levels(x)[2]])]) < 20) warning("de_A_trend produces unreliable results in case of small (< 20) A or B number of observations")
if (is.null(k_max)) { # auto_break when Ljung-Box test not significant
k_max <- trunc(length(y[!is.na(y)])/4)
auto_break <- TRUE
} else {
#test for too large setting of k_max; use minimum number of observations of at least 4 * k
if (length(y[!is.na(y)])/k_max < 4){
k_max <- trunc(length(y[!is.na(y)])/4)
warning(paste("reducing k_max to 25% of (non missing) observations: ", as.character(k_max), sep=''))
}
auto_break <- FALSE
}
#de_A_trend and de_B_trend if requested; do it separately
#plot(c(1:length(x)), y, type = "l", col="blue")
if (de_A_trend) y <- detrend_A(x, c(1:length(x)), y, detrend_A_position=detrend_A_position)$y_detrended
if (de_B_trend) y <- detrend_B(x, c(1:length(x)), y, detrend_B_position=detrend_B_position)$y_detrended
#graphics::lines(c(1:length(x)), y, type = "l", col="red")
test_results <- AB_permutation_test(x, y, test_statistic=test_statistic, test_statistic_function=test_statistic_function, reps_max=reps_max, no_duplicates=FALSE, show_plot=FALSE)
observed_test_statistic <- test_results$observed_test_statistic
effect_size_overall <- test_results$effect_size
if (is.na(observed_test_statistic)) {
ar1 <- NA
p_box <- NA
statistic_box <- NA
observed_test_statistic <- NA
effect_size_overall <- NA
p <- NA
effect_size <- NA
p_fitted <- NA
k_max <- NA
k_selected_based_on_Box_test <- NA
p_selected_based_on_Box_test <- NA
effect_size_selected_based_on_Box_test <- NA
} else {
indices <- 1:length(x) # indices based on equidistant x,y
p <- c()
ar1 <- c()
p_box <- c()
statistic_box <- c()
effect_size <- c()
for (k in 1:k_max) {
# k-1 is number of distancing NA's between observations; k=1 : no distancing
# remove NA-series of more than k NA's in y
# this will protect against inflation of p as a result of too many NA combinations during distancing
NA_reduced <- do_remove_NA_series_above_k(x, y, k-1, x_values=indices)
x_tmp <- NA_reduced$x
y_tmp <- NA_reduced$y
indices_tmp <- NA_reduced$x_values # these indices are based on original equidistant y
#print(k)
#print(y_tmp)
# n_k <- sample_size_overall/(k+1)
# for each k, create k (l=1 to k) new distancing time series by NA intermediate values,
# and then perform the random assignments
random_assignments <- c()
ar1_l <- c()
p_box_l <- c()
statistic_box_l <- c()
effect_size_l <- c()
for (l in 1:k) {
indices_distancing <- indices[seq(l, length(y), by=k)] # these indices are based on original equidistant y
indices_distancing_in_indices_tmp <- indices_distancing[indices_distancing %in% indices_tmp] # these indices are based on original equidistant y
#print(indices_distancing)
#print(indices_distancing_in_indices_tmp)
#method without remove_NA_series_above_k
if (!remove_NA_series_above_k) {
x_distancing <- x
y_distancing <- rep(NA, length(y))
y_distancing[indices_distancing] <- y[indices_distancing]
} else {
#method with remove_NA_series_above_k
x_distancing <- x_tmp
y_distancing <- rep(NA, length(y_tmp))
for (m in 1:length(indices_distancing_in_indices_tmp)) y_distancing[which(indices_tmp %in% indices_distancing_in_indices_tmp[m])] <- y[indices_distancing_in_indices_tmp[m]]
#onderstaande is sneller, maar gaat het ook goed?
#y_distancing <- rep(NA, length(y_tmp))
#y_distancing[indices_tmp %in% indices_distancing_in_indices_tmp] <- y[indices_distancing_in_indices_tmp]
}
#print(y_distancing)
plot_header <- paste("distancing k: ", as.character(k), " series l: ", as.character(l), sep='')
test_results_l <- AB_permutation_test(x_distancing, y_distancing, test_statistic=test_statistic, test_statistic_function=test_statistic_function, reps_max=reps_max, no_duplicates=no_duplicates,
show_plot=FALSE)
random_assignments <- c(random_assignments, test_results_l$random_assignments)
ar1_l <- c(ar1_l, stats::acf(y_distancing[!is.na(y_distancing)], lag.max=1, type="correlation", plot=FALSE, na.action=stats::na.pass, demean=TRUE)$acf[2])
p_box_l <- c(p_box_l, stats::Box.test(y_distancing[!is.na(y_distancing)], type = "Ljung-Box")$p.value)
statistic_box_l <- c(statistic_box_l, stats::Box.test(y_distancing[!is.na(y_distancing)], type = "Ljung-Box")$statistic)
effect_size_l <- c(effect_size_l, test_results_l$effect_size)
}
#print(ar1_l)
ar1 <- c(ar1, stats::median(ar1_l, na.rm=TRUE)) # median because ar1 values are skewed
p_box <- c(p_box, min(p_box_l, na.rm=TRUE)) #take smallest: one of them is significant
statistic_box <- c(statistic_box, stats::median(statistic_box_l, na.rm=TRUE)) #median as well
effect_size <- c(effect_size, mean(effect_size_l, na.rm=TRUE)) #mean
# correct length(random_assignments) by number of NA's
p_randomization_AB_k <- sum(random_assignments>observed_test_statistic, na.rm=TRUE) / length(random_assignments[!is.na(random_assignments)])
p <- c(p, p_randomization_AB_k)
if (auto_break && (p_box[length(p_box)]>alpha_p_box_test || statistic_box[length(statistic_box)]<max_statistic_p_box_test) ) break
}
p[is.nan(p)] <- NA
if (!all(is.na(p))) {
indices <- which(!is.na(p))
p <- p[indices]
ar1 <- ar1[indices]
p_box <- p_box[indices]
statistic_box <- statistic_box[indices]
effect_size <- effect_size[indices]
# get second order fitted regression line for p
k <- 1:length(p) # length of vectors with computed pdt values
df_tmp <- data.frame(k, ar1, p, effect_size)
p_fitted <- as.numeric(stats::lm(formula = p ~ k + I(k^2), data=df_tmp)$fitted.values)
effect_size_fitted <- as.numeric(stats::lm(formula = effect_size ~ k + I(k^2), data=df_tmp)$fitted.values)
#get first k with Box test not significant or with small chi-square
k_selected_based_on_Box_test <- which(p_box>alpha_p_box_test | statistic_box<max_statistic_p_box_test)[1]
} else {
p_fitted <- NA
effect_size_fitted <- NA
k_selected_based_on_Box_test <- NA
}
if (!is.na(k_selected_based_on_Box_test)) {
p_selected_based_on_Box_test <- p_fitted[k_selected_based_on_Box_test]
effect_size_selected_based_on_Box_test <- effect_size_fitted[k_selected_based_on_Box_test]
} else {
p_selected_based_on_Box_test <- NA
effect_size_selected_based_on_Box_test <- NA
}
if (show_plot) {
plot(k, ar1, type="l", col="blue", xlab="distancing k", ylab="ar1", main=show_plot_header)
graphics::lines(k_selected_based_on_Box_test, ar1[k_selected_based_on_Box_test], type="p", col="red")
plot(k, p_box, type="l", col="blue", xlab="distancing k", ylab="Ljung Box test p-value", main=show_plot_header)
graphics::lines(k_selected_based_on_Box_test, p_box[k_selected_based_on_Box_test], type="p", col="red")
plot(k, statistic_box, type="l", col="blue", xlab="distancing k", ylab="Ljung Box test statistic", main=show_plot_header)
graphics::lines(k_selected_based_on_Box_test, statistic_box[k_selected_based_on_Box_test], type="p", col="red")
plot(k, p, type="l", col="blue", xlab="distancing k", ylab="one-sided p-value", main=show_plot_header)
graphics::lines(k, p_fitted, type="l", col="green")
graphics::lines(k_selected_based_on_Box_test, p_selected_based_on_Box_test, type="p", col="red")
plot(k, effect_size, type="l", col="blue", xlab="distancing k", ylab="effect size", main=show_plot_header)
graphics::lines(k, effect_size_fitted, type="l", col="green")
graphics::lines(k_selected_based_on_Box_test, effect_size_selected_based_on_Box_test, type="p", col="red")
}
}
list(
de_A_trend = de_A_trend,
detrend_A_position = detrend_A_position,
de_B_trend = de_B_trend,
detrend_B_position = detrend_B_position,
ar1 = ar1,
p_box = p_box,
statistic_box = statistic_box,
observed_test_statistic = observed_test_statistic,
effect_size_overall = effect_size_overall,
p = p,
effect_size = effect_size,
p_fitted = p_fitted,
k_max = k_max,
k_selected_based_on_Box_test = k_selected_based_on_Box_test,
p_selected_based_on_Box_test = p_selected_based_on_Box_test,
effect_size_selected_based_on_Box_test = effect_size_selected_based_on_Box_test
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.