shuffled_ttest.version = "1.20";
#initial program for running the shuffling of t-tests
#Data should be organized with Var 1 in column 1 and Var 2 in column 2
#runs independant sample t-test or paired t test, a number of times, with the data being segmented by minimum number of participants needed to reach significance
#####Parameters--------------
#data_set should refer to a dataset available in the global envir
#shuffle - denotes how many times the data will be shuffled
#alpha - desired alpha value
#paired - optional TRUE/FALSE for Paired Sample t-Test, default is False
#Required packages
packages = c("tictoc");
#use this function to check if each package is on the local machine
#if a package is installed, it will be loaded
#if any are not, the missing package(s) will be installed and loaded
package.check <- lapply(packages, FUN = function(x) {
if (!require(x, character.only = TRUE)) {
install.packages(x, dependencies = TRUE)
library(x, character.only = TRUE);
}
})
######End Packages
######Shuffled t-Test Function---------------------------
#'
#' This function runs iterative randomized replication t-tests on a dataset based on base-n value
#' @param g1 Group 1
#' @param g2 Group 2
#' @param shuffle number of times to replicate data, defaults to 50
#' @param alpha value of alpha, defaults to .05
#' @param paired Boolean to run a paired-sample t-test, defaults to FALSE
#' @param csvFileName Optional parameter to save results to .csv
#' @keywords shuffle, t-test
#' @export
#' @examples
#' shuffled_ttest(Group1, Group2, shuffle=500, alpha=.05)
#' shuffled_ttest(data$Var1, data$Var2, shuflled=10000, alpha=.01, paired=TRUE)
shuffled_ttest <- function(g1, g2, shuffle=50, alpha = 0.05, paired=FALSE, csvFileName){
######Variable setup-----------------------------------
#convert data to data.frame
data_set_orig <- as.data.frame(cbind(g1, g2), row.names = NULL, optional = FALSE,
cut.names = FALSE, col.names = names(c(g1,g2)), fix.empty.names = TRUE,
stringsAsFactors = default.stringsAsFactors());
shuffle <- shuffle;
tic("Run time") #start timer----
###Paired t-Test Check----
#Option for Paired-Sample t-test - Default is FALSE
if (paired == FALSE) {
paired_test <- FALSE;
t.method <- "Independant Sample t-Test";
}
else {
paired_test <- TRUE;
t.method <- "Paired Sample t-Test";}
###CSV Output Check----
#if csvFileName parameter is included, save the variable
if (!missing(csvFileName)) {
#Appends '.csv' and saves desired file name as variable csvFileName
csvFileName <- paste(csvFileName,".csv",sep="");
}
#Create statistical output data frame named "results", with 7 headers, for ind. sampled t-tests, clears old data with each new run
results <- data.frame("iteration" = 0, "sample" = 0,"range" = character(1), "base n" = 0, "t" = double(1),"df" = double(1),"p value" = double(1), stringsAsFactors = FALSE);
sum_ZeroVar <- 0; #counter for no variance comparisons
########End Variable Setup---------------------------------
#Warning for large shuffling amounts
if (shuffle > 50) {
print("Please wait...");
}
######Shuffling and replication ------------------------------
#Shuffles the data a number of times = to shuffle amount, running the replication tests for each iteration
for (i in 1:shuffle) {
cycle <- 1; #keep track of replications within shuffles
x<-1; #Used for lower bounds of current selection - resets on new iteration
#shuffles data set using 'sample()'
data_set <- data_set_orig[sample(1:nrow(data_set_orig)),];
#assigns shuffled data to global env
assign("shuffled_data", data_set, envir=.GlobalEnv);
#finds base n for each iteration
base_n <- find_base_n(data_set, alpha, paired_test);
#If there are no significant findings, add NA results and continue
if (base_n == 0) {
results[nrow(results) + 1,] <- list(i,cycle,paste(1,':',nrow(data_set), sep=""), NA, NA, NA, NA);
next;
}
#If the test shows 0 variance, increase ZeroVar counter, ignore the findings: redo current iteration
if (base_n == -1) {
sum_ZeroVar = sum_ZeroVar + 1;
i = i-1;
next;
} else {
#y=set to min number of participants needed for each shuffle
y <- base_n;
}
#Repeats while the current selection of participants is less than the max number of participants - does not run less than base_n number of participants, so there may be missing data at the end
#TODO - Add option for include/exclude uneven N
while (y <= nrow(data_set)) {
#Ind sample t-test---------------------
if (paired_test == FALSE) {
#Check that variance is greater than 0 in current selections
#Use apply to get variance values of rows x:y, in columns 1 and 2
#Use all to compare variance to 0
if (all(apply(data_set[x:y,], 2, var) != 0 )){
#t test on Group 1 and Group 2 using current selection of participants x through y
ttestresults <- t.test(data_set[x:y,1],data_set[x:y,2]);
#if the test is signficant, increase count by 1
if (ttestresults$p.value <= alpha) {
}
#add statistical output to new row in results data.frame, rounding down the decimals
#Organized as [iteration, cycle number, range, t-test statistic, degrees of freedom, p value].
results[nrow(results) + 1,] <- list(i,cycle,paste(x,':',y, sep=""),base_n,round(ttestresults$statistic,3), round(ttestresults$parameter,4), round(ttestresults$p.value,5));
}
#If there is an error, NA for test statistics
else {
results[nrow(results) + 1,] <- list(i,cycle,paste(x,':',y, sep=""), base_n, NA, NA, NA);
sum_ZeroVar <- sum_ZeroVar + 1;
}
} #end if
#Paired sample t-test--------------------
if (paired_test == TRUE) {
#Check that variance is greater than 0 in current selections
#Use apply to get variance values within-subject and between subject - 1=rows, 2=cols
#Use all to compare variance to 0
if (#all(apply(data_set[x:y,], 1, var) != 0) &
#all(apply(data_set[x:y,1], 2, var) != 0) &
all(apply(data_set[x:y,], 2, var) != 0)){
#debug variable values when errors occur
#assign("x",x, envir=.GlobalEnv);
#assign("y",y, envir=.GlobalEnv);
assign("base n",base_n, envir=.GlobalEnv);
#Catch data is constant errors
tryCatch({
#t test on Group 1 and Group 2 using current selection of participants x through y
ttestresults <- t.test(data_set[x:y,1],data_set[x:y,2],paired=TRUE);
},
error = function(err) {
sum_ZeroVar <- sum_ZeroVar + 1;
ttestresults$p.value <- 9;
})
#if the test is signficant, increase count by 1
if (is.null(ttestresults$p.value) == FALSE &
ttestresults$p.value <= alpha) {
}
#add statistical output to new row in results data.frame, rounding down the decimals
#Organized as [iteration, cycle number, range, t-test statistic, degrees of freedom, p value].
results[nrow(results) + 1,] <- list(i,cycle,paste(x,':',y, sep=""),base_n,round(ttestresults$statistic,3), round(ttestresults$parameter,4), round(ttestresults$p.value,5));
}
else {
#Skips t-test and report NA findings when variance = 0
results[nrow(results) + 1,] <- list(i,cycle,paste(x,':',y, sep=""),base_n, NA, NA, NA);
sum_ZeroVar <- sum_ZeroVar + 1;
}
} #end if
#Selects new range of participants of length base_n and increase cycle count
x<-x+base_n;
y<-y+base_n;
cycle <- cycle + 1;
}#end while loop
end_time=Sys.time(); #End Timer
}#End for loop
#######End shuffling and replication---------------------
#######Export--------------------------------------------
#Saves results to custom external file if option is include in parameters, if none included in argument, defaults output to 'results.csv'
if (missing(csvFileName)){
assign('results',results, envir=.GlobalEnv);
write.csv(results, file="results.csv", row.names=TRUE);
}
#if there IS a name included
else {
#writes to a csv file using the variable output_fname
write.csv(results, file=csvFileName, row.names=TRUE);
}
#######End Export ------------------------------------------
#######Output-----------------------------------------------
#shows the results in console if there are less than 50 rows
if (nrow(results) < 50) {
show(results);
}
toc(); #total time taken
sum_sig_p <- NROW(results[results$p.value <= alpha & !is.na(results$p.value),]);
#Summary of results - cat() allows use of \n for line breaks
cat("\n*",t.method,
"*\nGroup 1: ", names(g1), " | Group 2: ", names(g2),
"\n\nSignificant findings: \n(p < ", alpha,"): ", sum_sig_p, "/", nrow(results), " (", round((sum_sig_p/nrow(results))*100,2),"%) | Avg: ", round(mean(results[,7], na.rm = TRUE),4),
"\nZero Variances: ", sum_ZeroVar, " | Mean Base n: ", round(mean(results[,4], na.rm=TRUE), 2), sep="");
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.