# Version number of the shuffled t-test script.
shuffled_ttest.Version <- 1.06
#Initial program for running shuffled t-tests
#Data should be organized with Var 1 in column 1 and Var 2 in column 2
#Runs an independent-samples or paired-samples t-test a number of times, with the data segmented by the minimum number of participants needed to reach significance
#####Parameters--------------
#data_set should refer to a dataset available in the global envir
#shuffle_amount - denotes how many times the data will be shuffled
#alpha - desired alpha value
#paired - optional TRUE/FALSE for Paired Sample t-Test, default is False
#csvFileName - optional; if included, the results are written to a CSV file with this name ('.csv' is appended) - Must be in " "
#-------------------------------
#Required packages
packages <- c("tictoc")
# For every required package: attach it when it is already installed;
# otherwise install it (together with its dependencies) and then attach it.
# require() is used on purpose here because its logical return value tells us
# whether the package could be attached.
package.check <- lapply(packages, function(pkg) {
  already_available <- require(pkg, character.only = TRUE)
  if (!already_available) {
    install.packages(pkg, dependencies = TRUE)
    library(pkg, character.only = TRUE)
  }
})
######End Packages
######Base n--------------------------------------------------
#' Find the smallest leading sample that reaches significance
#'
#' Runs a t-test on rows `1:n` of `data_set` for increasing `n` and returns the
#' first `n` (never smaller than `min_n`) whose p value is at or below `alpha`.
#'
#' A prefix is skipped (treated as "not significant") when the test cannot be
#' computed: either column has no variance, or, for a paired test, the pairwise
#' differences have no variance (t.test() would stop with "data are
#' essentially constant"). Missing values are ignored when checking variance,
#' mirroring how t.test() drops them.
#'
#' @param data_set Two-column data; variable 1 in column 1, variable 2 in
#'   column 2. Anything accepted by as.data.frame().
#' @param alpha Significance threshold the p value is compared against.
#' @param method `TRUE` for a paired-sample t-test, `FALSE` for an
#'   independent-sample t-test.
#' @param min_n Smallest number of participants that may be returned.
#'   Defaults to 10.
#' @return The number of leading rows needed for significance, or 0 when no
#'   prefix of at least `min_n` rows is significant.
find_base_n <- function(data_set, alpha, method, min_n = 10) {
  data_set <- as.data.frame(data_set, stringsAsFactors = FALSE)
  max_n <- nrow(data_set)
  paired <- isTRUE(as.logical(method))

  # A t-test needs at least two observations, and no n below min_n can ever be
  # returned, so there is no point in testing smaller prefixes.
  start_n <- max(2, ceiling(min_n))
  if (max_n < start_n) {
    return(0)
  }

  # TRUE only when the variance is defined and strictly positive.
  has_variance <- function(values) {
    isTRUE(var(values, na.rm = TRUE) > 0)
  }

  group1 <- data_set[[1]]
  group2 <- data_set[[2]]

  # Test incrementally larger prefixes: 1:start_n, 1:(start_n + 1), ...
  for (n in start_n:max_n) {
    x <- group1[seq_len(n)]
    y <- group2[seq_len(n)]

    if (!has_variance(x) || !has_variance(y)) {
      next
    }
    if (paired && !has_variance(x - y)) {
      next
    }

    p_value <- t.test(x, y, paired = paired)$p.value
    if (!is.na(p_value) && p_value <= alpha) {
      return(as.numeric(n))
    }
  }

  # No significant prefix was found.
  0
}
######Shuffled t-Test Function
#' Shuffled replication t-tests
#'
#' Shuffles the rows of `data_set` `shuffle_amount` times. For every shuffle
#' the minimum number of participants needed to reach significance (`base n`)
#' is determined with find_base_n(), and the shuffled data are then cut into
#' consecutive, non-overlapping windows of `base n` rows. A t-test is run on
#' each complete window; trailing rows that do not fill a window are not
#' tested.
#'
#' A window that cannot be tested (a column without variance, or, for a paired
#' test, pairwise differences without variance) is recorded once with `NA`
#' statistics and counted as a zero-variance comparison.
#'
#' Side effects: writes the results to a CSV file ("results.csv" when
#' `csvFileName` is not supplied, in which case the results are also assigned
#' to `results` in the global environment), prints the results when there are
#' fewer than 50 rows, and prints the run time and a summary line.
#'
#' @param data_set Two-column data; variable 1 in column 1, variable 2 in
#'   column 2.
#' @param shuffle_amount Number of times the data are shuffled.
#' @param alpha Desired alpha level. Defaults to 0.05.
#' @param paired `TRUE` for a paired-sample t-test; the default `FALSE` runs
#'   an independent-sample t-test.
#' @param csvFileName Optional output file name without extension; ".csv" is
#'   appended.
#' @return The results data frame, invisibly. Columns: iteration, sample,
#'   range, base.n, t, df, p.value.
shuffled_ttest <- function(data_set, shuffle_amount, alpha = 0.05, paired = FALSE, csvFileName) {
  ###### Variable setup -----------------------------------
  # default.stringsAsFactors() is deprecated, so the value is passed explicitly.
  data_set <- as.data.frame(data_set, stringsAsFactors = FALSE)
  if (ncol(data_set) < 2) {
    stop("data_set must have at least two columns", call. = FALSE)
  }

  tic("Run time") # start timer
  # Make sure the timer is removed again when the function stops early.
  timer_running <- TRUE
  on.exit(if (timer_running) toc(quiet = TRUE), add = TRUE)

  # Option for paired-sample t-test - default is FALSE
  if (paired == FALSE) {
    test_method <- FALSE
    t.method <- "Independant Sample t-Test"
  } else {
    test_method <- TRUE
    t.method <- "Paired Sample t-Test"
  }

  # Remember whether a file name was supplied before the argument is modified.
  custom_file <- !missing(csvFileName)
  if (custom_file) {
    csvFileName <- paste0(csvFileName, ".csv")
  }

  group1_col <- 1 # group 1 column = variable 1, change as needed
  group2_col <- 2 # group 2 column = variable 2, change as needed
  sum_sig_p <- 0 # number of significant findings
  sum_NA <- 0 # number of comparisons that could not be tested
  n_rows <- nrow(data_set)

  # TRUE only when the variance is defined and strictly positive.
  has_variance <- function(values) {
    isTRUE(var(values, na.rm = TRUE) > 0)
  }

  # Template that fixes the column names and types of the output.
  empty_results <- data.frame(
    iteration = numeric(0), sample = numeric(0), range = character(0),
    base.n = numeric(0), t = double(0), df = double(0), p.value = double(0),
    stringsAsFactors = FALSE
  )
  # One single-row data frame per tested window; combined after the loops
  # instead of growing a data frame row by row.
  result_rows <- list()

  # Warning for large shuffling amounts
  if (shuffle_amount > 100) {
    print("Please wait...")
  }

  ###### Shuffling and replication ------------------------------
  for (i in seq_len(shuffle_amount)) {
    # Shuffle the row order of the (already shuffled) data.
    data_set <- data_set[sample.int(n_rows), , drop = FALSE]

    # Minimum number of participants needed for significance in this shuffle.
    base_n <- find_base_n(data_set, alpha, test_method)
    if (base_n == 0) {
      stop("No significant p values found!")
    }

    # First row of every complete window of base_n rows.
    window_starts <- seq(1, n_rows - base_n + 1, by = base_n)

    for (cycle in seq_along(window_starts)) {
      x <- window_starts[cycle]
      y <- x + base_n - 1
      group1 <- data_set[x:y, group1_col]
      group2 <- data_set[x:y, group2_col]

      # A paired test additionally needs variance in the pairwise differences.
      testable <- has_variance(group1) && has_variance(group2) &&
        (!test_method || has_variance(group1 - group2))

      if (testable) {
        ttestresults <- t.test(group1, group2, paired = test_method)
        if (ttestresults$p.value < alpha) {
          sum_sig_p <- sum_sig_p + 1
        }
        stats <- c(
          round(unname(ttestresults$statistic), 3),
          round(unname(ttestresults$parameter), 4),
          round(ttestresults$p.value, 5)
        )
      } else {
        # Real missing values keep the statistic columns numeric.
        stats <- rep(NA_real_, 3)
        sum_NA <- sum_NA + 1
      }

      result_rows[[length(result_rows) + 1]] <- data.frame(
        iteration = i, sample = cycle, range = paste0(x, ":", y),
        base.n = base_n, t = stats[1], df = stats[2], p.value = stats[3],
        stringsAsFactors = FALSE
      )
    }
  }

  results <- if (length(result_rows) > 0) {
    do.call(rbind, result_rows)
  } else {
    empty_results
  }

  ####### Export --------------------------------------------
  if (custom_file) {
    write.csv(results, file = csvFileName, row.names = TRUE)
  } else {
    # Without a file name the results are also exposed as a global `results`
    # object and written to 'results.csv'.
    assign("results", results, envir = .GlobalEnv)
    write.csv(results, file = "results.csv", row.names = TRUE)
  }

  ####### Output -----------------------------------------------
  # Show the results in the console if there are fewer than 50 rows.
  if (nrow(results) < 50) {
    print(results)
  }

  # Total time, number of significant replications and untestable comparisons.
  toc()
  timer_running <- FALSE
  cat(t.method,
    "\nSignificant findings (p < ", alpha, "): ", sum_sig_p, "/", nrow(results),
    "\nZero Variances: ", sum_NA, " | Mean Base n: ", mean(results$base.n),
    sep = ""
  )

  invisible(results)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.