pwr_ttest.version = "0.2.8";
#Shuffled analysis based on Power
#runs independant sample t-test or paired t test, a number of times, with the data being segmented by
# minimum number of participants needed to reach significant power
#####Parameters--------------
#x Group 1
#y Group 2
#shuffle - denotes how many times the data will be shuffled
#alpha - desired alpha value, defaults = 0.05
#power - desired power level
#paired - optional TRUE/FALSE for Paired Sample t-Test, default is False
#Required packages
packages = c("tictoc", "pwr", "effsize");
#use this function to check if each package is on the local machine
#if a package is installed, it will be loaded
#if any are not, the missing package(s) will be installed and loaded
package.check <- lapply(packages, FUN = function(x) {
if (!require(x, character.only = TRUE)) {
install.packages(x, dependencies = TRUE)
library(x, character.only = TRUE);
}
})
######End Packages
###### pwr.ttest Function---------------------------
#'
#' This function
#' @param g1 Group 1
#' @param g2 Group 2
#' @param shuffle number of times to replicate data, defaults to 50
#' @param alpha value of alpha, defaults to .05
#' @param power value of power criteria, defaults to .85
#' @param paired Boolean to run a paired-sample t-test, defaults to FALSE
#' @param csvFileName Optional parameter to save results to .csv
#' @keywords shuffle, t-test, power
#' @export
#' @examples
#' shuffled_ttest(g1, g2, 500, alpha=.05, power=.9)
#' shuffled_ttest(group1, group2, 10000, alpha=.01, paired=TRUE, paired_test1)
pwr.ttest <- function(g1, g2, shuffle=50, alpha=.05, power=0.85, paired=FALSE, csvFileName){
######Variable setup-----------------------------------
#convert g1 and g2 to data.frame
data_set <- as.data.frame(cbind(g1,g2), row.names = NULL, optional = FALSE,
cut.names = FALSE, col.names = c("group1", "group2"), fix.empty.names = TRUE,
stringsAsFactors = default.stringsAsFactors());
shuffle <- shuffle;
tic("Run time") #start timer----
###Paired t-Test Check----
#Option for Paired-Sample t-test - Default is FALSE
if (paired == FALSE) {
paired_test <- FALSE;
t.method <- "Independant Sample t-Test";
}
else {
paired_test <- TRUE;
t.method <- "Paired Sample t-Test";}
###CSV Output Check----
#if csvFileName parameter is included, save the variable
if (!missing(csvFileName)) {
#Appends '.csv' and saves desired file name as variable csvFileName
csvFileName <- paste(csvFileName,".csv",sep="");
}
#Create statistical output data frame named "results", with 8 headers, for t-test, clears old data with each new run
results <- data.frame("iteration" = numeric(0), "sample" = numeric(0),"range" = character(0), "base n" = numeric(0),
"t" = double(0), "df" = double(0), "power" = double(0), "p value" = double(0), stringsAsFactors = FALSE);
sum_ZeroVar <- 0; #counter for no variance comparisons
########End Variable Setup---------------------------------
# Warning for large shuffling amounts
if (shuffle > 50) {
print("Please wait...");
}
###### Shuffling and replication ------------------------------
#Shuffles the data a number of times = to shuffle amount, running the replication tests for each iteration
for (i in 1:shuffle) {
cycle <- 1; #keep track of replications within shuffles
x<-1; #Used for lower bounds of current selection - resets on new iteration
#shuffles data set using 'sample()'
data_set <- data_set[sample(1:nrow(data_set)),];
#assigns shuffled data to global env for debugging
assign("shuffled_data",data_set, envir=.GlobalEnv);
#finds base n for each iteration
base_n <- pwr.base_n(data_set[,1], data_set[,2], alpha, power, paired_test);
### Handle criteria not met and 0 variance----------------
#If there are no significant findings, stop add NA results and continue
if (base_n$participants == 0) {
results[nrow(results) + 1,] <- list(i,cycle,paste(1,':',nrow(data_set), sep=""), NA, NA, NA, NA, NA);
next;
}
#If the test shows 0 variance, increase NA counter, ignore the findings: redo current iteration
if (base_n$participants == -1) {
sum_ZeroVar = sum_ZeroVar + 1;
i = i-1;
next;
} else {
#y=set to min number of participants needed for each shuffle
y <- base_n$participants;
}
#Repeats while the current selection of participants is less than the max number of participants -
#does not run less than base_n number of participants, so there may be missing data at the end
#TODO - Add option for include/exclude uneven N
while (y <= nrow(data_set)) {
#### Ind sample t-test---------------------
if (paired_test == FALSE) {
#Check that variance is greater than 0 in current selections
#Use apply to get variance values of rows x:y, in columns 1 and 2
#Use all to compare variance to 0
if (all(apply(data_set[x:y,], 2, var) != 0 )){
#t test on Group 1 and Group 2 using current selection of participants x through y
ttestresults <- t.test(data_set[x:y,1],data_set[x:y,2]);
#calculate effect size
d_value <- cohen.d(data_set[x:y,1],data_set[x:y,2])$estimate;
#calculate power
pwr_value <- pwr.t.test(n = base_n$participants, d_value, sig.level = alpha, power=NULL, type="two.sample")$power;
#add statistical output to new row in results data.frame, rounding down the decimals
#Organized as [iteration, cycle number, range, t-test statistic, degrees of freedom, p value].
results[nrow(results) + 1,] <- list(i, cycle,paste(x,':',y, sep=""), base_n$participants, round(ttestresults$statistic,5),
round(ttestresults$parameter,5), round(pwr_value,5), round(ttestresults$p.value,5));
}
#If there is 0 variance, increase NA count and retry
else {
results[nrow(results) + 1,] <- list(i,cycle,paste(x,':',y, sep=""), NA, NA, NA, NA, NA);
sum_ZeroVar <- sum_ZeroVar + 1;
}
} #end if
#### Paired sample t-test--------------------
if (paired_test == TRUE) {
#Check that variance is greater than 0 in current selections
#Use apply to get variance values within-subject and between subject - 1=rows, 2=cols
#Use all to compare variance to 0
if (all(apply(data_set[x:y,], 2, var) != 0)){
##debug variable values when errors occur
#assign("x",x, envir=.GlobalEnv);
#assign("y",y, envir=.GlobalEnv);
#assign("base-n",base_n$participants, envir=.GlobalEnv);
#Catch data is constant errors
tryCatch({
#t test on Group 1 and Group 2 using current selection of participants x through y
ttestresults <- t.test(data_set[x:y,1], data_set[x:y,2], paired=TRUE);
},
error = function(err) {
sum_ZeroVar <- sum_ZeroVar + 1;
ttestresults$p.value <- 9;
})
#calculate effect size
d_value <- cohen.d(data_set[x:y,1], data_set[x:y,2])$estimate;
#calculate power
pwr_value <- pwr.t.test(n = base_n$participants, d_value, sig.level=alpha, power=NULL, type="paired")$power;
#add statistical output to new row in results data.frame, rounding down the decimals
#Organized as [iteration, cycle number, range, t-test statistic, degrees of freedom, power, p value].
results[nrow(results) + 1,] <- list(i,cycle,paste(x,':',y, sep=""), base_n$participants, round(ttestresults$statistic,3),
round(ttestresults$parameter,4), round(pwr_value,4), round(ttestresults$p.value,5));
}
else {
#Skips t-test and report NA findings when variance = 0
results[nrow(results) + 1,] <- list(i,cycle,paste(x,':',y, sep=""),NA, NA, NA, NA, NA);
sum_ZeroVar <- sum_ZeroVar + 1;
}
} #end if
#Selects new range of participants of length base_n and increase cycle count
x<-x+base_n$participants;
y<-y+base_n$participants;
cycle <- cycle + 1;
}#end while loop
end_time=Sys.time(); #End Timer
}#End for loop
#######End shuffling and replication
####### Export to CSV--------------------------------------------
#Saves results to custom external file if option is include in parameters, if none included in argument, defaults output to 'results.csv'
if (missing(csvFileName)){
assign('results',results, envir=.GlobalEnv);
write.csv(results, file="results.csv", row.names=TRUE);
}
#if there IS a name included
else {
#writes to a csv file using the variable output_fname
write.csv(results, file=csvFileName, row.names=TRUE);
}
####### End Export
####### Output Results-----------------------------------------------
#shows the results in console if there are less than 50 rows
if (nrow(results) < 50) {
show(results);
}
toc(); #total time taken
sum_sig_p <- NROW(results[results$p.value <= alpha & !is.na(results$p.value),]);
sum_sig_power <- NROW(results[results$power >= power & !is.na(results$power),]);
#Summary of results - cat() allows use of \n for line breaks
cat("\n*",t.method,
"*\nGroup 1: ", names(g1), " | Group 2: ", names(g2),
"\n\nSignificant findings: \n(p < ",alpha,"): ", sum_sig_p, "/", nrow(results), " (", round((sum_sig_p/nrow(results))*100,2),"%) | Avg: ", round(mean(results[,8], na.rm = TRUE),4),
"\n(power >= ", power, "): ", sum_sig_power, "/", nrow(results), " (",round((sum_sig_power/nrow(results))*100,2),"%) | Avg: ", round(mean(results[,7], na.rm = TRUE),4),
"\nZero Variances: ", sum_ZeroVar, " | Mean Base n: ", round(mean(results[,4], na.rm=TRUE), 2), sep="");
}
###### End Output
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.