#' Design function used to specify the parameters used in the simulation
#' @param design String specifying the ANOVA design.
#' @param n Sample size in each condition
#' @param mu Vector specifying mean for each condition
#' @param sd standard deviation for all conditions
#' @param r Correlation between dependent variables (single value or matrix)
#' @param labelnames Optional vector to specifying factor and condition names (recommended, if not used factors and levels are indicated by letters and numbers)
#' @param plot Should means plot be printed (defaults to TRUE)
#' @return Returns Single data-frame with simulated data, design, design list, factor names, formulas for ANOVA, means, sd, correlation, sample size per condition, correlation matrix, covariance matrix, design string, labelnames, labelnameslist, factor names, meansplot
#' @examples
#' ## Set up a within design with 2 factors, each with 2 levels,
#' ## with correlation between observations of 0.8,
#' ## 40 participants (who do all conditions), and standard deviation of 2
#' ## with a mean pattern of 1, 0, 1, 0, conditions labeled 'condition' and
#' ## 'voice', with names for levels of "cheerful", "sad", and "human", "robot"
#' ANOVA_design(design = "2w*2w", n = 40, mu = c(1, 0, 1, 0), sd = 2, r = 0.8,
#' labelnames = c("condition", "cheerful", "sad", "voice", "human", "robot"))
#' @section References:
#' too be added
#' @importFrom stats pnorm pt qnorm qt as.formula median
#' @importFrom utils combn
#' @importFrom reshape2 melt
#' @importFrom MASS mvrnorm
#' @importFrom afex aov_car
#' @importFrom grDevices colorRampPalette
#' @import ggplot2
#' @export
#'
ANOVA_design <- function(design, n, mu, sd, r = 0, labelnames = NULL, plot = TRUE){
#Check String for an acceptable digits and factor (w or b)
if (grepl("^(\\d{1,3}(w|b)\\*){0,2}\\d{1,3}(w|b)$", design, ignore.case = FALSE, perl = TRUE) == FALSE) {
stop("Problem in the design argument: must input number of levels as integer (2-999) and factor-type (between or within) as lower case b (between) or w (within)")
}
#Ensure sd is greater than 0
if (any(sd <= 0)) {
stop("Standard deviation (sd) is less than or equal to zero; input a value greater than zero")
}
#Ensure, if single correlation is input, that it is between 0 and 1
if (any(r < -1) | any(r >= 1) ) {
stop("Correlation must be greater than -1 and less than 1")
}
#Ensure proper n input
if (length(n) != 1 ) {
stop("Only balanced designs allowed: n can only be one value")
}
#If labelnames are not provided, they are generated.
#Store factor levels (used many times in the script, calculate once)
factor_levels <- as.numeric(strsplit(design, "\\D+")[[1]])
if (is.null(labelnames)) {
for(i1 in 1:length(factor_levels)){
labelnames <- append(labelnames,paste(paste(letters[i1]), sep = ""))
for(i2 in 1:factor_levels[i1]){
labelnames <- append(labelnames,paste(paste(letters[i1]), paste(i2), sep = ""))
}
}
}
if (length(labelnames) != length(factor_levels) + sum(factor_levels)) {
stop("Variable 'design' does not match the length of the labelnames")
}
###############
# 1. Specify Design and Simulation----
###############
# String used to specify the design
# Add numbers for each factor with 2 levels, e.g., 2 for a factor with 2 levels
# Add a 'w' after the number for within factors, and a 'b' for between factors
# Separate factors with a * (asterisk)
# Thus "2b*3w) is a design with 2 between levels, and 3 within levels
#Check if design and means match up - if not, throw an error and stop
if(prod(factor_levels) != length(mu)){stop("the length of the vector with means does not match the study design")}
#Check if the design and sd match (either 1 or length of design)
#if(length(sd) != 1 && prod(factor_levels) != length(sd)){stop("The SD must be a length of 1 or match the length of the study design")}
#Check if the factors are of an acceptable number of levels
if(any(factor_levels <= 0) == TRUE | any(factor_levels > 99) ) {
stop("Each factor can only have between 2 and 99 levels")
}
###############
# 2. Create Factors and Design ----
###############
#Count number of factors in design
factors <- length(factor_levels)
#Get factor names and labelnameslist
labelnames1 <- labelnames[(1 + 1):(1+factor_levels[1])]
if(factors > 1){labelnames2 <- labelnames[(factor_levels[1] + 3):((factor_levels[1] + 3) + factor_levels[2] - 1)]}
if(factors > 2){labelnames3 <- labelnames[(factor_levels[2] + factor_levels[1] + 4):((factor_levels[2] + factor_levels[1] + 4) + factor_levels[3] - 1)]}
factornames1 <- labelnames[1]
if(factors > 1){factornames2 <- labelnames[factor_levels[1] + 2]}
if(factors > 2){factornames3 <- labelnames[factor_levels[2] + factor_levels[1] + 3]}
if(factors == 1){labelnameslist <- list(labelnames1)}
if(factors == 2){labelnameslist <- list(labelnames1,labelnames2)}
if(factors == 3){labelnameslist <- list(labelnames1,labelnames2,labelnames3)}
if(factors == 1){factornames <- c(factornames1)}
if(factors == 2){factornames <- c(factornames1,factornames2)}
if(factors == 3){factornames <- c(factornames1,factornames2,factornames3)}
#Specify within/between factors in design: Factors that are within are 1, between 0
design_factors <- strsplit(gsub("[^A-Za-z]","",design),"",fixed = TRUE)[[1]]
design_factors <- as.numeric(design_factors == "w") #if within design, set value to 1, otherwise to 0
#Specify design list (similar as below)
xxx <- data.frame(matrix(NA, nrow = prod(factor_levels), ncol = 0))
for(j in 1:factors){
xxx <- cbind(xxx, as.factor(unlist(rep(as.list(paste(labelnameslist[[j]],
sep="_")),
each = prod(factor_levels)/prod(factor_levels[1:j]),
times = prod(factor_levels)/prod(factor_levels[j:factors])
))))
}
design_list <- as.character(interaction(xxx[, 1:factors], sep = "_")) #create a new condition variable combine 2 columns (interaction is a cool function!)
###############
# 3. Create Correlation and Covariance Matrix ----
###############
#Create empty matrix
sigmatrix <- data.frame(matrix(ncol=length(mu), nrow = length(mu)))
#NEW CODE, JUST FOR SINGLE correlation entry
#single number
cors <- r
vars <- length(design_list)
#Code by Lisa De Bruine. Allows multiple inputs for r - only use single value now.
#from: https://github.com/debruine/faux/blob/master/R/rnorm_multi.R
generate_cor_matrix <- function(vars = 3, cors = 0, mu = 0, sd = 1) {
if (length(mu) == 1) {
mu <- rep(mu, vars)
} else if (length(mu) != vars) {
stop("the length of mu must be 1 or vars");
}
if (length(sd) == 1) {
sd <- rep(sd, vars)
} else if (length(sd) != vars) {
stop("the length of sd must be 1 or vars");
}
# correlation matrix
if (class(cors) == "numeric" & length(cors) == 1) {
if (cors >=-1 & cors <=1) {
cors = rep(cors, vars*(vars-1)/2)
} else {
stop("cors must be between -1 and 1")
}
}
if (class(cors) == "matrix") {
if (!is.numeric(cors)) {
stop("cors matrix not numeric")
} else if (dim(cors)[1] != vars || dim(cors)[2] != vars) {
stop("cors matrix wrong dimensions")
} else if (sum(cors == t(cors)) != (nrow(cors)^2)) {
stop("cors matrix not symmetric")
} else {
cor_mat <- cors
}
} else if (length(cors) == vars*vars) {
cor_mat <- matrix(cors, vars)
} else if (length(cors) == vars*(vars-1)/2) {
# generate full matrix from vector of upper right triangle
cor_mat <- matrix(nrow=vars, ncol = vars)
upcounter = 1
lowcounter = 1
for (col in 1:vars) {
for (row in 1:vars) {
if (row == col) {
# diagonal
cor_mat[row, col] = 1
} else if (row > col) {
# lower left triangle
cor_mat[row, col] = cors[lowcounter]
lowcounter <- lowcounter + 1
}
}
}
for (row in 1:vars) {
for (col in 1:vars) {
if (row < col) {
# upper right triangle
cor_mat[row, col] = cors[upcounter]
upcounter <- upcounter + 1
}
}
}
}
# check matrix is positive definite
tol <- 1e-08
ev <- eigen(cor_mat, only.values = TRUE)$values
if (sum(ev < tol)) {
stop("correlation matrix not positive definite")
}
return(cor_mat)
}
cor_mat <- generate_cor_matrix(vars = vars, cors = cors)
sd_for_sigma <- sd #added to prevent changing sd which is now passed on
if (length(sd_for_sigma) == 1) {
sd_for_sigma <- rep(sd_for_sigma, vars)
} else if (length(sd_for_sigma) != vars) {
stop("the length of sd_for_sigma must be 1 or vars");
}
sigma <- (sd_for_sigma %*% t(sd_for_sigma)) * cor_mat #Our earlier code had a bug, with SD on the diagonal. Not correct! Thanks Lisa.
#General approach: For each factor in the list of the design, save the first item (e.g., a1b1)
#Then for each factor in the design, if 1, set number to wildcard
i1<-1
i2<-1
for(i1 in 1:length(design_list)){
design_list_split <- unlist(strsplit(design_list[i1],"_"))
#current_factor <- design_list_split[c(2,4,6)[1:length(design)]] #this creates a string of 2, 2,4 or 2,4,6 depending on the length of the design for below
for(i2 in 1:length(design_factors)){
#We set each number that is within to a wildcard, so that all within participant factors are matched
if(design_factors[i2]==1){design_list_split[i2] <- "\\w+"}
}
sigmatrix[i1,]<-as.numeric(grepl(paste0(design_list_split, collapse="_"), design_list)) # compare factors that match with current factor, given wildcard, save list to sigmatrix
}
#Now multiply the matrix we just created (that says what is within, and what is between, with the original covariance matrix)
#So factors manipulated within are correlated, those manipulated between are not.
cor_mat <- sigmatrix*cor_mat
sigmatrix <- sigma*sigmatrix
row.names(sigmatrix) <- design_list
colnames(sigmatrix) <- design_list
row.names(cor_mat) <- design_list
colnames(cor_mat) <- design_list
###############
# 4. Create Dataframe based on Design ----
###############
#Create the data frame. This will be re-used in the simulation (y variable is overwritten) but created only once to save time in the simulation
dataframe <- as.data.frame(mvrnorm(n=n,
mu=mu,
Sigma=sigmatrix,
empirical = FALSE))
dataframe$subject<-as.factor(c(1:n)) #create temp subject variable just for merging
#Melt dataframe
dataframe <- melt(dataframe,
id.vars = "subject",
variable.name = "cond",
value.name = "y")
# Let's break this down - it's a bit tricky. First, we want to create a list of labelnames that will indicate the factors.
# We are looping this over the number of factors.
# This: factor_levels - takes the string used to specify the design and turn it in a list.
# we take the labelnames and factornames and combine them
# We repeat these each: n*(2^(factors-1)*2)/(2^j) and them times: (2^j/2) to get a list for each factor
# We then bind these together with the existing dataframe.
for(j in 1:factors){
dataframe <- cbind(dataframe, as.factor(unlist(rep(as.list(paste(factornames[[j]],
labelnameslist[[j]],
sep="_")),
each = n*prod(factor_levels)/prod(factor_levels[1:j]),
times = prod(factor_levels)/prod(factor_levels[j:factors])
))))
}
#Rename the factor variables that were just created
names(dataframe)[4:(3+factors)] <- factornames[1:factors]
#Create subject column
subject <- 1:n #Set subject to 1 to the number of subjects collected
for(j2 in length(design_factors):1){ #for each factor in the design, from last to first
#if w: repeat current string as often as the levels in the current factor (e.g., 3)
#id b: repeat current string + max of current subject
if(design_factors[j2] == 1){subject <- rep(subject,factor_levels[j2])}
subject_length <- length(subject) #store current length - to append to string of this length below
if(design_factors[j2] == 0){
for(j3 in 2:factor_levels[j2]){
subject <- append(subject,subject[1:subject_length]+max(subject))
}
}
}
#Overwrite subject columns in dataframe
dataframe$subject <- subject
#For the correlation matrix, we want the names of each possible comparison of means
#Need to identify which columns from dataframe to pull the factor names from
if (factors == 1) {
cond_col <- c(4)
} else if (factors == 2) {
cond_col <- c(4, 5)
} else {
cond_col <- c(4, 5, 6)
}
dataframe$cond <- as.character(interaction(dataframe[, cond_col], sep = "_")) #create a new condition variable combine 2 columns (interaction is a cool function!)
###############
# 5. Specify factors for formula ----
###############
if(factors == 1 & sum(design_factors) == 1){frml1 <- as.formula(paste("y ~ ",factornames[1]," + Error(subject/",factornames[1],")",sep=""))}
if(factors == 1 & sum(design_factors) == 0){frml1 <- as.formula(paste("y ~ ",factornames[1]," + Error(1 | subject)",sep=""))}
if(factors == 2){
if(sum(design_factors) == 2){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2]," + Error(subject/",factornames[1],"*",factornames[2],")"))}
if(sum(design_factors) == 0){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2]," + Error(1 | subject)"))}
if(all(design_factors == c(1, 0)) == TRUE){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2]," + Error(subject/",factornames[1],")"))}
if(all(design_factors == c(0, 1)) == TRUE){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2]," + Error(subject/",factornames[2],")"))}
}
if(factors == 3){
if(sum(design_factors) == 3){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2],"*",factornames[3]," + Error(subject/",factornames[1],"*",factornames[2],"*",factornames[3],")"))}
if(sum(design_factors) == 0){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2],"*",factornames[3]," + Error(1 | subject)"))}
if(all(design_factors == c(1, 0, 0)) == TRUE){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2],"*",factornames[3]," + Error(subject/",factornames[1],")"))}
if(all(design_factors == c(0, 1, 0)) == TRUE){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2],"*",factornames[3]," + Error(subject/",factornames[2],")"))}
if(all(design_factors == c(0, 0, 1)) == TRUE){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2],"*",factornames[3]," + Error(subject/",factornames[3],")"))}
if(all(design_factors == c(1, 1, 0)) == TRUE){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2],"*",factornames[3]," + Error(subject/",factornames[1],"*",factornames[2],")"))}
if(all(design_factors == c(0, 1, 1)) == TRUE){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2],"*",factornames[3]," + Error(subject/",factornames[2],"*",factornames[3],")"))}
if(all(design_factors == c(1, 0, 1)) == TRUE){frml1 <- as.formula(paste("y ~ ",factornames[1],"*",factornames[2],"*",factornames[3]," + Error(subject/",factornames[1],"*",factornames[3],")"))}
}
#Specify second formula used for plotting
if(factors == 1){frml2 <- as.formula(paste("~",factornames[1]))}
if(factors == 2){frml2 <- as.formula(paste("~",factornames[1],"+",factornames[2]))}
if(factors == 3){frml2 <- as.formula(paste("~",factornames[1],"+",factornames[2],"+",factornames[3]))}
###############
# 6. Create plot of means to visualize the design ----
###############
dataframe_means <- data.frame(mu, sd)
for(j in 1:factors){
dataframe_means <- cbind(dataframe_means, as.factor(unlist(rep(as.list(paste(labelnameslist[[j]],
sep="")),
each = prod(factor_levels)/prod(factor_levels[1:j]),
times = prod(factor_levels)/prod(factor_levels[j:factors])
))))
}
if(factors == 1){
names(dataframe_means) <- c("mu","SD",factornames[1])
dataframe_means[,factornames[1]] <- ordered(dataframe_means[,factornames[1]], levels = labelnameslist[[1]])
}
if(factors == 2){
names(dataframe_means)<-c("mu","SD",factornames[1],factornames[2])
dataframe_means[,factornames[1]] <- ordered(dataframe_means[,factornames[1]], levels = labelnameslist[[1]])
dataframe_means[,factornames[2]] <- ordered(dataframe_means[,factornames[2]], levels = labelnameslist[[2]])
}
if(factors == 3) {
names(dataframe_means) <- c("mu","SD",factornames[1],factornames[2],factornames[3])
dataframe_means[,factornames[1]] <- ordered(dataframe_means[,factornames[1]], levels = labelnameslist[[1]])
dataframe_means[,factornames[2]] <- ordered(dataframe_means[,factornames[2]], levels = labelnameslist[[2]])
dataframe_means[,factornames[3]] <- ordered(dataframe_means[,factornames[3]], levels = labelnameslist[[3]])
}
if(factors == 1){meansplot = ggplot(dataframe_means, aes_string(y = "mu", x = factornames[1]))}
if(factors == 2){meansplot = ggplot(dataframe_means, aes_string(y = "mu", x = factornames[1], colour = factornames[2]))}
if(factors == 3){meansplot = ggplot(dataframe_means, aes_string(y = "mu", x = factornames[1], colour = factornames[2])) + facet_wrap( paste("~",factornames[3],sep=""))}
#Set custom color palette if factor 2 has a length greater than 8
if (factors >= 2 && length(labelnameslist[[2]]) >= 9) {
meansplot2 = meansplot +
geom_point(position = position_dodge(width=0.9), shape = 10, size=5, stat="identity") + #Personal preference for sd -- ARC
geom_errorbar(aes(ymin = mu-sd, ymax = mu+sd),
position = position_dodge(width=0.9), size=.6, width=.3) +
coord_cartesian(ylim=c(min(mu)-sd, max(mu)+sd)) +
theme_bw(base_size = 16) + ggtitle("Means for each condition in the design") +
scale_colour_brewer(palette = "Dark2")
} else {
meansplot2 = meansplot +
geom_point(position = position_dodge(width=0.9), shape = 10, size=5, stat="identity") + #Personal preference for sd -- ARC
geom_errorbar(aes(ymin = mu-sd, ymax = mu+sd),
position = position_dodge(width=0.9), size=.6, width=.3) +
coord_cartesian(ylim=c(min(mu)-sd, max(mu)+sd)) +
theme_bw() + ggtitle("Means for each condition in the design") +
scale_colour_brewer(palette = "Dark2")
}
if(plot == TRUE){
print(meansplot2) #should be blocked in Shiny context
}
# Return results in list()
invisible(list(dataframe = dataframe,
design = design,
design_list = design_list,
factors = factors,
frml1 = frml1,
frml2 = frml2,
mu = mu,
sd = sd,
r = r,
n = n,
cor_mat = cor_mat,
sigmatrix = sigmatrix,
design_factors = design_factors,
labelnames = labelnames,
labelnameslist = labelnameslist,
factornames = factornames,
meansplot = meansplot2))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.