#' ICTpowerSim - automate a series of parametric bootstrap power simulations.
#'
#' @export
#'
#' @author Stephen Tueller \email{stueller@@rti.org}
#'
#' @param designs A named list of designs.
#' in \code{\link{ICTpower}}. The names in the named list will be passed to
#' the parameter \code{file} in \code{\link{ICTpower}}.
#'
#' @param pReportName Character. A name used to create output files. The default
#' is \code{"ICTpowerSimResults"}
#'
#' @param B Numeric (integer). The number of replicated data sets per \code{design}
#' in \code{designs}.
#'
#' @param Seed Numeric (integer). The random seed for ensuring the simulation
#' can be replicated.
#'
#' @param save \code{NULL} or character. Should the data be saved? If NULL
#' (the default) raw data will not be saved. Other
#' options include \code{save='csv'} and \code{save='RData'}. This will be
#' combined with the names of \code{designs} to pass to the \code{file} option
#' in \code{\link{ICTpower}}.
#'
#' @param alpha See \code{\link{ICTpower}}.
#'
#' @param cores See \code{\link{ICTpower}}.
#'
#' @param standardize See \code{\link{ICTpower}}.
#'
#' @param dotar Logical. The default is \code{FALSE}. Should the data files
#' (if requested, see \code{save}) and analysis results be saved in a *.tar
#' file with the same name as \code{pReportName} and then delete the unzipped
#' copies of these files? This is suggested wheth the number of designs is
#' large. The summary of power across all conditions will no be included in the
#' *.tar archive. It is strongly encourage that a test run with only three
#' designs and B=3 is attempted at first to make sure archiving is done
#' correctly. If the path names resulting from your working directory are
#' too long, archiving will fail with the error "storing paths of more than 100
#' bytes is not portable". See \code{\link{tar}}.
#'
#' @examples
#'
#' \dontrun{
#'
#' # set up two designs
#' designs <- list()
#' designs[["defaultsN20"]] <- polyICT$new()
#' designs[["defaultsN40"]] <- designs[["defaultsN20"]]$clone(deep=TRUE)
#' designs[["defaultsN40"]]$update(groups = c(group1=40, group2=40))
#'
#' # run the simulation (without saving output, console print only)
#' ICTpowerSim(designs, pReportName = "ex1", B=3)
#'
#' # rerun with a finite population correction
#' ICTpowerSim(designs, pReportName = "ex1fpc", B=3, seed=2, fpc=100)
#'
#' }
ICTpowerSim <- function(designs ,
pReportName = "ICTpowerSimResults" ,
B = 1000 ,
seed = 1 ,
save = NULL ,
alpha = .05 ,
cores = parallel::detectCores()-1 ,
standardize = list(dv = TRUE ,
ivs = FALSE,
byids = TRUE ) ,
dotar = FALSE ,
...
)
{
# check whether `pReportName` is already used
if( file.exists(pReportName) )
{
stop("The `pReportName` ", pReportName, "is already used in this directory.",
"\nUse a different `pReportName`.")
}
# process dots
dots <- list(...)
dotsNames <- names(dots)
if(any(dotsNames %in% 'fpc')) fpc <- dots$fpc
# create a analysis directory
upDir <- getwd()
dir.create(pReportName)
setwd(pReportName)
start <- Sys.time()
fnames <- names(designs)
set.seed(seed)
nDesigns <- length(designs)
seeds <- matrix(ceiling(runif(2*nDesigns, 0, 9e6)), nDesigns, 2)
msgDelim <- paste('\n\n', paste(rep('*', 80), collapse = ''), '\n\n')
powerL <- powerLFPC <- list()
for(i in seq_along(designs))
{
message(msgDelim, ' Starting design: ', names(designs)[i],
" (", i, " of ",
length(designs), " designs)",
msgDelim)
temp <-
ICTpower(outFile = c(fnames[i] , save) ,
design = designs[[i]] ,
B = B ,
alpha = alpha ,
seed = seeds[i] ,
cores = cores ,
savePowerReport = TRUE ,
prompt = FALSE ,
...
)
if( exists("fpc") | length(temp) == 2)
{
powerL[[i]] <- temp$powerL
powerLFPC[[i]] <- temp$powerLFPC
}
if(!exists("fpc") & length(temp) == 1)
{
powerL[[i]] <- temp
}
}
# delete text files
txts <- dir(getwd(), glob2rx("*.txt"))
file.remove(txts)
if(dotar)
{
# find csvs created since the start of the run
csvs <- dir(getwd(), glob2rx("*.csv"), full.names = TRUE)
csvsCtime <- unlist( lapply(as.list(csvs), function(x) file.info(x)$ctime) )
sinceStart <- csvsCtime > start
csvs <- csvs[sinceStart]
# find Rdatas created since the start of the run
Rdatas <- dir(getwd(), glob2rx("*.Rdata"), full.names = TRUE)
RdatasCtime <- unlist( lapply(as.list(Rdatas), function(x) file.info(x)$ctime) )
sinceStart <- RdatasCtime > start
Rdatas <- Rdatas[sinceStart]
# package all the results into a tar file
tarName <- paste(pReportName, '.tar.gz', sep='')
tar(tarName, c(csvs, Rdatas), 'g')
# remove the other files if requested
file.remove(txts, csvs, Rdatas)
}
# save the results
power <- data.frame(do.call(rbind, lapply(powerL, function(x) x$power)) )
mEst <- data.frame(do.call(rbind, lapply(powerL, function(x) x$meanEst)) )
sdEst <- data.frame(do.call(rbind, lapply(powerL, function(x) x$sdEst)) )
row.names(power) <- fnames
row.names(mEst) <- fnames
row.names(sdEst) <- fnames
power$type <- "power"
mEst $type <- "meanEst"
sdEst$type <- "sdEst"
reportName <- paste(pReportName, 'csv', sep='')
powerOut <- rbind(power, mEst, sdEst)
names(powerOut) <- c(row.names(powerL[[1]]), 'type')
powerOut <- data.frame(design=row.names(powerOut), type=powerOut$type, powerOut)
powerOut$PersonAlyticsPower <- paste("Version",
packageVersion("PersonAlyticsPower"))
powerOut$PersonAyltics <- paste("Version", packageVersion('PersonAlytics'))
write.csv(powerOut, file=reportName, row.names=F)
# save the fpc results if fpc exists
if( exists('fpc'))
{
power <- data.frame(do.call(rbind, lapply(powerLFPC, function(x) x$power)) )
mEst <- data.frame(do.call(rbind, lapply(powerLFPC, function(x) x$meanEst)) )
sdEst <- data.frame(do.call(rbind, lapply(powerLFPC, function(x) x$sdEst)) )
row.names(power) <- fnames
row.names(mEst) <- fnames
row.names(sdEst) <- fnames
power$type <- "power"
mEst $type <- "meanEst"
sdEst$type <- "sdEst"
reportName <- paste(pReportName, 'FPC.csv', sep='_')
powerOut <- rbind(power, mEst, sdEst)
names(powerOut) <- c(row.names(powerL[[1]]), 'type')
powerOut <- data.frame(design=row.names(powerOut), type=powerOut$type, powerOut)
powerOut$PersonAlyticsPower <- paste("Version",
packageVersion("PersonAlyticsPower"))
powerOut$PersonAyltics <- paste("Version", packageVersion('PersonAlytics'))
write.csv(powerOut, file=reportName, row.names=F)
}
# move back to the parent directory
setwd(upDir)
}
#' simStatus
#'
#' @export
#'
#' @author Stephen Tueller \email{stueller@@rti.org}
#'
#' @param studyName
#'
#' @param studyDirectory Character. The full path to the directory the
#' simulation study is being run in
#'
simStatus <- function(studyName, studyDirectory=getwd())
{
# save and set wd
curDir <- getwd()
setwd(studyDirectory)
# get the designI file
if(! file.exists('ICT_Sim_Conditions.Rdata') )
{
stop("The file `ICT_Sim_Conditions.Rdata` does not exist in the directory\n",
studyDirectory)
}
designI <- read.csv('ICT_Sim_Conditions.Rdata')
nConditions <- nrow(designI)
# get the directories
simDirs <- dir(studyDirectory, glob2rx(paste("*", studyName, "*", sep='')))
studyChecker <- function(x)
{
# csvs that are output
csvs <- dir(x, glob2rx("*_PersonAlytics_*.csv"), recursive = TRUE)
# clvs that are data
csvd <- dir(x, glob2rx("*.csv"), recursive = TRUE)
csvd <- csvd[! csvd %in% csvs]
# results
c(outputCount=length(csvs), dataCount=length(csvd))
}
csvsCounts <-
data.frame(simDirs = simDirs,
do.call(rbind, lapply(as.list(simDirs), studyChecker)) )
print(csvsCounts)
message("\n\nThe design file `ICT_Sim_Conditions.Rdata` shows there are ", nConditions,
" conditions in the study.\n",
sum(csvsCounts$outputCount), "/",
nConditions, " output *.csv files are completed, (",
round(100*(sum(csvsCounts$outputCount)/nConditions),2), "%)",
" and\n", sum(csvsCounts$dataCount), "/", nConditions,
" data files have been saved (",
round(100*(sum(csvsCounts$dataCount)/nConditions),2), "%).")
# reset the wd
setwd(curDir)
}
# this evolved from
# R:\PaCCT\02 Clients\NIH\0216822.000.001 R21 ICT\03 Simulation Studies\Study1\simulationStudy1.R
#' ICTsimSetup - function to automate setting up \code{designs} to be passed to
#' \code{\link{ICTpowerSim}}, including splitting up the designs over sevar folders
#' so the simulation can be run in chunks or on multiple computers.
#'
#' @export
#'
#' @author Stephen Tueller \email{stueller@@rti.org}
#'
#' @param seed Numeric. A random seed for this simulation.
#'
#' @param nL Numeric list. A list of sample sizes. See \code{n} in
#' \code{\link{polyICT}}.
#'
#' @param phasesL A list of phases. See \code{n} in
#' \code{\link{polyICT}}.
#'
#' @param propErrVarL Numeric List. See \code{n} in
#' \code{\link{polyICT}}.
#'
#' @param randFxMeanL A list of effect sizes. See \code{n} in
#' \code{\link{polyICT}}.
#'
#' @param randFxCorMatL A list of numeric correlation matrices. See \code{n} in
#' \code{\link{polyICT}}.
#'
#' @param randFxVarL A list of numeric vectors. See \code{n} in
#' \code{\link{polyICT}}.
#'
#' @param nFolders Numeric. The number of folders to divide the simulation into.
#' The default is \code{NULL}, in which case the user is prompted for input at
#' the console after the total number of conditions is computed. To prevent
#' folders that are too large, it is reccomended that \code{nFolders} not
#' exceed 100, especially on Windows systems.
#'
#' @param ICTpowerSimOptions Character. Other options to be passed to the
#' function \code{\link{ICTpowerSim}}. For example,
#'
#' @param muFUN
#'
#' @examples
#' \dontrun{
#' seed <- 1234
#' nL <- list(10, 20)
#' phasesL <- list( makePhase(c(10,10), c(0,1)), makePhase(c(5,15), c(0,1)) )
#' propErrVarL <- list(.5, .75)
#' randFxMeanL <- list( list(randFx=list(intercept=0, slope=.25),
#' fixdFx=list(phase=.5, phaseTime=.25)),
#' list(randFx=list(intercept=0, slope=.5),
#' fixdFx=list(phase=.5, phaseTime=.25)))
#' randFxCorMatL <- list(matrix(c(1,.2,.2,1), 2, 2), matrix(c(1,.6,.6,1), 2, 2))
#' randFxVarL <- list(c(1, .1), c(1, .2))
#' B <- 3
#' nFolders <- 4
#' ICTpowerSimOptions <-"cores = 4"
#' ICTsimSetup(seed ,
#' nL ,
#' phasesL ,
#' propErrVarL ,
#' randFxMeanL ,
#' randFxCorMatL ,
#' randFxVarL ,
#' B ,
#' nFolders ,
#' ICTpowerSimOptions)
#' }
ICTsimSetup <- function(seed ,
nL ,
phasesL ,
propErrVarL ,
randFxMeanL ,
randFxCorMatL ,
randFxVarL ,
B = 1000 ,
nFolders = NULL ,
ICTpowerSimOptions = "" )
{
#TODO: add some validation for ICTpowerSimOptions
# if the Rdata file already exists, error
if( file.exists('ICT_Sim_Conditions.Rdata') )
{
stop("The file `ICT_Sim_Conditions.Rdata` already exists in the current",
" working directory,\n\n",
getwd(), "\n\nPlease move or delete `ICT_Sim_Conditions.Rdata`.",
"\nThis file creates all possible combinations\nof the conditions input",
" to the function `ICTsimSetup`.")
}
# check inputs
if( any(lapply(phasesL, length) > 2) )
{
#TODO: only works for a 2-phase design
stop("`ICTsimSetup` currently only works for 2-phase designs.")
}
# restructure the lists for expand.grid
nL <- unlist( nL )
nObservations <- unlist( lapply(phasesL, function(x) length(unlist(x))) )
nBaseline <- unlist( lapply(phasesL, function(x) length(x[[1]])) )
propErrVarL <- unlist( propErrVarL )
randFxMean <- seq_along(randFxMeanL)
randFxCorMats <- seq_along(randFxCorMatL)
randFxVars <- seq_along(randFxVarL)
# set up the designs
designI <- expand.grid(nL ,
nObservations,
nBaseline ,
propErrVarL ,
randFxMean ,
randFxCorMats ,
randFxVars )
names(designI) <- c('n' ,
'nObservations' ,
'nBaseline' ,
'propErrVar' ,
'effectSize' ,
'randFxCorMat' ,
'randFxVar' )
designI$ConditionName <- paste(
'n', designI$n ,
'o', designI$nObservations ,
'nbl', designI$nBaseline ,
'pev', designI$propsErrVar ,
'fx', designI$effectSize ,
'r', designI$randFxCorMat ,
'V', designI$randFxVar ,
sep='')
# send a message about the simulation design size
#TODO: estimate run time
message("\n\nThe current simulation study will have ", nrow(designI),
" conditions.\n\n")
# get nFolders
if(is.null(nFolders))
{
prompt <- "Enter the number of folders to break the simulation into: "
nFolders <- as.integer( readline(prompt = prompt) )
if(nrow(designI)/nFolders < 10)
{
stop('The choice of `nFolders` will result in fewer than 10 conditions',
'\nper folder. Use a smaller value for `nFolders`.')
}
}
# break up designI into nFolders subsets and set up directories
designI$dir <- sort( rep(1:nFolders, length.out = nrow(designI)) )
# set seeds
designI$seeds <- .makeSeeds(seed, nrow(designI))
# save objects for calling by the scripts produced in the loop
save(designI ,
nL ,
phasesL ,
propErrVarL ,
randFxMeanL ,
randFxCorMatL ,
randFxVarL ,
nFolders ,
ICTpowerSimOptions ,
file = 'ICT_Sim_Conditions.Rdata' )
for(i in unique(designI$dir))
{
d <- paste('simStudy1dir', i, sep='')
dir.create( d )
cat(file = paste(d, '/simStudy1dir', i, '.r', sep=''),
paste("setwd('", paste(getwd(), d, sep="/"), "')", "\n", sep=''),
"load('../ICT_Sim_Conditions.Rdata')\n",
"designs <- list()\n",
"library(PersonAlyticsPower)\n",
paste("for(i in which(designI$dir==",i,"))\n"),
"{\n",
" designs[[designI$ConditionName[i]]] <- polyICT$new(\n",
" n = designI$n[i],\n",
" phases = makePhase(c(designI$nBaseline[i], \n",
" designI$nObservations[i]-designI$nBaseline[i]),\n",
" c(0,1)),\n",
" propErrVar = designI$propErrVar[i],\n",
" randFxMean = randFxMeanL[[designI$effectSize[i]]],\n",
" randFxCorMat = randFxCorMatL[[designI$randFxCorMat[i]]],\n",
" randFxVar = randFxVarL[[designI$randFxVar[i]]]\n",
" )\n",
"}\n",
paste("ICTpowerSim(designs,\n",
"pReportName = paste('dir', ",i,", sep=''),\n",
"B=", B, ",\n",
"seed=", designI$seeds[i], ",\n",
ICTpowerSimOptions,
")\n")
)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.