inst/doc/HPC-computing.R

## ----nomessages, echo = FALSE-------------------------------------------------
knitr::opts_chunk$set(
  warning = FALSE,
  message = FALSE,
  fig.height = 5,
  fig.width = 5
)
options(digits=4)
par(mar=c(3,3,1,1)+.1)

## ----include=FALSE------------------------------------------------------------
library(SimDesign)

## -----------------------------------------------------------------------------
# SimDesign::SimFunctions()
library(SimDesign)

Design <- createDesign(N = c(10, 20, 30))

Generate <- function(condition, fixed_objects) {
    dat <- with(condition, rnorm(N, 10, 5)) # distributed N(10, 5)
    dat
}

Analyse <- function(condition, dat, fixed_objects) {
    ret <- c(mean=mean(dat), median=median(dat)) # mean/median of sample data
    ret
}

Summarise <- function(condition, results, fixed_objects){
    colMeans(results)
}

## ----eval=FALSE---------------------------------------------------------------
# # standard setup (not ideal for HPC clusters as parallelization
# #  occurs within the design conditions, not across)
# res <- runSimulation(design=Design, replications=10000, generate=Generate,
#                      analyse=Analyse, summarise=Summarise, parallel=TRUE,
#                      filename='mysim')

## -----------------------------------------------------------------------------
rc <- 100   # number of times the design row was repeated
Design300 <- expandDesign(Design, repeat_conditions = rc)
Design300

# compare the Design.IDs
print(Design, show.IDs = TRUE)
print(Design300, show.IDs = TRUE)

# target replication number for each condition
rep_target <- 10000

# replications per row in Design300
replications <- rep(rep_target  / rc, nrow(Design300))

## -----------------------------------------------------------------------------
rc <- c(100, 100, 1000)
DesignUnbalanced <- expandDesign(Design, repeat_conditions = rc)
DesignUnbalanced

rep_target <- 10000
replicationsUnbalanced <- rep(rep_target / rc, times = rc)
head(replicationsUnbalanced)
table(replicationsUnbalanced)

## -----------------------------------------------------------------------------
set.seed(0)
x <- runif(100)
set.seed(1)
y <- runif(100)

plot(x, y)           ## seemingly independent
plot(x[-1], y[-100]) ## subsets perfectly correlated

## -----------------------------------------------------------------------------
# genSeeds()   # do this once on the main node/home computer and store the number!
iseed <- 1276149341

## ----eval=FALSE---------------------------------------------------------------
# # get assigned array ID (default uses type = 'slurm')
# arrayID <- getArrayID()

## ----eval=FALSE---------------------------------------------------------------
# # run the simulation on subset based on arrayID subset information
# runArraySimulation(design=Design300, replications=replications,
#                    generate=Generate, analyse=Analyse,
#                    summarise=Summarise, iseed=iseed,
#                    arrayID=arrayID, filename='mysim')

## ----eval=FALSE---------------------------------------------------------------
# # run the simulation on subset based on arrayID subset information
# runArraySimulation(design=Design300, replications=replications,
#                    generate=Generate, analyse=Analyse,
#                    summarise=Summarise, iseed=iseed, arrayID=arrayID,
#                    dirname='mysimfiles', filename='mysim')

## ----eval=FALSE---------------------------------------------------------------
# library(SimDesign)
# 
# Design <- createDesign(N = c(10, 20, 30))
# 
# Generate <- function(condition, fixed_objects) {
#     dat <- with(condition, rnorm(N, 10, 5)) # distributed N(10, 5)
#     dat
# }
# 
# Analyse <- function(condition, dat, fixed_objects) {
#     ret <- c(mean=mean(dat), median=median(dat)) # mean/median of sample data
#     ret
# }
# 
# Summarise <- function(condition, results, fixed_objects){
#     colMeans(results)
# }
# 
# # expand the design to create 300 rows with associated replications
# rc <- 100
# Design300 <- expandDesign(Design, repeat_conditions = rc)
# 
# rep_target <- 10000
# replications <- rep(rep_target / rc, nrow(Design300))
# 
# # genSeeds() # do this once on the main node/home computer, and store the number!
# iseed <- 1276149341
# 
# # get assigned array ID (default uses type = 'slurm')
# arrayID <- getArrayID()
# 
# # run the simulation on subset based on arrayID subset information
# runArraySimulation(design=Design300, replications=replications,
#                    generate=Generate, analyse=Analyse,
#                    summarise=Summarise, iseed=iseed, arrayID=arrayID,
#                    dirname='mysimfiles', filename='mysim')

## ----eval=FALSE---------------------------------------------------------------
# library(SimDesign)
# 
# # automatically checks whether all saved files are present via SimCheck()
# Final <- SimCollect('mysimfiles/')
# Final

## ----eval=FALSE---------------------------------------------------------------
# # save the aggregated simulation object for subsequent analyses
# saveRDS(Final, "../final_sim.rds")

## -----------------------------------------------------------------------------
library(SimDesign)

Design <- createDesign(N = c(10, 20, 30),
                       SD = c(1,2,3))

Generate <- function(condition, fixed_objects) {
    dat <- with(condition, rnorm(N, 10, sd=SD)) # distributed N(10, 5)
    dat
}

Analyse <- function(condition, dat, fixed_objects) {
    ret <- c(mean=mean(dat), median=median(dat)) # mean/median of sample data
    ret
}

Summarise <- function(condition, results, fixed_objects){
    colMeans(results)
}

Design

## ----eval=FALSE---------------------------------------------------------------
# 
# # get array ID
# arrayID <- getArrayID()
# 
# multirow <- FALSE  # submit multiple rows of Design object to array?
# if(multirow){
#     # If selecting multiple design rows per array, such as the first 3 rows,
#     #  then next 3 rows, and so on, something like the following would work
# 
#     ## For arrayID=1, rows 1 through 3 are evaluated
#     ## For arrayID=2, rows 4 through 6 are evaluated
#     ## For arrayID=3, rows 7 through 9 are evaluated
#     array2row <- function(arrayID) 1:3 + 3 * (arrayID-1)
# } else {
#     # otherwise, use one row per respective arrayID
#     array2row <- function(arrayID) arrayID
# }
# 
# # Make sure parallel=TRUE flag is on to use all available cores!
# runArraySimulation(design=Design, replications=10000,
#                    generate=Generate, analyse=Analyse, summarise=Summarise,
#                    iseed=iseed, dirname='mysimfiles', filename='mysim',
#                    parallel=TRUE, arrayID=arrayID, array2row=array2row)

## ----eval=FALSE---------------------------------------------------------------
# # Return successful results up to the 11 hour mark
# runArraySimulation(design=Design300, replications=replications,
#                    generate=Generate, analyse=Analyse,
#                    summarise=Summarise, iseed=iseed, arrayID=arrayID,
#                    dirname='mysimfiles', filename='mysim',
#                    control=list(max_time="11:00:00"))
# 

## ----eval=FALSE---------------------------------------------------------------
# Final <- SimCollect('mysimfiles/')
# Final

## ----eval=FALSE---------------------------------------------------------------
# Missed <- SimCollect(files=dir(), check.only=TRUE)
# Missed

## ----include=FALSE------------------------------------------------------------
Design <- createDesign(N = c(10, 20, 30))
subDesign <- Design[c(1,3), ]
replications_missed <- c(1000, 2000)

## ----eval=FALSE---------------------------------------------------------------
# subDesign <- Design[c(1,3),]
# replications_missed <- subset(Missed, select=MISSED_REPLICATIONS)

## -----------------------------------------------------------------------------
print(subDesign, show.IDs = TRUE)
replications_missed

## -----------------------------------------------------------------------------
rc <- 50
Design_left <- expandDesign(subDesign, rc) # smaller number of reps per array
Design_left

replications_left <- rep(replications_missed/rc, each=rc)
table(replications_left)

# new total design and replication objects
Design_total <- rbindDesign(Design300, Design_left, keep.IDs=TRUE)
nrow(Design_total)
print(Design_total, show.IDs = TRUE)
replications_total <- c(replications, replications_left)
table(replications_total)

# this *must* be the same as the original submission!
iseed <- 1276149341

## ----eval=FALSE---------------------------------------------------------------
# # See if any missing still
# SimCollect('mysimfiles', check.only=TRUE)
# 
# # Obtain complete simulation results
# Final <- SimCollect('mysimfiles')

Try the SimDesign package in your browser

Any scripts or data that you put into this service are public.

SimDesign documentation built on April 3, 2025, 6:06 p.m.