#' @title Run EBV Simulations
#' @description Run a set of scenarios and replicates to generate genotypes.
#'
#' @param label label used for output files and folders for the run.
#' @param scenarios data.frame where each row provides different parameters for
#' each scenario. See \code{Details} for more information on its structure.
#' @param num.rep number of replicates to run for each scenario.
#' @param google.drive.id id of root Google Drive to save run to.
#' @param delete.fsc.files delete fastsimcoal files when the run is complete?
#' @param use.wd use the working directory to save fastsimcoal files? If
#' \code{FALSE} (default), a temporary folder is used, which is deleted when
#' the R session is closed.
#' @param num.cores number of cores to use. Each replciate is allocated to a
#' separate core. If set to 1, then progress is output to the console.
#' @param fsc.exec name of fastsimcoal executable.
#'
#' @details The \code{scenarios} data.frame must have the following columns:
#' \tabular{ll}{
#' \code{num.pops} \tab number of populations.\cr
#' \code{Ne} \tab effective population size of each population.\cr
#' \code{num.samples} \tab number of samples to use.\cr
#' \code{mig.rate} \tab migration rate.\cr
#' \code{mig.type} \tab type of migration matrix: "island" or "stepping.stone".\cr
#' \code{dvgnc.time} \tab divergence time of populations.\cr
#' \code{rmetasim.ngen} \tab number of generations to run \code{rmetasim} for.\cr
#' }
#'
#' @return vector of folders where scenario data and replicates were written.
#'
#' @author Eric Archer \email{eric.archer@@noaa.gov}
#'
#' @export
#'
runEBVsim <- function(label, scenarios, num.rep,
google.drive.id = NULL, delete.fsc.files = TRUE,
use.wd = FALSE, num.cores = 1, fsc.exec = "fsc26") {
if(!all(c("scenario", "num.pops", "Ne", "num.samples", "mig.rate", "mut.rate",
"num.loci", "ploidy", "rmetasim.ngen", "mig.type",
"marker.type") %in% colnames(scenarios))) {
stop("'scenarios' is missing some required columns")
}
for(x in colnames(scenarios)) {
if(x %in% c("scenario", "mig.type", "marker.type")) {
scenarios[[x]] <- tolower(as.character(scenarios[[x]]))
}
if(x %in% c("num.pops", "Ne", "num.samples", "mig.rate", "mut.rate",
"num.loci", "ploidy", "rmetasim.ngen")) {
scenarios[[x]] <- as.numeric(scenarios[[x]])
}
}
params <- list(
label = label,
scenarios = scenarios,
num.rep = num.rep,
google.drive.id = google.drive.id,
delete.fsc.files = delete.fsc.files,
use.wd = use.wd,
num.cores = num.cores,
fsc.exec = fsc.exec,
folders = writeScenarios(label, scenarios, google.drive.id)
)
params$rep.id <- if(!is.null(params$folders$google)) {
googledrive::as_id(params$folders$google)
} else NULL
params$rep.df <- expand.grid(
rep = 1:params$num.rep,
sc = 1:nrow(params$scenarios),
stringsAsFactors = FALSE
)
# params$Rland <- lapply(1:nrow(params$scenarios), .setupScRland, params = params)
# names(params$Rland) <- params$scenarios$scenario
# Run start header
start.time <- Sys.time()
cat(
paste0("\n", format(start.time)),
"Running", num.rep, "replicates of",
nrow(params$scenarios), "scenarios...\n\n"
)
is.1.val <- sapply(params$scenarios, function(x) length(unique(x)) == 1)
not.1.val <- colnames(params$scenarios)[!is.1.val]
print.df <- if(length(not.1.val) > 0) params$scenarios[, not.1.val] else NULL
if(!is.null(print.df)) print(print.df)
for(x in colnames(params$scenarios)[is.1.val]) {
cat(x, "=", unique(params$scenarios[[x]]), "\n")
}
cat("\n")
suppressMessages({
# Run simulation
x <- gc(FALSE)
sc.rep.vec <- 1:nrow(params$rep.df)
cl <- swfscMisc::setupClusters(num.cores)
run.smry <- tryCatch({
if(is.null(cl)) {
lapply(sc.rep.vec, .runWithLabel, params = params)
cat("\n")
} else {
parallel::clusterEvalQ(cl, require(ebvSim))
parallel::clusterExport(cl, "params", environment())
parallel::parLapplyLB(cl, sc.rep.vec, .runScRep, params = params)
}
}, finally = if(!is.null(cl)) parallel::stopCluster(cl) else NULL)
x <- gc(FALSE)
})
# Show and return results
run.time <- difftime(Sys.time(), start.time)
cat(
format(Sys.time()), "Run complete in",
format(round(run.time, 2)), "\n\n"
)
run.smry <- do.call(rbind, run.smry)
units(run.smry$run.time) <- units(run.time)
params$scenarios <- params$scenarios %>%
dplyr::left_join(run.smry, by = "scenario")
params$rep.df <- NULL
save(params, file = paste0(params$label, "_params.rdata"))
if(!is.null(print.df)) {
params$scenarios[, not.1.val] %>%
dplyr::left_join(
run.smry %>%
dplyr::group_by(.data$scenario) %>%
dplyr::summarize(
mean.rep.time = round(mean(.data$run.time), 2),
total.scn.time = round(sum(.data$run.time), 2)
) %>%
dplyr::ungroup(),
by = "scenario"
) %>%
print()
}
invisible(params)
}
#' @noRd
#'
.runWithLabel <- function(rep.i, params) {
# print label for each replicate when num.cores = 1
cat(paste0(
format(Sys.time()),
" ---- Scenario ", params$scenarios$scenario[params$rep.df$sc[rep.i]],
", Replicate ", params$rep.df$rep[rep.i],
" (", round(100 * rep.i / nrow(params$rep.df)), "%) ----\n"
))
.runScRep(rep.i, params)
}
#' @noRd
#'
.runScRep <- function(rep.i, params) {
sc.num <- params$rep.df$sc[rep.i]
rep.num <- params$rep.df$rep[rep.i]
tryCatch(
{
start.time <- Sys.time()
# fastsimcoal2 run and extract data
p <- .runFscSim(rep.i, params)
gen.data <- strataG::fscReadArp(p)
if(params$delete.fsc.files) strataG::fscCleanup(p$label, p$folder)
rm(p)
x <- gc(FALSE)
if(is.null(gen.data)) return(NULL)
sc <- params$scenarios[sc.num, ]
# rmetasim run if requested
if(sc$rmetasim.ngen > 0) {
cat(format(Sys.time()), "running rmetasim...\n")
gen.freqs <- calcFreqs(gen.data)
rm(gen.data)
gen.data <- gen.freqs %>%
.runRmetasim(
Rland = .setupScRland(sc.num, params), #params$Rland[[sc$scenario]],
sc = sc
) %>%
strataG::landscape2df()
rm(gen.freqs)
}
# return subset of samples if num.samples is not NA
if(!is.na(sc$num.samples)) {
to.keep <- tapply(1:nrow(gen.data), gen.data$strata, function(i) {
if(length(i) <= sc$num.samples) i else sample(i, sc$num.samples)
})
gen.data <- gen.data[unlist(to.keep), ]
}
gen.data$id <- 1:nrow(gen.data)
end.time <- Sys.time()
# write data
fname <- repFname(params$label, sc$scenario, rep.num)
out.name <- file.path(params$folders$out, fname)
utils::write.csv(gen.data, file = out.name, row.names = FALSE)
rm(gen.data)
x <- gc(FALSE)
if(!is.null(params$rep.id)) {
googledrive::drive_upload(
out.name,
path = params$rep.id,
name = fname,
verbose = FALSE
)
}
# return run summary
data.frame(
scenario = sc$scenario,
replicate = rep.num,
start = start.time,
end = end.time,
run.time = difftime(end.time, start.time, units = "mins"),
fname = out.name,
stringsAsFactors = FALSE
)
},
error = function(e) {
header <- paste0(
format(Sys.time()),
" Scenario ", params$scenarios$scenario[sc.num],
", Replicate ", rep.num
)
writeLines(
paste(header, ":", e, "\n"),
con = paste0(params$label, "_ERROR_text.txt")
)
utils::write.csv(gc(), file = paste0(params$label, "_ERROR_memory.csv"))
save(
rep.i, sc.num, rep.num, params,
file = paste0(params$label, "_ERROR_workspace.rdata")
)
stop(header, " : ", e)
}
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.