R/slim.R

Defines functions slim

Documented in slim

#' Run a slendr model in SLiM
#'
#' This function will execute a SLiM script generated by the \code{compile}
#' function during the compilation of a slendr demographic model.
#'
#' The arguments \code{sequence_length} and \code{recombination_rate} can be
#' omitted for slendr models utilizing customized initialization of genomic
#' architecture. In such cases, users may either provide hard-coded values
#' directly through SLiM's \code{initializeGenomicElement()} and
#' \code{initializeRecombinationRate()} functions or utilize slendr's
#' templating functionality provided by its \code{substitute()} function.
#
#' When \code{ts = TRUE}, the returning value of this function depends on whether
#' or not the \code{path} argument was set. If the user did provide the \code{path}
#' where output files should be saved, the path is returned (invisibly). This is
#' mostly intended to support simulations of customized user models. If \code{path}
#' is not set by the user, it is assumed that a tree-sequence object is desired as
#' a sole return value of the function (when \code{ts = TRUE}) and so it is automatically
#' loaded when simulation finishes, or (when \code{ts = FALSE}) that only customized
#' files are to be produced by the simulation, in which the user will be loading such
#' files by themselves (and only the path is needed).
#'
#' @param model Model object created by the \code{compile} function
#' @param sequence_length Total length of the simulated sequence (in base-pairs)
#' @param recombination_rate Recombination rate of the simulated sequence (in
#'   recombinations per basepair per generation)
#' @param samples A data frame of times at which a given number of individuals
#'   should be remembered in the tree-sequence (see \code{schedule_sampling} for a
#'   function that can generate the sampling schedule in the correct format). If
#'   missing, only individuals present at the end of the simulation will be
#'   recorded in the final tree-sequence file.
#' @param ts Should a tree sequence be simulated from the model?
#' @param path Path to the directory where simulation result files will be saved.
#'   If \code{NULL}, this directory will be automatically created as a temporary
#'   directory. If \code{TRUE}, this path will be also returned by the function.
#'   If a string is given, it is assumed to be a path to a directory where simulation
#'   results will be saved. In this case, the function will return this path invisibly.
#'   Note that if a tree-sequence file should be simulated (along with other files,
#'   potentially), that tree-sequence file (named 'slim.trees' by default) will
#'   have to be explicitly loaded using \code{ts_read()}.
#' @param random_seed Random seed (if \code{NULL}, a seed will be generated between
#'   0 and the maximum integer number available)
#' @param method How to run the script? ("gui" - open in SLiMgui, "batch" - run
#'   on the command line)
#' @param verbose Write the log information from the SLiM run to the console
#'   (default \code{FALSE})?
#' @param run Should the SLiM engine be run? If \code{FALSE}, the command line SLiM
#'   command will be printed (and returned invisibly as a character vector) but not executed.
#' @param burnin Length of the burnin (in model's time units, i.e. years)
#' @param max_attempts How many attempts should be made to place an offspring
#'   near one of its parents? Serves to prevent infinite loops on the SLiM
#'   backend. Default value is 1.
#' @param spatial Should the model be executed in spatial mode? By default, if a
#'   world map was specified during model definition, simulation will proceed in
#'   a spatial mode.
#' @param coalescent_only Should \code{initializeTreeSeq(retainCoalescentOnly =
#'   <...>)} be set to \code{TRUE} (the default) or \code{FALSE}? See
#'   "retainCoalescentOnly" in the SLiM manual for more detail.
#' @param locations If \code{NULL}, locations are not saved. Otherwise, the
#'   path to the file where locations of each individual throughout the simulation
#'   will be saved (most likely for use with \code{animate_model}).
#' @param slim_path Path to the appropriate SLiM binary (this is useful if the
#'   \code{slim} binary is not on the \code{$PATH}). Note that this argument must
#'   be specified if the function is being run on Windows.
#'
#' @return A tree-sequence object loaded via Python-R reticulate interface function \code{ts_read}
#'   (internally represented by the Python object \code{tskit.trees.TreeSequence}). If the
#'   \code{path} argument was set, specifying the directory where results should be saved,
#'   the function will return this path as a single-element character vector.
#'
#' @examples
#' \dontshow{check_dependencies(python = TRUE, slim = TRUE, quit = TRUE) # dependencies must be present
#' }
#' init_env()
#'
#' # load an example model
#' model <- read_model(path = system.file("extdata/models/introgression", package = "slendr"))
#'
#' # afr and eur objects would normally be created before slendr model compilation,
#' # but here we take them out of the model object already compiled for this
#' # example (in a standard slendr simulation pipeline, this wouldn't be necessary)
#' afr <- model$populations[["AFR"]]
#' eur <- model$populations[["EUR"]]
#' chimp <- model$populations[["CH"]]
#'
#' # schedule the sampling of a couple of ancient and present-day individuals
#' # given model at 20 ky, 10 ky, 5ky ago and at present-day (time 0)
#' modern_samples <- schedule_sampling(model, times = 0, list(afr, 5), list(eur, 5), list(chimp, 1))
#' ancient_samples <- schedule_sampling(model, times = c(30000, 20000, 10000), list(eur, 1))
#'
#' # sampling schedules are just data frames and can be merged easily
#' samples <- rbind(modern_samples, ancient_samples)
#'
#' # run a simulation using the SLiM back end from a compiled slendr model object and return
#' # a tree-sequence object as a result
#' ts <- slim(model, sequence_length = 1e5, recombination_rate = 0, samples = samples)
#'
#' # simulated tree-sequence object can be saved to a file using ts_write()...
#' ts_file <- normalizePath(tempfile(fileext = ".trees"), winslash = "/", mustWork = FALSE)
#' ts_write(ts, ts_file)
#' # ... and, at a later point, loaded by ts_read()
#' ts <- ts_read(ts_file, model)
#'
#' ts
#' @export
slim <- function(
    model, sequence_length, recombination_rate, samples = NULL, ts = TRUE, path = NULL,
    random_seed = NULL, method = c("batch", "gui"),
    verbose = FALSE, run = TRUE, slim_path = NULL, burnin = 0,
    max_attempts = 1, spatial = !is.null(model$world), coalescent_only = TRUE,
    locations = NULL
) {
  method <- match.arg(method)

  random_seed <- set_random_seed(random_seed)

  if (is.null(model$path))
    stop("It is not possible to simulate non-serialized models in SLiM", call. = FALSE)

  results_path <- if (is.logical(path) || is.null(path)) file.path(tempdir(), paste0("slendr_results_", random_seed)) else path
  results_path <- normalizePath(results_path, winslash = "/", mustWork = FALSE)
  results_path <- paste0(results_path, "/")
  dir.create(results_path, recursive = TRUE, showWarnings = FALSE)

  if (method == "gui" & !interactive())
    stop("SLiMgui can only be run from an interactive R session", call. = FALSE)

  model_dir <- model$path
  if (!dir.exists(model_dir))
    stop(sprintf("Model directory '%s' does not exist", model_dir), call. = FALSE)

  script_contents <- readLines(normalizePath(file.path(model$path, "script.slim"),
                                             winslash = "/", mustWork = TRUE))
  # 2 occurences in the default script involve metadata -- more means that the
  # constant must be given on the command line
  seqlen_required <- sum(grepl("SEQUENCE_LENGTH", script_contents)) > 2
  if (missing(sequence_length) && seqlen_required)
    stop("Specifying the `sequence_length =` argument is required", call. = FALSE)
  if (!missing(sequence_length) && !seqlen_required)
    stop("Specifying `sequence_length =` is not allowed when it is already given\n",
         "in the customized script", call. = FALSE)

  # 2 occurences in the default script involve metadata -- more means that the
  # constant must be given on the command line
  recrate_required <- sum(grepl("RECOMBINATION_RATE", script_contents)) > 2
  if (missing(recombination_rate) && recrate_required)
    stop("Specifying the `recombination_rate =` argument is required", call. = FALSE)
  if (!missing(recombination_rate) && !recrate_required)
    stop("Specifying `recombination_rate =` is not allowed when it is already given\n",
         "in the customized script", call. = FALSE)

  if (!missing(sequence_length) && seqlen_required &&
      (sequence_length %% 1 != 0 || sequence_length <= 0))
    stop("Sequence length must be a non-negative integer number", call. = FALSE)
  if (!missing(sequence_length) && recrate_required &&
      (!is.numeric(recombination_rate) || recombination_rate < 0))
    stop("Recombination rate must be a numeric value", call. = FALSE)

  if (missing(sequence_length)) sequence_length <- -1
  if (missing(recombination_rate)) recombination_rate <- -1

  # verify checksums of serialized model configuration files
  checksums <- readr::read_tsv(file.path(model_dir, "checksums.tsv"), progress = FALSE,
                               col_types = "cc")
  verify_checksums(file.path(model_dir, checksums$file), checksums$hash)

  if (is.character(slim_path) && !all(file.exists(slim_path)))
    stop("SLiM binary not found at ", slim_path, call. = FALSE)

  spatial <- if (spatial) "T" else "F"
  locations <- if (is.character(locations)) normalizePath(locations, winslash = "/", mustWork = FALSE) else ""
  coalescent_only <- if (coalescent_only) "T" else "F"
  simulate_ts <- if (ts) "T" else "F"
  burnin <- round(burnin / model$generation_time)

  if (ts) {
    sampling_path <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
    sampling_df <- process_sampling(samples, model, verbose)
    readr::write_tsv(sampling_df, sampling_path)
  } else
    sampling_path <- ""

  binary <- if (!is.null(slim_path)) slim_path else get_binary(method)
  if (binary != "open -a SLiMgui" && Sys.which(binary) == "")
    stop(sprintf("%s binary not found. Please modify your $PATH accordingly or
  specify the path manually by setting the 'binary_path' argument.", binary),
  call. = FALSE)

  seed <- paste0(" -d SEED=", random_seed)

  samples_arg <- if (sampling_path == "") "" else paste0(" -d \"SAMPLES_PATH='", sampling_path, "'\"")

  script_path <- path.expand(file.path(model_dir, "script.slim"))

  if (method == "gui") {
    # to be able to execute the script in the SLiMgui, we have to hardcode
    # the path to the model configuration directory
    modif_path <- normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
    script_contents <- readLines(script_path) %>%
      gsub("\"MODEL_PATH\", \".\"", paste0("\"MODEL_PATH\", \"", model$path, "\""), .) %>%
      gsub("\"SAMPLES_PATH\", \"\"", paste0("\"SAMPLES_PATH\", \"", sampling_path, "\""), .) %>%
      gsub("optional_arg\\(\"TS\", \"\"\\)", sprintf("defineConstant(\"TS\", %s)", simulate_ts), .) %>%
      gsub("required_arg\\(\"SEQUENCE_LENGTH\"\\)", sprintf("defineConstant(\"SEQUENCE_LENGTH\", %s)", sequence_length), .) %>%
      gsub("required_arg\\(\"RECOMBINATION_RATE\"\\)", sprintf("defineConstant(\"RECOMBINATION_RATE\", %s)", recombination_rate), .) %>%
      gsub("required_arg\\(\"PATH\"\\)", sprintf("defineConstant(\"PATH\", \"%s\")", results_path), .) %>%
      gsub("optional_arg\\(\"BURNIN_LENGTH\", 0\\)", sprintf("defineConstant(\"BURNIN_LENGTH\", %s)", burnin), .)

    # check if the SLiM code was customized by the user
    init_start <- grep("default slendr neutral initialization -- start", script_contents)
    init_end <- grep("default slendr neutral initialization -- end", script_contents)
    if (init_start == init_end - 1) {
      script_contents[init_start] <- paste0(
        script_contents[init_start], "\n",
        "initialize() {\n",
        sprintf("    defineConstant(\"SEQUENCE_LENGTH\", %s);", sequence_length), "\n",
        sprintf("    defineConstant(\"RECOMBINATION_RATE\", %s);", recombination_rate), "\n",
        "}"
      )
    }

    cat(script_contents, file = modif_path, sep = "\n")
    system(sprintf("%s %s", binary, modif_path))
  } else {
    slim_command <- paste(binary,
                          seed,
                          samples_arg,
                          paste0("-d \"MODEL_PATH='", model_dir, "'\""),
                          paste0("-d \"PATH='", results_path, "'\""),
                          paste0("-d SIMULATE_TS=", simulate_ts),
                          paste0("-d SPATIAL=", spatial),
                          paste0("-d SEQUENCE_LENGTH=", sequence_length),
                          paste0("-d RECOMBINATION_RATE=", recombination_rate),
                          paste0("-d BURNIN_LENGTH=", burnin),
                          paste0("-d SIMULATION_LENGTH=", model$length),
                          paste0("-d \"LOCATIONS_PATH='", locations, "'\""),
                          paste0("-d COALESCENT_ONLY=", coalescent_only),
                          paste0("-d MAX_ATTEMPTS=", max_attempts),
                          script_path)

    if (verbose || !run) {
      cat("--------------------------------------------------\n")
      cat("SLiM command:\n\n")
      cat(slim_command, "\n")
      cat("--------------------------------------------------\n\n")
    }

    if (!run) return(invisible(slim_command))

    # execute the command, capture all log output and decide whether to print
    # any of the log information to the console
    execute_fun <- if (Sys.info()["sysname"] == "Windows") shell else system
    log_output <- suppressWarnings(execute_fun(paste(slim_command, "2>&1"), intern = TRUE))
    log_warnings <- grep("#WARNING", log_output, value = TRUE) %>%
      grep("Species::InitializePopulationFromFile", ., invert = TRUE, value = TRUE)
    if (verbose)
      cat(log_output, sep = "\n")
    else if (length(log_warnings)) {
      warning("There were some warnings during the simulation run:\n",
              paste(log_warnings, collapse = "\n"), call. = FALSE)
    }

    if (!any(grepl("Simulation finished", log_output))) {
      if (!verbose) cat(log_output, sep = "\n")
      stop("Unfortunately SLiM terminated before a tree sequence was saved.\n",
           "See the above for an indication of where things ",
           "could have gone wrong.",
           ifelse(!is.null(attr(log_output, "status")),
                  paste0("\n\nSLiM exit status: ",
                         attr(log_output, "status"), "\n",
                         "Message: ", attr(log_output, "errmsg")),
                  ""), call. = FALSE)
    }
  }

  # if the simulation was run in GUI mode, wait for the confirmation from the user that it
  # finished before loading the tree-sequence output file
  if (method == "gui" && ts)
    readline("Please confirm that the SLiMgui simulation is finished [press ENTER]")

  if (!is.logical(path) && is.null(path)) {
    if (ts) {
      ts_path <- file.path(results_path, "slim.trees")
      if (!file.exists(ts_path))
        stop("Tree sequence was not found at the expected location:\n", ts_path, call. = FALSE)

      if (verbose) {
        cat("Tree sequence was saved to: ", ts_path, "\n")
        cat("Loading the tree-sequence file...\n")

      }

      ts_object <- ts_read(model, file = ts_path)
      return(ts_object)
    } else
      return(results_path)
  } else
    return(invisible(results_path))
}
bodkan/spannr documentation built on Dec. 19, 2024, 11:43 p.m.