R/fscWrite.R

Defines functions fscWrite .fscCheckSettings .notAnumber .fscCollectParams .fscWriteParTpl .fscWriteEst .fscWriteSFS

Documented in fscWrite

#' @title Write fastsimcoal2 input files
#' @description Write files necessary to run a \code{fastsimcoal2} simulation.
#'
#' @param demes matrix of deme sampling information created by the 
#'   \code{\link{fscSettingsDemes}} function.
#' @param genetics data.frame specifying loci to simulate created by the 
#'   \code{\link{fscSettingsGenetics}} function.
#' @param migration a list of matrices giving the migration rates 
#'   between pairs of demes created by the \code{\link{fscSettingsMigration}} 
#'   function.
#' @param events matrix of historical events created by the 
#'   \code{\link{fscSettingsEvents}} function.
#' @param est list of parameter estimation definitions and rules generated by 
#' the \code{\link{fscSettingsEst}} function.
#' @param def matrix of parameter values to substitute into the model generated 
#' by the \code{\link{fscSettingsDef}} function.
#' @param label character string used to label output files for the simulation.
#' @param use.wd use current working directory for input and output? If 
#'   \code{FALSE} then a temporary directory in the session temporary directory. 
#'   Note that this directory is deleted when the R session closed. See 
#'   \code{\link{tempdir}} for more information.
#' 
#' @note
#' 
#' @return Writes input files for \code{fastsimcoal2} and returns a list of
#'   input parameters, input file, and input filenames. This list is the primary
#'   input to \code{\link{fscRun}}.
#' 
#' @note \code{fastsimcoal2} is not included with `strataG` and must be
#'   downloaded separately. Additionally, it must be installed such that it can
#'   be run from the command line in the current working directory. 
#'   The function \code{fscTutorial()} will open a detailed tutorial on the 
#'   interface in your web browser.
#' 
#' @references Excoffier, L. and Foll, M (2011) fastsimcoal: a continuous-time 
#'   coalescent simulator of genomic diversity under arbitrarily complex 
#'   evolutionary scenarios Bioinformatics 27: 1332-1334.\cr
#'   Excoffier, L., Dupanloup, I., Huerta-Sánchez, E., Sousa, V.C., 
#'   and M. Foll (2013) Robust demographic inference from genomic and SNP data. 
#'   PLOS Genetics, 9(10):e1003905. \cr
#'   \url{http://cmpg.unibe.ch/software/fastsimcoal2/}
#' 
#' @author Eric Archer \email{eric.archer@@noaa.gov}
#' 
#' @seealso \code{\link{fsc.input}}, \code{\link{fscRun}}, \code{\link{fscRead}}
#' 
#' @examples \dontrun{
#' #' # three demes with optional names
#' demes <- fscSettingsDemes(
#'   Large = fscDeme(10000, 10), 
#'   Small = fscDeme(2500, 10),
#'   Medium = fscDeme(5000, 3, 1500)
#' )
#' 
#' # four historic events
#' events <- fscSettingsEvents(
#'   fscEvent(event.time = 2000, source = 1, sink = 2, prop.migrants = 0.05),
#'   fscEvent(2980, 1, 1, 0, 0.04),
#'   fscEvent(3000, 1, 0),
#'   fscEvent(15000, 0, 2, new.size = 3)
#'  )
#'  
#' # four genetic blocks of different types on three chromosomes.  
#' genetics <- fscSettingsGenetics(
#'   fscBlock_snp(10, 1e-6, chromosome = 1),
#'   fscBlock_dna(10, 1e-5, chromosome = 1),
#'   fscBlock_microsat(3, 1e-4, chromosome = 2),
#'   fscBlock_standard(5, 1e-3, chromosome = 3)
#' )
#' 
#' params <- fscWrite(demes = demes, events = events, genetics = genetics)
#' }
#' 
#' @name fscWrite
#' @export
#' 
fscWrite <- function(demes, genetics, migration = NULL, events = NULL, 
                     est = NULL, def = NULL, label = "strataG.fsc",
                     use.wd = FALSE) {

  # ---- Create parameter and settings list
  p <- list(
    label = make.names(label), 
    folder = if(use.wd) "." else tempdir(),
    files = list(),
    settings = list(
      demes = demes, migration = migration, events = events,
      genetics = genetics, est = est, def = def
    )
  )
  
  # ---- Check settings and parameters
  .fscCheckSettings(p)
  p <- .fscCollectParams(p)
  
  # ---- Write files
  opt <- options(scipen = 0)
  p <- .fscWriteParTpl(p) 
  if(!is.null(p$sim.params)) {
    if(is.null(def)) {
      p <- .fscWriteEst(p)
      p <- .fscWriteSFS(p)
    } else {
      p$files$def <- file.path(p$folder, paste0(p$label, ".def"))
      utils::write.table(
        p$settings$def, file = p$files$def, quote = FALSE,
        sep = "\t", row.names = FALSE
      )
    }
  }
  options(opt)
  
  invisible(p)
}


#' @noRd
#' 
.fscCheckSettings <- function(p) {
  ps <- p$settings
  
  # ---- Check deme info
  if(!inherits(ps$demes, "fscSettingsDemes")) {
    stop("`demes` must be a list generated by the `fscSettingsDemes()` function.")
  }
  
  # ---- Check migration rate matrices
  if(!is.null(ps$migration)) {
    if(!inherits(ps$migration, "fscSettingsMigration")) {
      stop(
        "`migration` must be a list of matrices generated by the ",
        "`fscSettingsMigration()` function."
      )
    }
    if(!all(sapply(ps$migration, nrow) == nrow(ps$demes))) {
      stop(
        "All matrices in `migration` must be the same size as the ",
        "number of demes."
      )
    }
  }
  
  # ---- Check and format historical events
  if(!is.null(ps$events)) {
    if(!inherits(ps$events, "fscSettingsEvents")) {
      stop("`events` must be a list generated by the `fscSettingsEvents()` function.")
    }
    event.demes <- unlist(ps$events[, c("source", "sink")])
    if(any(stats::na.omit(event.demes) > (nrow(ps$demes) - 1))) {
      stop("There are source or sink populations in `events` not specified in `demes`.")
    }
    max.mig.mat <- max(ps$events$migr.mat)
    if(max.mig.mat > 0 & (length(ps$migration) < (max.mig.mat + 1))) {
      stop("There are migration matrices in `events` not specified in `migration`.")
    }
  }
  
  # ---- Check genetic info
  if(!inherits(ps$genetics, "fscSettingsGenetics")) {
    stop("`genetics` must be a list of matrices generated by `fscSettingsGenetics()`.")
  }
  if(!is.null(ps$est) & !is.null(ps$def)) {
    stop("Both 'est' and 'def' can't be specified.")
  }
  has.freq <- ps$genetics$fsc.type[1] == "FREQ"
  if(!is.null(ps$est) & !has.freq) {
    stop("If 'est' is specified, 'fscBlock_freq()' must be used.")
  }
  if(has.freq & is.null(ps$est)) {
    stop("If 'fscBlock_freq()' is used, 'est' must be specified.")
  }
  
  # ---- Check parameter estimation
  if(!is.null(ps$est)) {
    if(!inherits(ps$est, "fscSettingsEst")) {
      stop("'est' must be a list generated by 'fscSettingsEst()'.")
    }
  }
  
  # ---- Check parameter definition
  if(!is.null(ps$def)) {
    if(!inherits(ps$def, "fscSettingsDef")) {
      stop("'def' must be a numeric matrix generated by 'fscSettingsDef()'.")
    }
  }
}


#' @noRd
#' 
.notAnumber <- function(x) {
  opt <- options(warn = -1)
  not.num <- is.na(as.numeric(x))
  options(opt)
  not.num & !is.na(x)
}


#' @noRd
#' 
.fscCollectParams <- function(p) {
  ps <- p$settings
  
  # ---- demes
  demes.params <- lapply(colnames(ps$demes)[-1], function(x) {
    i <- which(.notAnumber(ps$demes[[x]]))
    if(length(i) > 0) {
      data.frame(
        name = ps$demes[i, x], settings = "demes", 
        col = x, row = i, stringsAsFactors = FALSE
      )
    } else NULL
  })
  demes.params <- do.call(rbind, demes.params)
  is.dup <- duplicated(demes.params$name)
  demes.params <- demes.params[!is.dup, ]
  
  # ---- historical events
  events.params <- lapply(colnames(ps$events), function(x) {
    i <- which(.notAnumber(ps$events[[x]]))
    if(length(i) > 0) {
      data.frame(
        name = ps$events[i, x], settings = "events", 
        col = x, row = i, stringsAsFactors = FALSE
      )
    } else NULL
  })
  events.params <- do.call(rbind, events.params)
  is.dup <- duplicated(events.params$name)
  events.params <- events.params[!is.dup, ]
  
  # ---- migration
  migration.params <- lapply(1:length(ps$migration), function(x) {
    mat <- ps$migration[[x]]
    i <- .notAnumber(mat)
    if(sum(i) > 0) {
      data.frame(
        name = mat[i], settings = "migration",
        col = as.character(col(mat)[i]), row = row(mat)[i], matrix = x,
        stringsAsFactors = FALSE
      )
    } else NULL
  })
  migration.params <- do.call(rbind, migration.params)
  is.dup <- duplicated(migration.params$name)
  migration.params <- migration.params[!is.dup, ]
  
  # ---- genetics 
  cols <- if(ps$genetics$fsc.type[1] == "FREQ") {
    "mut.rate"
  } else {
    c("recomb.rate", "mut.rate", "param.5", "param.6")
  }
  genetics.params <- lapply(cols, function(x) {    
    i <- which(.notAnumber(ps$genetics[[x]]))
    if(length(i) > 0) {
      data.frame(
        name = ps$genetics[i, x], settings = "genetics", 
        col = x, row = i, stringsAsFactors = FALSE
      )
    } else NULL
  })
  genetics.params <- do.call(rbind, genetics.params)
  
  params <- dplyr::bind_rows(
    demes.params, events.params, migration.params, genetics.params
  ) %>% 
    as.data.frame()
  if(nrow(params) == 0) params <- NULL
  
  # ---- Check parameters
  if(!is.null(params)) {
    if(any(duplicated(params$name))) {
      stop("Can't have duplicated parameter names.")
    }
    if(!is.null(p$settings$def)) {
      not.found <- setdiff(params$name, colnames(p$settings$def))
      if(length(not.found) > 0) {
        stop(
          "Can't find the following parameters in 'def': ", 
          paste(not.found, collapse = ", ")
        )
      }
    } else if(!is.null(p$settings$est)) {
      not.found <- setdiff(params$name, p$settings$est$params$name)
      if(length(not.found) > 0) {
        warning(
          "Can't find the following parameters in 'est': ",
          paste(not.found, collapse = ", "),
          call. = FALSE
        )
      }
    } else {
      stop("Parameters defined in settings, but no 'est' or 'def' supplied.")
    }
  } else {
    if(!is.null(p$settings$def)) {
      stop("'def' supplied, but no parameters found in simulation settings.")
    }
    if(!is.null(p$settings$est)) {
      stop("'est' supplied, but no parameters found in simulation settings.")
    }
  }
  p$sim.params <- params
  
  p
}


#' @noRd
#' 
.fscWriteParTpl <- function(p) {
  ps <- p$settings
  
  # have to increase population size by ploidy as fastsimcoal generates 
  #   haploid number of genes
  ploidy <- attr(ps$demes, "ploidy")
  demes <- ps$demes
  for(x in c("deme.size", "sample.size")) {
    i <- which(!.notAnumber(demes[, x]))
    if(length(i) > 0) demes[i, x] <- as.numeric(demes[i, x]) * ploidy
  }
  
  n.demes <- nrow(demes)
  
  opt <- options(scipen = 10)
  
  p$files$input <- paste0(
    p$label, 
    ifelse(!is.null(p$sim.params), ".tpl", ".par")
  )
  f <- file(file.path(p$folder, p$files$input), open = "wt")
  writeLines("//Number of population samples (demes)", f)
  writeLines(as.character(n.demes), f)
  
  writeLines("//Population effective sizes (number of genes)", f)
  for(i in 1:n.demes) writeLines(as.character(demes$deme.size[i]), f)
  
  writeLines("//Sample sizes, times, inbreeding", f)
  x <- c("sample.size", "sample.time", "inbreeding")
  for(i in 1:n.demes) writeLines(paste(demes[i, x], collapse = " "), f)
  
  writeLines("//Growth rates: negative growth implies population expansion", f)
  for(i in 1:n.demes) writeLines(as.character(demes[i, "growth"]), f)
  
  writeLines("//Number of migration matrices: 0 implies no migration between demes", f)
  writeLines(as.character(length(ps$migration)), f)
  if(!is.null(ps$migration)) {
    for(i in 1:length(ps$migration)) {
      writeLines("//migration matrix", f)
      for(r in 1:nrow(ps$migration[[i]])) {
        writeLines(paste(ps$migration[[i]][r, ], collapse = " "), f)
      }
    }
  }
  
  writeLines("//Historical events: time, source, sink, migrants, new size, growth rate, migr. matrix", f)
  n.events <- if(is.null(ps$events)) 0 else nrow(ps$events)
  writeLines(as.character(n.events), f)
  if(!is.null(ps$events)) {
    for(r in 1:n.events) writeLines(paste(ps$events[r, ], collapse = " "), f)
  }
  
  writeLines("//Number of independent loci [chromosomes]", f)
  num.chrom <- attr(ps$genetics, "num.chrom")
  chrom.diff <- as.numeric(attr(ps$genetics, "chrom.diff"))
  writeLines(paste(num.chrom, chrom.diff, collapse = " "), f)
  chroms <- if(chrom.diff) {
    split(ps$genetics, ps$genetics$chromosome)
  } else {
    list(ps$genetics)
  }
  for(block in chroms) {
    writeLines("//Per chromosome: Number of linkage blocks", f)
    writeLines(as.character(nrow(block)), f)
    writeLines("//Per block: data type, num loci, rec. rate and mut rate + optional parameters", f)
    for(i in 1:nrow(block)) {
      block.i <- block[i, -(1:2)]
      block.i[is.na(block.i)] <- " "
      writeLines(paste(block.i, collapse = " "), f)
    }
  }
  
  options(opt)
  close(f)
  p
}


#' @noRd
#' 
.fscWriteEst <- function(p) {
  param.df <- p$settings$est$params[is.na(p$settings$est$params$value), ]
  param.df$value <- NULL
  cmplx.df <- p$settings$est$params[!is.na(p$settings$est$params$value), ]
  cmplx.df$name <- paste0(cmplx.df$name, " = ", cmplx.df$value)
  cmplx.df <- cmplx.df[, c("is.int", "name", "output", "bounded", "reference")]
  
  p$files$est <- paste0(p$label, ".est")
  f <- file(file.path(p$folder, p$files$est), open = "wt")
  writeLines("// Priors and rules file", f)
  writeLines("// *********************", f)
  writeLines("", f)
  writeLines("[PARAMETERS]", f)
  writeLines("//#isInt? #name #dist. #min #max", f)
  writeLines("//all N are in number of haploid individuals", f)
  if(nrow(param.df) != 0) {
    for(i in 1:nrow(param.df)) {
      writeLines(paste(param.df[i, ], collapse = "\t"), f)
    }
  }
  writeLines("", f)
  writeLines("[RULES]", f)
  rules <- p$settings$est$rules
  if(!is.null(rules)) writeLines(rules, f)
  writeLines("", f)
  writeLines("[COMPLEX PARAMETERS]", f)
  if(nrow(cmplx.df) != 0) {
    for(i in 1:nrow(cmplx.df)) {
      writeLines(paste(cmplx.df[i, ], collapse = "\t"), f)
    }
  }
  close(f)
  
  p
}


#' @noRd
#' 
.fscWriteSFS <- function(p) {
  .writeMat <- function(mat, fname) {   
    mat <- cbind(mat)
    f <- file(fname, open = "wt")
    writeLines("1 observation", f)
    writeLines(paste(colnames(mat), collapse = " "), f)
    for(i in 1:nrow(mat)) {
      if(is.null(rownames(mat))) {
        writeLines(paste(mat[i, ], collapse = " "), f)
      } else {
        writeLines(paste(c(rownames(mat)[i], mat[i, ]), collapse = " "), f)
      }
    }
    close(f)
  }
  
  sfs <- p$settings$est$sfs
  sfs.type <- attr(p$settings$est, "sfs.type")
  if(!is.list(sfs)) {
    x <- rbind(sfs)
    rownames(x) <- NULL
    p$file$sfs <- paste0(p$label, "_", sfs.type, "pop0.obs")
    .writeMat(x, file.path(p$folder, p$file$sfs))
  } else {
    p$file$sfs <- sapply(sfs, function(x) {
      row.d <- regmatches(rownames(x), regexpr("d[[:digit:]]+", rownames(x)))
      row.d <- unique(gsub("d", "", row.d))
      col.d <- regmatches(colnames(x), regexpr("d[[:digit:]]+", colnames(x)))
      col.d <- unique(gsub("d", "", col.d))
      if(length(row.d) > 1) stop("More than one deme specified in rows.")
      if(length(col.d) > 1) stop("More than one deme specified in columns.")
      fname <- paste0(
        p$label, "_joint", sfs.type, "pop", row.d, "_", col.d, ".obs"
      )
      .writeMat(x, fname)
      fname
    })
  }
  
  p
}

Try the strataG package in your browser

Any scripts or data that you put into this service are public.

strataG documentation built on Feb. 28, 2020, 9:07 a.m.