R/eems.run.R

Defines functions eems eems.from.files

Documented in eems eems.from.files

#' Run an EEMS analysis
#'
#' This function runs an EEMS analysis with given parameters, which include file paths for
#' retrieving the input files (datapath.coord, datapath.diffs and datapath.outer) and
#' writing the output files.
#' It is an exact replicate of the original EEMS command-line function. 
#'
#' @param seed The random seed. Defaults to the current system time in seconds. 
#' @param datapath Full path to a set of three files: datapath.coord, datapath.diffs and datapath.outer.
#' @param mcmcpath Full path to a filename prefix in an output directory with write permission. 
#' @param prevpath Full path to previous output directory, i.e., the mcmcpath in a previous EEMS run. Optional.
#' @param gridpath Full path to a set of two files: gridpath.demes and gridpath.edges. Optional.
#' @param nDemes Number of demes, roughly. EEMS constructs a regular triangular grid with circa nDemes vertices.
#' @param nIndiv Number of samples. Should match the size of the dissimilarity matrix in datapath.diffs.
#' @param nSites Number of SNPs used to compute the observed dissimilarity matrix in datapath.diffs.
#' @param diploid Logical value that indicates whether the species is diploid (TRUE) or haploid (FALSE). Defaults to TRUE.
#' @param distance Distance metric. Either `euclidean` or `greatcirc`, that is, great circle distance.
#' Defaults to 'euclidean'.
#' @param numMCMCIter Number of Markov Chain Monte Carlo iterations. 
#' @param numBurnIter Number of burn-in iterations to be discarded before sampling from posterior. 
#' @param numThinIter Number of thinning iterations to be discarded between sampling from posterior. 
#' @param mSeedsProposalS2 Variance of normal proposals to update the seeds of the migration tiles. Defaults to 0.01.
#' @param qSeedsProposalS2 Variance of normal proposals to update the seeds of the diversity tiles. Defaults to 0.1. 
#' @param mEffctProposalS2 Variance of normal proposals to update the log10 rates of the migration tiles. Defaults to 0.1.
#' @param qEffctProposalS2 Variance of normal proposals to update the log10 rates of the diversity tiles. Defaults to 0.001.
#' @param mrateMuProposalS2 Variance of normal proposals to update the overall mean migration rate, on the log10 scale.
#' Defaults to 0.01.
#' @param qVoronoiPr With probability qVoronoiPr, update diversity Voronoi;
#' with probability 1-qVoronoiPr, update migration Voronoi. Defaults to 0.25.
#' @param qrateShape Shape hyperparameter for the diversity rates variance, qrateS2 ~ invgamma(qrateShape, qrateScale).
#' Defaults to 0.001
#' @param mrateShape Shape hyperparameter for the migration rates variance, mrateS2 ~ invgamma(mrateShape, mrateScale).
#' Defaults to 0.001.
#' @param sigmaShape Shape hyperparameter for the scaling factor sigma^2 ~ invgamma(sigmaShape, sigmaScale).
#' Defaults to 0.001.
#' @param qrateScale Scale hyperparameter for the diversity rates variance, qrateS2 ~ invgamma(qrateShape, qrateScale).
#' Defaults to 1.0.
#' @param mrateScale Scale hyperparameter for the migration rates variance, mrateS2 ~ invgamma(mrateShape, mrateScale).
#' Defaults to 1.0.
#' @param sigmaScale Scale hyperparameter for the scaling factor sigma^2 ~ invgamma(sigmaShape, sigmaScale).
#' Defaults to 1.0.
#' @param negBiProb Success probability for the number of Voronoi tiles ~ negbinom(negBiSize, negBiProb). Defaults to 0.67.
#' @param negBiSize Size for the number of Voronoi tiles ~ negbinom(negBiSize, negBiProb). Defaults to 10.
#' @returns None
#' @references Petkova, D., Novembre, J. & Stephens, M.
#' Visualizing spatial population structure with estimated effective migration surfaces.
#' Nat Genet 48, 94–100 (2016). https://doi.org/10.1038/ng.3464
#' @examples
#' # We use example input from Petkova et al. (2016), found in the '/extdata' directory
#' data_path <- system.file("extdata", package = "reems")
#' input <- file.path(data_path, "barrier-schemeX-nIndiv300-nSites3000")
#' # The example puts the output in a temporary directory.
#' mcmcdir <- file.path(tempdir(), "eems_out")
#' dir.create(mcmcdir, showWarnings = FALSE)
#' 
#'
#' # Run an example EEMS analysis with a small number of iterations to ensure quick termination. 
#' 
#' eems.from.files(
#'   datapath = input,
#'   mcmcpath = mcmcdir,
#'   nDemes = 200,
#'   nIndiv = 300,
#'   nSites = 3000,
#'   diploid = FALSE,
#'   numMCMCIter = 200,
#'   numBurnIter = 100,
#'   numThinIter = 99,
#' )
#' # Delete the output directory to tidy up. 
#' unlink(mcmcdir, recursive = TRUE, force = TRUE)
#' @seealso \code{\link{eems.plots}, \link{eems}}
#' @export
eems.from.files <- function(
                            seed = unclass(Sys.time()),
                            datapath,
                            mcmcpath,
                            prevpath = "",
                            gridpath = "",
                            nDemes,
                            nIndiv,
                            nSites,
                            diploid = TRUE,
                            distance = "euclidean",
                            numMCMCIter,
                            numBurnIter,
                            numThinIter,
                            mSeedsProposalS2 = 0.01,
                            qSeedsProposalS2 = 0.1,
                            mEffctProposalS2 = 0.1,
                            qEffctProposalS2 = 0.001,
                            mrateMuProposalS2 = 0.01,
                            qVoronoiPr = 0.25,
                            qrateShape = 0.001,
                            mrateShape = 0.001,
                            sigmaShape = 0.001,                            
                            qrateScale = 1.0,
                            mrateScale = 1.0,
                            sigmaScale = 1.0,
                            negBiProb = 0.67,
                            negBiSize = 10) {
    testing = FALSE
    p <- as.list(environment())
    cpp_eems(p)
}

#' Run an EEMS analysis from data in R format
#'
#' This function runs an EEMS analysis with given parameters in the input format for `conStruct`,
#' which include R matrices for allele frequency data and coordinates.  
#' In addition to taking R objects as inputs,
#' it also provides additional default values over the original `EEMS` command-line utility.
#' Furthermore, it allows for running more than one chain, either in parallel or sequentially,
#' and returns a vector of output directories as input to [eems.plots()].  
#'
#' @param coords The coordinate matrix, a two-column matrix with a line for every sample.
#' @param freqs An allele frequency  matrix in `conStruct` format, with a column for every site and a row for every sample.
#' Choosing one main allele, it tabulates the average genotypes of the sample at that locus.  
#' @param outer Matrix of coordinates of the outer polygon.
#' Defaults to a polygon larger than the convex hull of the supplied coordinates to mitigate EEMS boundary effects,
#' which is created by [outer.buffered()].
#' @param buffer The rough distance of the boundary from the convex hull. Defaults to 0.2 times the length of the hull.
#' Unused if `outer` is specified.
#' @param nDemes Number of demes, roughly. EEMS constructs a regular triangular grid with circa nDemes vertices.
#' Defaults to 6 times the number of individuals, aiming for sufficient resolution. 
#' @param nChains Number of chains to be run. Defaults to 1.
#' @param parallel Logical values that indicates whether chains should be run in parallel. Defaults to FALSE.
#' @param nWorkers If `parallel` is TRUE, then the number of workers used is the minimum of `nChains` and `nWorkers`.
#' Defaults to the number of available cores.
#' @param distance Distance metric. Either `euclidean` or `greatcirc`, that is, great circle distance.
#' Defaults to 'greatcirc', which is more accurate at larger scales.
#' @param numMCMCIter Number of Markov Chain Monte Carlo iterations. Defaults to 2000000.
#' @param numBurnIter Number of burn-in iterations to be discarded before sampling from posterior. Defaults to 1000000.
#' @param numThinIter Number of thinning iterations to be discarded between sampling from posterior. Defaults to 9999.
#' @param demes Matrix of coordinates of demes; optional, and usually unnecessary. Only in conjunction with `edges`.
#' @param edges Matrix of custom input grid; optional, and usually unnecessary. Only in conjunction with `demes`. 
#' @inheritParams eems.from.files
#' @returns A vector of output directories which can be given as input to [eems.plots()]. 
#' @references Petkova, D., Novembre, J. & Stephens, M.
#' Visualizing spatial population structure with estimated effective migration surfaces.
#' Nat Genet 48, 94–100 (2016). https://doi.org/10.1038/ng.3464
#' @examples
#' # The example puts the output in a temporary directory.
#' mcmcdir <- file.path(tempdir(), "eems_out")
#' dir.create(mcmcdir, showWarnings = FALSE)
#' mcmcpath <- file.path(mcmcdir,"example")
#'
#' # Run an example EEMS analysis with a small number of iterations to ensure quick termination. 
#' 
#' eems(
#'   freqs = ex.freqs,
#'   coords = ex.coords,
#'   mcmcpath = mcmcpath,   
#'   numMCMCIter = 200,
#'   numBurnIter = 100,
#'   numThinIter = 99,
#' )
#' # Delete the output directory to tidy up. 
#' unlink(mcmcdir, recursive = TRUE, force = TRUE)
#' @seealso \code{\link{eems.plots}, \link{eems.from.files}}
#' @export
eems <- function(
                 seed = unclass(Sys.time()),
                 coords,
                 freqs,
                 outer = NULL,
                 buffer = NULL,
                 mcmcpath,
                 demes = NULL,
                 edges = NULL,
                 prevpath = "",
                 nChains = 1,
                 parallel = FALSE,
                 nWorkers = NULL,
                 nDemes = NULL,
                 diploid = TRUE,
                 distance = "greatcircle",
                 numMCMCIter = 2000000,
                 numBurnIter = 1000000,
                 numThinIter = 9999,
                 mSeedsProposalS2 = 0.01,
                 qSeedsProposalS2 = 0.1,
                 mEffctProposalS2 = 0.1,
                 qEffctProposalS2 = 0.001,
                 mrateMuProposalS2 = 0.01,
                 qVoronoiPr = 0.25,
                 qrateShape = 0.001,
                 mrateShape = 0.001,
                 sigmaShape = 0.001,                            
                 qrateScale = 1.0,
                 mrateScale = 1.0,
                 sigmaScale = 1.0,
                 negBiProb = 0.67,
                 negBiSize = 10) {
    if (is.null(outer)) {
        outer <- outer.buffered(coords,buffer) 
    }
    nIndiv <- nrow(freqs)
    nSites <- ncol(freqs)
    if (is.null(nDemes)) {
        nDemes <- 6*nIndiv 
    }
    data_path <- file.path(tempdir(), "data_path")
    dir.create(data_path, showWarnings = FALSE)
    on.exit(unlink(data_path, recursive = TRUE, force = TRUE), add = TRUE)
    datapath <- file.path(data_path,"in")
    coordfile <- paste0(datapath, ".coord")
    diffsfile <- paste0(datapath,".diffs")
    outerfile <- paste0(datapath,".outer")
    write_for_eems(coords,coordfile)
    write_for_eems(geno2diffs(freqs),diffsfile)
    write_for_eems(outer,outerfile)
    write_for_eems(coords,coordfile)    
    testing = FALSE

    if (!is.null(demes) && !is.null(edges)) {
        gridpath <- datapath
        demesfile <- paste0(gridpath, ".demes")
        edgesfile <- paste0(gridpath, ".edges")
        write_for_eems(demes, demesfile)
        write_for_eems(edges, edgesfile)
    }
    else {
        gridpath = ""
    }
    p <- as.list(environment())
    if (parallel){
        if(is.null(nWorkers)) {
            nWorkers <- min(nChains, future::availableCores(constraints = "connections-16"))
        }
        old_future_plan <- future::plan()
        on.exit(future::plan(old_future_plan), add = TRUE)
        future::plan(future::multisession, workers = nWorkers)
        paths <- future.apply::future_sapply(1:nChains, function(i) {
        pf <- p
        pf$mcmcpath <- paste0(p$mcmcpath, "-chain", i)
        dir.create(pf$mcmcpath, showWarnings = FALSE)
        pf$seed <- p$seed + i
        capture.output(cpp_eems(pf), file = file.path(pf$mcmcpath, "eems.log"))
        return(pf$mcmcpath)
        }, future.seed = TRUE)
        return(paths)
    }
    else {
        sapply(1:nChains, function(i) {
        pf <- p
        pf$mcmcpath <- paste0(p$mcmcpath, "-chain", i)
        pf$seed <- p$seed + i
        cpp_eems(pf)
        return(pf$mcmcpath)
        })
    }
}

Try the reems package in your browser

Any scripts or data that you put into this service are public.

reems documentation built on May 6, 2026, 1:07 a.m.