#' Define a population
#'
#' Defines the parameters of a population (non-spatial and spatial).
#'
#' There are four ways to specify a spatial boundary: i) circular range
#' specified using a center coordinate and a radius, ii) polygon specified as a
#' list of two-dimensional vector coordinates, iii) polygon as in ii), but
#' defined (and named) using the \code{region} function, iv) with just a world
#' map specified (circular or polygon range parameters set to the default
#' \code{NULL} value), the population will be allowed to occupy the entire
#' landscape.
#'
#' Note that because slendr models have to accomodate both SLiM and msprime
#' back ends, population sizes and split times are rounded to the nearest
#' integer value.
#'
#' @param name Name of the population
#' @param time Time of the population's first appearance
#' @param N Number of individuals at the time of first appearance
#' @param parent Parent population object or \code{NULL} (which indicates that
#' the population does not have an ancestor, as it is the first population
#' in its "lineage")
#' @param map Object of the type \code{slendr_map} which defines the world
#' context (created using the \code{world} function). If the value
#' \code{FALSE} is provided, a non-spatial model will be run.
#' @param center Two-dimensional vector specifying the center of the circular
#' range
#' @param radius Radius of the circular range
#' @param polygon List of vector pairs, defining corners of the polygon range or
#' a geographic region of the class \code{slendr_region} from which the
#' polygon coordinates will be extracted (see the \code{region() function})
#' @param remove Time at which the population should be removed
#' @param intersect Intersect the population's boundaries with landscape
#' features?
#' @param competition,mating Maximum spatial competition and mating
#' choice distance
#' @param dispersal Standard deviation of the normal distribution of the
#' distance that offspring disperses from its parent
#' @param dispersal_fun Distribution function governing the dispersal of
#' offspring. One of "normal", "uniform", "cauchy", "exponential", or
#' "brownian" (in which vertical and horizontal displacements are drawn from a
#' normal distribution independently).
#' @param aquatic Is the species aquatic (\code{FALSE} by default, i.e.
#' terrestrial species)?
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#' parameters such as name, time of appearance in the simulation, parent
#' population (if any), and its spatial parameters such as map and spatial
#' boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
population <- function(name, time, N, parent = NULL, map = FALSE,
center = NULL, radius = NULL, polygon = NULL,
remove = NULL, intersect = TRUE,
competition = NA, mating = NA, dispersal = NA,
dispersal_fun = NULL, aquatic = FALSE) {
if (!is.character(name) ||
length(name) != 1 ||
!grepl("^(?:[_\\p{L}\\p{Nl}])(?:[_\\p{L}\\p{Nl}\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}])*$",
name, perl = TRUE))
stop("A population name must be a character scalar value which must also be\n",
"valid Python identifiers (a restriction of the msprime simulation engine)", call. = FALSE)
N <- as.integer(round(N))
time <- as.integer(round(time))
if (time < 1) stop("Split time must be a non-negative number", call. = FALSE)
if (N < 1) stop("Population size must be a non-negative number", call. = FALSE)
# if this population splits from a parental population, check that the parent
# really exists and that the split time make sense given the time of appearance
# of the parental population
if (!is.null(parent)) {
if (!inherits(parent, "slendr_pop"))
stop("Only slendr_pop objects can represent parental populations", call. = FALSE)
else
check_split_time(time, parent)
}
parent_remove <- attr(parent, "remove")
if (!is.null(parent) && parent_remove != -1) {
direction <- if (time > parent$time[1]) "forward" else "backward"
if (direction == "forward" && parent_remove <= time)
stop("Parent population will be removed before the split event", call. = FALSE)
else if (direction == "backward" && parent_remove >= time)
stop("Parent population will be removed before the split event", call. = FALSE)
}
if (!is.logical(map) && !inherits(map, "slendr_map"))
stop("A simulation landscape must be an object of the class slendr_map", call. = FALSE)
if (!is.null(parent) && is.logical(map) && map == FALSE)
map <- attr(parent, "map")
if (inherits(map, "slendr_map")) {
check_spatial_pkgs()
# define the population range as a simple geometry object
# and bind it with the annotation info into an sf object
if (is.null(polygon) && is.null(center) && is.null(radius)) {
geometry <- sf::st_geometry(map)
} else if (!is.null(polygon) & inherits(polygon, "slendr_region"))
geometry <- sf::st_geometry(polygon)
else
geometry <- define_boundary(map, center, radius, polygon)
pop <- sf::st_sf(
data.frame(pop = name, time = time, stringsAsFactors = FALSE),
geometry = geometry
)
sf::st_agr(pop) <- "constant"
attr(pop, "intersect") <- intersect
attr(pop, "aquatic") <- aquatic
} else
pop <- list(pop = name, time = time)
# when to clean up the population?
attr(pop, "remove") <- if (!is.null(remove)) remove else -1
# keep a record of the parent population
if (inherits(parent, "slendr_pop"))
attr(pop, "parent") <- parent
else if (is.null(parent))
attr(pop, "parent") <- "__pop_is_ancestor"
else
stop("Suspicious parent population", call. = FALSE)
attr(pop, "map") <- map
dispersal_fun <- kernel_fun(dispersal_fun)
# create the first population history event - population split
attr(pop, "history") <- list(data.frame(
pop = name,
event = "split",
time = time,
N = N,
competition = competition,
mating = mating,
dispersal = dispersal,
dispersal_fun = dispersal_fun
))
class(pop) <- set_class(pop, "pop")
if (!is.logical(map) && intersect && (nrow(intersect_features(pop)) == 0))
stop("The specified population boundary has no overlap with liveable landscape surface",
call. = FALSE)
pop
}
#' Move the population to a new location in a given amount of time
#'
#' This function defines a displacement of a population along a given trajectory
#' in a given time frame
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param trajectory List of two-dimensional vectors (longitude, latitude)
#' specifying the migration trajectory
#' @param start,end Start/end points of the population migration
#' @param overlap Minimum overlap between subsequent spatial boundaries
#' @param snapshots The number of intermediate snapshots (overrides the
#' \code{overlap} parameter)
#' @param verbose Show the progress of searching through the number of
#' sufficient snapshots?
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#' parameters such as name, time of appearance in the simulation, parent
#' population (if any), and its spatial parameters such as map and spatial
#' boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
move <- function(pop, trajectory, end, start, overlap = 0.8, snapshots = NULL,
verbose = TRUE) {
if (!has_map(pop)) stop("This operation is only allowed for spatial models", call. = FALSE)
check_spatial_pkgs()
check_event_time(c(start, end), pop)
check_removal_time(start, pop)
check_removal_time(end, pop)
if (!is.null(snapshots))
if (snapshots <= 0)
stop("The number of snapshots must be a non-negative integer", call. = FALSE)
if (!(overlap > 0 & overlap < 1))
stop("The required overlap between subsequent spatial maps must be a number between 0 and 1",
call. = FALSE)
map <- attr(pop, "map")
# take care of just a single destination point being specified
if (!is.list(trajectory) & length(trajectory) == 2)
trajectory <- list(trajectory)
# get the last available population boundary
region_start <- pop[nrow(pop), ]
region_start$time <- start
sf::st_agr(region_start) <- "constant"
# prepend the coordinates of the first region to the list of "checkpoints"
# along the way of the movement
start_coords <- sf::st_centroid(region_start)
if (has_crs(map)) {
source_crs <- "EPSG:4326"
target_crs <- sf::st_crs(pop)
start_coords <- sf::st_transform(start_coords, crs = source_crs)
}
start_coords <- sf::st_coordinates(start_coords)
checkpoints <- c(list(as.vector(start_coords)), trajectory)
traj <- sf::st_linestring(do.call(rbind, checkpoints)) %>% sf::st_sfc()
if (has_crs(map)) {
traj <- sf::st_sf(traj, crs = source_crs) %>%
sf::st_transform(crs = target_crs)
} else {
traj <- sf::st_sf(traj)
}
if (is.null(snapshots)) {
n <- 1
message("Iterative search for the minimum sufficient number of intermediate ",
"spatial snapshots, starting at ", n, ". This should only take a couple of ",
"seconds, but if you don't want to wait, you can set `snapshots = N` manually.")
} else
n <- snapshots
# iterate through the number of intermediate spatial boundaries to reach
# the required overlap between subsequent spatial maps
repeat {
traj_segments <- sf::st_segmentize(traj, sf::st_length(traj) / n)
traj_points <- sf::st_cast(traj_segments, "POINT")
traj_points_coords <- sf::st_coordinates(traj_points)
traj_diffs <- diff(traj_points_coords)
time_slices <- seq(start, end, length.out = nrow(traj_points))[-1]
traj_diffs <- cbind(traj_diffs, time = time_slices)
inter_regions <- list()
inter_regions[[1]] <- region_start
for (i in seq_len(nrow(traj_diffs))) {
shifted_region <- sf::st_geometry(inter_regions[[i]]) + traj_diffs[i, c("X", "Y")]
inter_regions[[i + 1]] <- sf::st_sf(
data.frame(
pop = region_start$pop,
time = traj_diffs[i, "time"],
stringsAsFactors = FALSE
),
geometry = shifted_region,
crs = sf::st_crs(inter_regions[[i]])
)
sf::st_agr(inter_regions[[i + 1]]) <- "constant"
}
if (!is.null(snapshots)) break
# calculate the overlap between subsequent spatial snapshots
overlaps <- compute_overlaps(do.call(rbind, inter_regions))
if (all(overlaps >= overlap)) {
message("The required ", sprintf("%.1f%%", 100 * overlap),
" overlap between subsequent spatial maps has been met")
break
} else {
n <- n + 1
if (verbose)
message("- testing ", n, " snapshots")
}
}
inter_regions <- rbind(pop, do.call(rbind, inter_regions))
sf::st_agr(inter_regions) <- "constant"
result <- copy_attributes(
inter_regions, pop,
c("map", "parent", "remove", "intersect", "aquatic", "history")
)
attr(result, "history") <- append(attr(result, "history"), list(data.frame(
pop = unique(region_start$pop),
event = "move",
tstart = start,
tend = end
)))
result
}
#' Expand the population range
#'
#' Expands the spatial population range by a specified distance in a given
#' time-window
#'
#' Note that because slendr models have to accomodate both SLiM and msprime
#' back ends, population sizes and times of events are rounded to the nearest
#' integer value.
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param by How many units of distance to expand by?
#' @param start,end When does the expansion start/end?
#' @param overlap Minimum overlap between subsequent spatial boundaries
#' @param snapshots The number of intermediate snapshots (overrides the
#' \code{overlap} parameter)
#' @param polygon Geographic region to restrict the expansion to
#' @param lock Maintain the same density of individuals. If
#' \code{FALSE} (the default), the number of individuals in the
#' population will not change. If \code{TRUE}, the number of
#' individuals simulated will be changed (increased or decreased)
#' appropriately, to match the new population range area.
#' @param verbose Report on the progress of generating intermediate spatial
#' boundaries?
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#' parameters such as name, time of appearance in the simulation, parent
#' population (if any), and its spatial parameters such as map and spatial
#' boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
expand_range <- function(pop, by, end, start, overlap = 0.8, snapshots = NULL,
polygon = NULL, lock = FALSE, verbose = TRUE) {
if (!has_map(pop)) stop("This operation is only allowed for spatial models", call. = FALSE)
check_spatial_pkgs()
start <- as.integer(round(start))
end <- as.integer(round(end))
shrink_or_expand(pop, by, end, start, overlap, snapshots, polygon, lock, verbose)
}
#' Shrink the population range
#'
#' Shrinks the spatial population range by a specified distance in a given
#' time-window
#'
#' Note that because slendr models have to accomodate both SLiM and msprime
#' back ends, population sizes and split times are rounded to the nearest
#' integer value.
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param by How many units of distance to shrink by?
#' @param start,end When does the boundary shrinking start/end?
#' @param overlap Minimum overlap between subsequent spatial boundaries
#' @param snapshots The number of intermediate snapshots (overrides the
#' \code{overlap} parameter)
#' @param lock Maintain the same density of individuals. If
#' \code{FALSE} (the default), the number of individuals in the
#' population will not change. If \code{TRUE}, the number of
#' individuals simulated will be changed (increased or decreased)
#' appropriately, to match the new population range area.
#' @param verbose Report on the progress of generating intermediate spatial
#' boundaries?
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#' parameters such as name, time of appearance in the simulation, parent
#' population (if any), and its spatial parameters such as map and spatial
#' boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
shrink_range <- function(pop, by, end, start, overlap = 0.8, snapshots = NULL,
lock = FALSE, verbose = TRUE) {
if (!has_map(pop)) stop("This operation is only allowed for spatial models", call. = FALSE)
check_spatial_pkgs()
shrink_or_expand(pop, -by, end, start, overlap, snapshots, polygon = NULL, lock, verbose)
}
#' Update the population range
#'
#' This function allows a more manual control of spatial map changes
#' in addition to the \code{expand} and \code{move} functions
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param time Time of the change
#' @param center Two-dimensional vector specifying the center of the
#' circular range
#' @param radius Radius of the circular range
#' @param polygon List of vector pairs, defining corners of the
#' polygon range (see also the \code{region} argument) or a
#' geographic region of the class \code{slendr_region} from which
#' the polygon coordinates will be extracted
#' @param lock Maintain the same density of individuals. If
#' \code{FALSE} (the default), the number of individuals in the
#' population will not change. If \code{TRUE}, the number of
#' individuals simulated will be changed (increased or decreased)
#' appropriately, to match the new population range area.
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#' parameters such as name, time of appearance in the simulation, parent
#' population (if any), and its spatial parameters such as map and spatial
#' boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
set_range <- function(pop, time, center = NULL, radius = NULL,
polygon = NULL, lock = FALSE) {
if (!has_map(pop)) stop("This operation is only allowed for spatial models", call. = FALSE)
check_spatial_pkgs()
check_event_time(time, pop)
check_removal_time(time, pop)
map <- attr(pop, "map")
# define the population range as a simple geometry object
# and bind it with the annotation info into an sf object
if (!is.null(polygon) & inherits(polygon, "slendr_region"))
geometry <- sf::st_geometry(polygon)
else
geometry <- define_boundary(map, center, radius, polygon)
updated <- sf::st_sf(
data.frame(pop = unique(pop$pop), time = time, stringsAsFactors = FALSE),
geometry = geometry
)
# Let's not enforce this given that the point of this function is to allow
# full manual control over the boundary dynamics:
# if (compute_overlaps(rbind(updated, pop[nrow(pop), ])) < overlap)
# stop("Insufficient overlap with the last active spatial boundary (",
# "please adjust the new spatial boundary or adjust the `overlap = `",
# "parameter for less stringent checking)", call. = FALSE)
combined <- rbind(pop, updated)
sf::st_agr(combined) <- "constant"
result <- copy_attributes(
combined, pop,
c("map", "parent", "remove", "intersect", "aquatic", "history")
)
attr(result, "history") <- append(attr(result, "history"), list(data.frame(
event = "range",
time = time
)))
if (lock) {
areas <- slendr::area(result)$area
area_change <- areas[length(areas)] / areas[length(areas) - 1]
prev_N <- utils::tail(sapply(attributes(pop)$history, function(event) event$N), 1)
new_N <- round(area_change * prev_N)
result <- resize(result, N = new_N, time = time, how = "step")
}
result
}
#' Change the population size
#'
#' Resizes the population starting from the current value of \code{N}
#' individuals to the specified value
#'
#' In the case of exponential size change, if the final \code{N} is larger than
#' the current size, the population will be exponentially growing over the
#' specified time period until it reaches \code{N} individuals. If \code{N} is
#' smaller, the population will shrink exponentially.
#'
#' Note that because slendr models have to accomodate both SLiM and msprime
#' back ends, population sizes and split times are rounded to the nearest
#' integer value.
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param N Population size after the change
#' @param how How to change the population size (options are \code{"step"} or
#' \code{"exponential"})
#' @param time Time of the population size change
#' @param end End of the population size change period (used for exponential
#' change events)
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#' parameters such as name, time of appearance in the simulation, parent
#' population (if any), and its spatial parameters such as map and spatial
#' boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
resize <- function(pop, N, how, time, end = NULL) {
if (N < 1) stop("resize(): Only positive, non-zero population sizes are allowed", call. = FALSE)
N <- as.integer(round(N))
time <- as.integer(round(time))
if (!is.null(end)) end <- as.integer(round(end))
if (time == attr(pop, "history")[[1]]$time)
stop("Population resize cannot happen at the time the population is created", call. = FALSE)
if (!how %in% c("step", "exponential"))
stop("resize(): Only 'step' or 'exponential' are allowed as arguments for the 'how' parameter", call. = FALSE)
if (how == "exponential" & is.null(end))
stop("resize(): Start-end period of the exponential growth must be specified", call. = FALSE)
# get the last active population size
prev_N <- sapply(attr(pop, "history"), function(event) event$N) %>%
Filter(Negate(is.null), .) %>%
unlist %>%
utils::tail(1)
change <- data.frame(
pop = unique(pop$pop),
event = "resize",
how = how,
N = N,
prev_N = prev_N,
tresize = time
)
if (how == "step") {
check_event_time(time, pop)
check_removal_time(time, pop)
change$tend <- NA
} else {
check_event_time(c(time, end), pop)
check_removal_time(time, pop)
check_removal_time(end, pop)
change$tend <- end
}
attr(pop, "history") <- append(attr(pop, "history"), list(change))
pop
}
#' Change dispersal parameters
#'
#' Changes either the competition interactive distance, mating choice distance,
#' or the dispersal of offspring from its parent
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param time Time of the population size change
#' @param competition,mating Maximum spatial competition and mating
#' choice distance
#' @param dispersal Standard deviation of the normal distribution of the
#' distance that offspring disperses from its parent
#' @param dispersal_fun Distribution function governing the dispersal of
#' offspring. One of "normal", "uniform", "cauchy", "exponential", or
#' "brownian" (in which vertical and horizontal displacements are drawn from a
#' normal distribution independently).
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#' parameters such as name, time of appearance in the simulation, parent
#' population (if any), and its spatial parameters such as map and spatial
#' boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
set_dispersal <- function(pop, time, competition = NA, mating = NA, dispersal = NA,
dispersal_fun = NULL) {
if (!has_map(pop)) stop("This operation is only allowed for spatial models", call. = FALSE)
check_spatial_pkgs()
if (is.na(competition) && is.na(mating) && is.na(dispersal) &&
is.null(dispersal_fun))
stop("At least one spatial interaction parameter must be specified", call. = FALSE)
if (any(c(competition, mating, dispersal) < 0, na.rm = TRUE))
stop("Spatial interaction parameters can only have positive, non-zero values", call. = FALSE)
dispersal_fun <- kernel_fun(dispersal_fun)
map <- attr(pop, "map")
if (!is.na(competition)) check_resolution(map, competition)
if (!is.na(mating)) check_resolution(map, mating)
if (!is.na(dispersal)) check_resolution(map, dispersal)
check_event_time(time, pop)
check_removal_time(time, pop)
change <- data.frame(
pop = unique(pop$pop),
event = "dispersal",
time = time,
competition = competition,
mating = mating,
dispersal = dispersal,
dispersal_fun = dispersal_fun
)
attr(pop, "history") <- append(attr(pop, "history"), list(change))
pop
}
#' Define a gene-flow event between two populations
#'
#' @param from,to Objects of the class \code{slendr_pop}
#' @param rate Scalar value in the range (0, 1] specifying the
#' proportion of migration over given time period
#' @param start,end Start and end of the gene-flow event
#' @param overlap Require spatial overlap between admixing
#' populations? (default \code{TRUE})
#'
#' @return Object of the class data.frame containing parameters of the specified
#' gene-flow event.
#'
#' @export
#'
#' @example man/examples/model_definition.R
gene_flow <- function(from, to, rate, start, end, overlap = TRUE) {
if (!inherits(from, "slendr_pop") || !inherits(to, "slendr_pop"))
stop("Both 'from' and 'to' arguments must be slendr population objects",
call. = FALSE)
# TODO: this needs some serious restructuring -- this function originated when slendr
# could only do spatial simulations and some consistency checks relied on spatial
# attributes; as a result, some checks are overlapping and/or redundant
if ((has_map(from) && !has_map(to)) || (!has_map(from) && has_map(to)))
stop("Both or neither populations must be spatial", call. = FALSE)
# make sure that gene flow has a sensible value between 0 and 1
if (rate < 0 || rate > 1)
stop("Gene-flow rate must be a numeric value between 0 and 1", call. = FALSE)
from_name <- unique(from$pop)
to_name <- unique(to$pop)
if (start == end)
stop(sprintf("Start and end time for the %s -> %s gene flow is the same (%s)",
from_name, to_name, start), call. = FALSE)
gf_dir <- if (start < end) "forward" else "backward"
from_dir <- time_direction(from)
to_dir <- time_direction(to)
direction <- unique(setdiff(c(gf_dir, from_dir, to_dir), "unknown"))
if (length(direction) > 1)
stop("Inconsistent time direction implied by populations and the gene flow event", call. = FALSE)
# make sure both participating populations are present at the start of the
# gene flow event (`check_present_time()` is reused from the sampling functionality)
tryCatch(
{
check_present_time(start, from, offset = 0, direction = direction)
check_present_time(end, from, offset = 0, direction = direction)
check_present_time(start, to, offset = 0, direction = direction)
check_present_time(end, to, offset = 0, direction = direction)
check_removal_time(start, from)
check_removal_time(end, from)
check_removal_time(start, to)
check_removal_time(end, to)
},
error = function(e) {
stop(sprintf("Both %s and %s must be already present within the gene-flow window %s-%s",
from_name, to_name, start, end), call. = FALSE)
}
)
if (has_map(from) && has_map(to)) {
comp <- if (direction == "forward") `<=` else `>=`
# get the last specified spatial maps before the geneflow time
region_from <- intersect_features(from[comp(from$time, start), ] %>% .[nrow(.), ])
region_to <- intersect_features(to[comp(to$time, start), ] %>% .[nrow(.), ])
if (nrow(region_from) == 0)
stop(sprintf("No spatial map defined for %s at/before the time %d",
from_name, start),
call. = FALSE)
if (nrow(region_to) == 0)
stop(sprintf("No spatial map defined for %s at/before the time %d",
to_name, start),
call. = FALSE)
# calculate the overlap of spatial ranges between source and target
region_overlap <- sf::st_intersection(region_from, region_to)
area_overlap <- as.numeric(sum(sf::st_area(region_overlap)))
if (overlap && area_overlap == 0) {
stop(sprintf("No overlap between population ranges of %s and %s at time %d.
Please check the spatial maps of both populations by running
`plot_map(%s, %s)` and adjust them accordingly. Alternatively, in case
this makes sense for your model, you can add `overlap = F` which
will instruct slendr to simulate gene flow without spatial overlap
between populations.",
from_name, to_name, start, deparse(base::substitute(from)),
deparse(base::substitute(to))), call. = FALSE)
}
} else
overlap <- FALSE
data.frame(
from_name = from_name,
to_name = to_name,
tstart = start,
tend = end,
rate = rate,
overlap = overlap,
stringsAsFactors = FALSE
)
}
#' Define a world map for all spatial operations
#'
#' Defines either an abstract geographic landscape (blank or containing
#' user-defined landscape) or using a real Earth cartographic data from the
#' Natural Earth project (<https://www.naturalearthdata.com>).
#'
#' @param xrange Two-dimensional vector specifying minimum and maximum
#' horizontal range ("longitude" if using real Earth cartographic data)
#' @param yrange Two-dimensional vector specifying minimum and maximum vertical
#' range ("latitude" if using real Earth cartographic data)
#' @param landscape Either "blank" (for blank abstract geography),
#' "naturalearth" (for real Earth geography) or an object of the class
#' \code{sf} defining abstract geographic features of the world
#' @param crs EPSG code of a coordinate reference system to use for spatial
#' operations. No CRS is assumed by default (\code{NULL}), implying an
#' abstract landscape not tied to any real-world geographic region (when
#' \code{landscape = "blank"} or when \code{landscape} is a custom-defined
#' geographic landscape), or implying WGS-84 (EPSG 4326) coordinate system
#' when a real Earth landscape was defined (\code{landscape =
#' "naturalearth"}).
#' @param scale If Natural Earth geographic data is used (i.e. \code{landscape =
#' "naturalearth"}), this parameter determines the resolution of the data
#' used. The value "small" corresponds to 1:110m data and is provided with the
#' package, values "medium" and "large" correspond to 1:50m and 1:10m
#' respectively and will be downloaded from the internet. Default value is
#' "small".
#'
#' @return Object of the class \code{slendr_map}, which encodes a standard
#' spatial object of the class \code{sf} with additional slendr-specific
#' attributes such as requested x-range and y-range.
#'
#' @export
#'
#' @example man/examples/spatial_functions.R
world <- function(xrange, yrange, landscape = "naturalearth", crs = NULL,
scale = c("small", "medium", "large")) {
check_spatial_pkgs()
if (length(xrange) != 2 || length(yrange) != 2)
stop("Horizontal (i.e. longitude) and vertical (i.e. latitude) must be\n",
"specified as two-dimensional vectors such as:\n",
" `xrange = c(x1, x2), yrange = c(y1, y2)`",
call. = FALSE)
if (is.character(landscape) && landscape == "naturalearth" && is.null(crs)) {
warning("No explicit coordinate reference system (CRS) was specified.\n",
"A default geographic CRS (EPSG:4326) will be used but please make\n",
"sure this is appropriate for your geographic region of interest.",
call. = FALSE)
crs <- 4326
}
if (inherits(landscape, "sf")) { # a landscape defined by the user
cropped_landscape <- sf::st_crop(
landscape,
xmin = xrange[1], xmax = xrange[2],
ymin = yrange[1], ymax = yrange[2]
)
map <- sf::st_sf(landscape = sf::st_geometry(cropped_landscape)) %>%
set_bbox(xmin = xrange[1], xmax = xrange[2], ymin = yrange[1], ymax = yrange[2])
} else if (landscape == "blank") { # an empty abstract landscape
map <- sf::st_sf(geometry = sf::st_sfc()) %>%
set_bbox(xmin = xrange[1], xmax = xrange[2], ymin = yrange[1], ymax = yrange[2])
} else if (landscape == "naturalearth") { # Natural Earth data vector landscape
scale <- match.arg(scale)
# the small scale Natural Earth data is bundled with slendr
ne_dir <- file.path(tempdir(), "naturalearth")
if (scale == "small") {
utils::unzip(system.file("naturalearth/ne_110m_land.zip", package = "slendr"),
exdir = ne_dir)
} else {
size <- ifelse(scale == "large", 10, 50)
file <- sprintf("ne_%sm_land.zip", size)
if (!dir.exists(ne_dir)) dir.create(ne_dir)
path <- file.path(ne_dir, file)
utils::download.file(
url = sprintf("https://naturalearth.s3.amazonaws.com/%sm_physical/%s", size, file),
destfile = path, quiet = TRUE
)
utils::unzip(path, exdir = ne_dir)
}
# TODO: this function uses internally rgdal, which is to be retired by 2023
# silence the deprecation warning for now, as it would only confuse the user
# and figure out a way to deal with this (either by providing a PR to the devs
# or hacking our own alternative)
suppressWarnings(
map_raw <- rnaturalearth::ne_load(
scale = scale, type = "land", category = "physical",
returnclass = "sf", destdir = ne_dir
)
)
sf::st_agr(map_raw) <- "constant"
# define boundary coordinates in the target CRS
crop_bounds <- define_zoom(xrange, yrange, "EPSG:4326")
# crop the map to the boundary coordinates
map_cropped <- tryCatch({
sf::st_crop(sf::st_make_valid(map_raw), crop_bounds)
}, error = function(cond) {
sf::st_crop(map_raw, crop_bounds)
})
# transform the map into the target CRS if needed
map <- sf::st_transform(map_cropped, crs)
} else {
stop("Landscape has to be either 'blank', 'naturalearth' or an object of the class 'sf'",
call. = FALSE)
}
sf::st_agr(map) <- "constant"
class(map) <- set_class(map, "map")
attr(map, "xrange") <- xrange
attr(map, "yrange") <- yrange
map
}
#' Define a geographic region
#'
#' Creates a geographic region (a polygon) on a given map and gives it
#' a name. This can be used to define objects which can be reused in
#' multiple places in a slendr script (such as \code{region} arguments
#' of \code{population}) without having to repeatedly define polygon
#' coordinates.
#'
#' @param name Name of the geographic region
#' @param map Object of the type \code{sf} which defines the map
#' @param center Two-dimensional vector specifying the center of the
#' circular range
#' @param radius Radius of the circular range
#' @param polygon List of vector pairs, defining corners of the
#' polygon range or a geographic region of the class
#' \code{slendr_region} from which the polygon coordinates will be
#' extracted (see the \code{region() function})
#'
#' @return Object of the class \code{slendr_region} which encodes a standard
#' spatial object of the class \code{sf} with several additional attributes
#' (most importantly a corresponding \code{slendr_map} object, if applicable).
#'
#' @export
#'
#' @example man/examples/spatial_functions.R
region <- function(name = NULL, map = NULL, center = NULL, radius = NULL, polygon = NULL) {
check_spatial_pkgs()
# for accurate circular areas see: https://stackoverflow.com/a/65280376
if (is.null(name)) name <- "unnamed region"
region <- sf::st_sf(
region = name,
geometry = define_boundary(map, center, radius, polygon)
) %>% sf::st_make_valid()
sf::st_agr(region) <- "constant"
# keep the map as an internal attribute
attr(region, "map") <- map
class(region) <- set_class(region, "region")
region
}
#' Reproject coordinates between coordinate systems
#'
#' Converts between coordinates on a compiled raster map (i.e. pixel
#' units) and different Geographic Coordinate Systems (CRS).
#'
#' @param x,y Coordinates in two dimensions (if missing, coordinates
#' are expected to be in the \code{data.frame} specified in the
#' \code{coords} parameter as columns "x" and "y")
#' @param from,to Either a CRS code accepted by GDAL, a valid integer
#' EPSG value, an object of class \code{crs}, the value "raster"
#' (converting from/to pixel coordinates), or "world" (converting
#' from/to whatever CRS is set for the underlying map)
#' @param coords data.frame-like object with coordinates in columns
#' "x" and "y"
#' @param model Object of the class \code{slendr_model}
#' @param add Add column coordinates to the input data.frame
#' \code{coords} (coordinates otherwise returned as a separate
#' object)?
#' @param input_prefix,output_prefix Input and output prefixes of data
#' frame columns with spatial coordinates
#'
#' @return Data.frame with converted two-dimensional coordinates given as input
#'
#' @examples
#' lon_lat_df <- data.frame(x = c(30, 0, 15), y = c(60, 40, 10))
#'
#' reproject(
#' from = "epsg:4326",
#' to = "epsg:3035",
#' coords = lon_lat_df,
#' add = TRUE # add converted [lon,lat] coordinates as a new column
#' )
#' @export
reproject <- function(from, to, x = NULL, y = NULL, coords = NULL, model = NULL,
add = FALSE, input_prefix = "", output_prefix = "new") {
check_spatial_pkgs()
if ((is.null(x) | is.null(y)) & is.null(coords))
stop("Coordinates for conversion are missing", call. = FALSE)
from_slendr <- !is.null(model)
if ((from == "raster" | to == "raster") & !from_slendr)
stop("Model object needs to be specified for conversion of raster coordinates", call. = FALSE)
if (add && is.null(coords))
stop("Converted coordinates can only be added to a provided data.frame", call. = FALSE)
inx <- paste0(input_prefix, "x"); iny <- paste0(input_prefix, "y")
outx <- paste0(output_prefix, "x"); outy <- paste0(output_prefix, "y")
if (!is.null(coords) & !all(c(inx, iny) %in% colnames(coords)))
stop("Columns '", inx, "' and '", iny, "' must be present in the input data.frame", call. = FALSE)
if (from_slendr) {
# dimension of the map in the projected CRS units
bbox <- sf::st_bbox(model$world)
map_dim <- c(bbox["xmax"] - bbox["xmin"], bbox["ymax"] - bbox["ymin"])
# dimension of the rasterized map in pixel units
# (x/y dimensions of PNGs are reversed)
raster_dim <- dim(png::readPNG(file.path(model$path, model$maps$path[1])))[2:1]
}
if (to == "world") to <- sf::st_crs(model$world)
if (from == "world" && has_crs(model$world)) from <- sf::st_crs(model$world)
if (is.null(coords)) {
df <- data.frame(x = x, y = y)
colnames(df) <- c(inx, iny)
} else
df <- coords[, c(inx, iny)]
if (from == "raster") {
# convert pixel coordinates to na sf object in world-based coordinates
df[[inx]] <- bbox["xmin"] + map_dim[1] * df[[inx]] / raster_dim[1]
df[[iny]] <- bbox["ymin"] + map_dim[2] * df[[iny]] / raster_dim[2]
point <- sf::st_as_sf(df, coords = c(inx, iny), crs = sf::st_crs(model$world))
} else {
# ... otherwise create a formal sf point object from the
# coordinates already given
point <- sf::st_as_sf(df, coords = c(inx, iny), crs = from)
}
if (to == "raster") {
if (has_crs(model$world))
point<- sf::st_transform(point, crs = sf::st_crs(model$world))
point_coords <- sf::st_coordinates(point)
newx <- abs((point_coords[, "X"] - bbox["xmin"])) / map_dim[1] * raster_dim[1]
newy <- abs((point_coords[, "Y"] - bbox["ymin"])) / map_dim[2] * raster_dim[2]
new_point <- data.frame(newx = round(as.vector(newx)), newy = round(as.vector(newy)))
colnames(new_point) <- c(outx, outy)
} else if (!is.na(to)) {
new_point <- sf::st_transform(point, crs = to) %>% sf::st_coordinates()
colnames(new_point) <- c(outx, outy)
} else {
new_point <- point %>% sf::st_coordinates()
colnames(new_point) <- c(outx, outy)
}
# if (nrow(new_point) == 1)
# return(as.vector(unlist(new_point)))
if (add) new_point <- cbind(coords, new_point) %>% dplyr::as_tibble()
new_point
}
#' Merge two spatial \code{slendr} objects into one
#'
#' @param x Object of the class \code{slendr}
#' @param y Object of the class \code{slendr}
#' @param name Optional name of the resulting geographic region. If missing,
#' name will be constructed from the function arguments.
#'
#' @return Object of the class \code{slendr_region} which encodes a standard
#' spatial object of the class \code{sf} with several additional attributes
#' (most importantly a corresponding \code{slendr_map} object, if applicable).
#'
#' @export
#'
#' @example man/examples/spatial_functions.R
join <- function(x, y, name = NULL) {
check_spatial_pkgs()
if (!inherits(x, "slendr")) x <- region(polygon = x)
if (!inherits(y, "slendr")) y <- region(polygon = y)
result <- sf::st_union(x, y)
result$region.1 <- NULL
if (is.null(name))
result$region <- sprintf("(%s and %s)", x$region, y$region)
else
result$region <- name
attrs <- if (!is.null(attr(x, "map"))) "map" else NULL
result <- copy_attributes(result, x, attrs)
sf::st_agr(result) <- "constant"
result
}
#' Generate the overlap of two \code{slendr} objects
#'
#' @inheritParams join
#'
#' @return Object of the class \code{slendr_region} which encodes a standard
#' spatial object of the class \code{sf} with several additional attributes
#' (most importantly a corresponding \code{slendr_map} object, if applicable).
#'
#' @export
overlap <- function(x, y, name = NULL) {
check_spatial_pkgs()
if (!inherits(x, "slendr")) x <- region(polygon = x)
if (!inherits(y, "slendr")) y <- region(polygon = y)
result <- sf::st_intersection(x, y)
if (nrow(result)) {
if (nrow(result) > 1)
result <- sf::st_sf(geometry = sf::st_combine(result))
if (is.null(name)) {
xname <- deparse(base::substitute(x))
yname <- deparse(base::substitute(y))
result$region <- sprintf("(overlap of %s and %s)", xname, yname)
} else
result$region <- name
result <- result[, c("region", "geometry")]
}
map <- attr(x, "map")
class(result) <- set_class(result, "region")
attr(result, "map") <- map
sf::st_agr(result) <- "constant"
result
}
#' Generate the difference between two \code{slendr} objects
#'
#' @inheritParams join
#'
#' @return Object of the class \code{slendr_region} which encodes a standard
#' spatial object of the class \code{sf} with several additional attributes
#' (most importantly a corresponding \code{slendr_map} object, if applicable).
#'
#' @export
subtract <- function(x, y, name = NULL) {
check_spatial_pkgs()
if (!inherits(x, "slendr")) x <- region(polygon = x)
if (!inherits(y, "slendr")) y <- region(polygon = y)
result <- sf::st_difference(x, y)
if (nrow(result)) {
if (is.null(name)) {
xname <- deparse(base::substitute(x))
yname <- deparse(base::substitute(y))
result$region <- sprintf("(%s minus %s)", xname, yname)
} else
result$region <- name
}
map <- attr(x, "map")
result <- result[, c("region", "geometry")]
class(result) <- set_class(result, "region")
attr(result, "map") <- map
sf::st_agr(result) <- "constant"
result
}
#' Calculate the distance between a pair of spatial boundaries
#'
#' @param x,y Objects of the class \code{slendr}
#' @param measure How to measure distance? This can be either \code{'border'}
#' (distance between the borders of \code{x} and \code{y}) or \code{'center'}
#' (distance between their centroids).
#' @param time Time closest to the spatial maps of \code{x} and \code{y} if they
#' represent \code{slendr_pop} population boundaries (ignored for general
#' \code{slendr_region} objects)
#'
#' @return If the coordinate reference system was specified, a distance in
#' projected units (i.e. meters) is returned. Otherwise the function returns a
#' normal Euclidean distance.
#'
#' @examples
#' # create two regions on a blank abstract landscape
#' region_a <- region("A", center = c(20, 50), radius = 20)
#' region_b <- region("B", center = c(80, 50), radius = 20)
#' plot_map(region_a, region_b)
#'
#' # compute the distance between the centers of both population ranges
#' distance(region_a, region_b, measure = "center")
#'
#' # compute the distance between the borders of both population ranges
#' distance(region_a, region_b, measure = "border")
#' @export
distance <- function(x, y, measure, time = NULL) {
check_spatial_pkgs()
if (!measure %in% c("center", "border"))
stop("Unknown distance measure method provided (must be either 'center' or 'border')", call. = FALSE)
if ((inherits(x, "slendr_pop") | inherits(y, "slendr_pop")) & is.null(time))
stop("Time of the nearest spatial snapshot must be be given when calculating
distance between population boundaries", call. = FALSE)
if (inherits(x, "slendr_pop")) x <- x[which.min(abs(x$time - time)), ]
if (inherits(y, "slendr_pop")) y <- y[which.min(abs(y$time - time)), ]
if (measure == "center") {
x <- sf::st_centroid(x)
y <- sf::st_centroid(y)
}
as.vector(sf::st_distance(x, y))
}
#' Calculate the area covered by the given slendr object
#'
#' @param x Object of the class \code{slendr}
#'
#' @return Area covered by the input object. If a \code{slendr_pop}
#' was given, a table with an population range area in each time
#' point will be returned. If a \code{slendr_region} or
#' \code{slendr_world} object was specified, the total area covered
#' by this object's spatial boundary will be returned.
#'
#' @examples
#' region_a <- region("A", center = c(20, 50), radius = 20)
#' region_b <- region("B", polygon = list(c(50, 40), c(70, 40), c(70, 60), c(50, 60)))
#' plot_map(region_a, region_b)
#'
#' # note that area won't be *exactly* equal to pi*r^2:
#' # https://stackoverflow.com/a/65280376
#' area(region_a)
#'
#' area(region_b)
#' @export
area <- function(x) {
check_spatial_pkgs()
if (!inherits(x, "slendr") & !inherits(x, "sf"))
stop("Input must be of the type 'slendr' or 'sf'", call. = FALSE)
if (inherits(x, "slendr_pop")) {
areas <- purrr::map_dbl(seq_len(nrow(x)), ~ as.numeric(sum(sf::st_area(sf::st_make_valid(x[.x, ])))))
times <- x$time
return(dplyr::tibble(time = times, area = areas))
} else if (inherits(x, "slendr_map") || inherits(x, "slendr_region"))
return(as.numeric(sum(sf::st_area(x))))
else
stop("Only the areas covered by slendr_(pop/region/world) objects can be computed",
call. = FALSE)
}
#' Define sampling events for a given set of populations
#'
#' Schedule sampling events at specified times and, optionally, a given set of
#' locations on a landscape
#'
#' If both times and locations are given, the the sampling will be scheduled on
#' each specified location in each given time-point. Note that for the
#' time-being, in the interest of simplicity, no sanity checks are performed on
#' the locations given except the restriction that the sampling points must fall
#' within the bounding box around the simulated world map. Other than that,
#' slendr will simply instruct its SLiM backend script to sample individuals as
#' close to the sampling points given as possible, regardless of whether those
#' points lie within a population spatial boundary at that particular moment of
#' time.
#'
#' @param model Object of the class \code{slendr_model}
#' @param times Integer vector of times (in model time units) at which to
#' schedule remembering of individuals in the tree-sequence
#' @param ... Lists of two elements (\code{slendr_pop} population object-<number
#' of individuals to sample), representing from which populations should how
#' many individuals be remembered at times given by \code{times}
#' @param locations List of vector pairs, defining two-dimensional coordinates
#' of locations at which the closest number of individuals from given
#' populations should be sampled. If \code{NULL} (the default), individuals
#' will be sampled randomly throughout their spatial boundary.
#' @param strict Should any occurence of a population not being present at a
#' given time result in an error? Default is \code{FALSE}, meaning that
#' invalid sampling times for any populations will be quietly ignored.
#'
#' @return Data frame with three columns: time of sampling, population to sample
#' from, how many individuals to sample
#'
#' @examples
#' \dontshow{check_dependencies(python = TRUE, quit = TRUE) # dependencies must be present
#' }
#' init_env()
#'
#' # load an example model with an already simulated tree sequence
#' path <- system.file("extdata/models/introgression", package = "slendr")
#' model <- read_model(path)
#'
#' # afr and eur objects would normally be created before slendr model compilation,
#' # but here we take them out of the model object already compiled for this
#' # example (in a standard slendr simulation pipeline, this wouldn't be necessary)
#' afr <- model$populations[["AFR"]]
#' eur <- model$populations[["EUR"]]
#'
#' # schedule the recording of 10 African and 100 European individuals from a
#' # given model at 20 ky, 10 ky, 5ky ago and at present-day (time 0)
#' schedule <- schedule_sampling(
#' model, times = c(20000, 10000, 5000, 0),
#' list(afr, 10), list(eur, 100)
#' )
#'
#' # the result of `schedule_sampling` is a simple data frame (note that the locations
#' # of sampling locations have `NA` values because the model is non-spatial)
#' schedule
#' @export
schedule_sampling <- function(model, times, ..., locations = NULL, strict = FALSE) {
if (!inherits(model, "slendr_model"))
stop("A slendr_model object must be specified", call. = FALSE)
times <- unique(as.integer(round(sort(times))))
samples <- list(...)
sample_pops <- purrr::map(samples, 1)
sample_counts <- purrr::map(samples, 2)
model_names <- vapply(model$populations, function(pop) pop$pop[1], FUN.VALUE = "character")
sample_names <- vapply(sample_pops, function(pop) pop$pop[1], FUN.VALUE = "character")
missing_names <- setdiff(sample_names, model_names)
if (length(missing_names))
stop("The following sampled populations are not part of the model: ",
paste(missing_names, collapse = ", "), call. = FALSE)
if (is.null(model$world) && !is.null(locations))
stop("Sampling locations may only be specified for a spatial model", call. = FALSE)
if (length(sample_pops) != length(sample_counts))
stop("Samples must be represented by pairs of <slendr_pop>-<n>", call. = FALSE)
if (!all(purrr::map_lgl(sample_pops, ~ inherits(.x, "slendr_pop"))))
stop("Objects to sample from must be of the class 'slendr_pop'", call. = FALSE)
if (!all(purrr::map_lgl(sample_counts, ~ .x == round(.x))))
stop("Sample counts must be integer numbers", call. = FALSE)
# make sure that all sampling times fall in the time window of the simulation itself
oldest_time <- get_oldest_time(model$populations, model$direction)
if ((model$direction == "forward" && (any(times > oldest_time + model$orig_length)
|| any(times < oldest_time))) ||
(model$direction == "backward" && (any(times < oldest_time - model$orig_length)
|| any(times > oldest_time)))) {
if (strict)
stop("A sampling event was scheduled outside of the simulation time window", call. = FALSE)
else if (model$direction == "forward")
times <- times[times <= oldest_time + model$orig_length]
else
times <- times[times >= oldest_time - model$orig_length]
}
schedule <- lapply(times, function(t) {
lapply(samples, function(s) {
pop <- s[[1]]
n <- s[[2]]
tryCatch(
{
check_removal_time(t, pop, direction = model$direction)
check_present_time(t, pop, direction = model$direction, offset = model$generation_time)
if (!is.infinite(n)) n <- as.integer(n)
dplyr::tibble(time = t, pop = pop$pop[1], n = n)
},
error = function(cond) {
if (!strict)
return(NULL)
else
stop("Cannot schedule sampling for '", pop$pop, "' at time ", t,
" because\nthe population will not yet be present in the simulation",
" at that\npoint. Consider running this function with `strict = FALSE`",
" which\nwill automatically retain only valid sampling events.",
call. = FALSE)
})
}) %>% do.call(rbind, .)
}) %>% do.call(rbind, .)
if (is.null(schedule)) {
warning("No valid sampling events were retained", call. = FALSE)
return(NULL)
}
if (!is.null(locations)) {
check_location_bounds(locations, model$world)
# convert the list of coordinate pairs into a data frame with x and y
# columns transformed from world-based coordinates into raster-based
# coordinates
locations_df <- dplyr::tibble(
orig_x = purrr::map_dbl(locations, ~ .[[1]]),
orig_y = purrr::map_dbl(locations, ~ .[[2]])
) %>%
reproject(
coords = ., model = model,
from = "world", to = "raster",
input_prefix = "orig_", output_prefix = "",
add = TRUE
) %>%
dplyr::rename(x_orig = orig_x, y_orig = orig_y)
schedule <- merge(schedule, locations_df)
} else {
schedule$x <- schedule$y <- schedule$x_orig <- schedule$y_orig <- NA
}
schedule
}
#' Activate slendr's own dedicated Python environment
#'
#' This function attempts to activate a dedicated slendr Miniconda Python
#' environment previously set up via \code{setup_env}.
#'
#' @param quiet Should informative messages be printed to the console? Default
#' is \code{FALSE}.
#'
#' @return No return value, called for side effects
#'
#' @export
init_env <- function(quiet = FALSE) {
if (!is_slendr_env_present())
stop("Could not activate slendr's Python environment because it is not\npresent ",
"on your system ('", PYTHON_ENV, "').\n\n",
"To set up a dedicated Python environment you first need to run setup_env().", call. = FALSE)
else {
reticulate::use_condaenv(PYTHON_ENV, required = TRUE)
# this is an awful workaround around the reticulate/Python bug which prevents
# import_from_path (see zzz.R) from working properly -- I'm getting nonsensical
# Error in py_call_impl(callable, dots$args, dots$keywords) :
# TypeError: integer argument expected, got float
# in places with no integer/float conversion in sight
#
# at least it prevents having to do things like:
# reticulate::py_run_string("def get_pedigree_ids(ts): return [ind.metadata['pedigree_id']
# for ind in ts.individuals()]")
# (moved from ts_read() here because this is a better place for loading our Python functions)
reticulate::source_python(file = system.file("pylib/pylib.py", package = "slendr"))
if (!reticulate::py_module_available("msprime") ||
!reticulate::py_module_available("tskit") ||
!reticulate::py_module_available("pyslim") ||
!reticulate::py_module_available("tspop")) {
stop("Python environment ", PYTHON_ENV, " has been found but it",
" does not appear to have msprime, tskit, pyslim and tspop modules all",
" installed. Perhaps the environment got corrupted somehow?",
" Running `clear_env()` and `setup_env()` to reset the slendr's Python",
" environment is recommended.", call. = FALSE)
} else {
# pylib <<- reticulate::import_from_path(
# "pylib",
# path = system.file("python", package = "slendr"),
# delay_load = TRUE
# )
if (!quiet)
message("The interface to all required Python modules has been activated.")
}
}
}
#' Setup a dedicated Python virtual environment for slendr
#'
#' This function will automatically download a Python miniconda distribution
#' dedicated to an R-Python interface. It will also create a slendr-specific
#' Python environment with all the required Python dependencies.
#'
#' @param quiet Should informative messages be printed to the console? Default
#' is \code{FALSE}.
#' @param agree Automatically agree to all questions?
#' @param pip Should pip be used instead of conda for installing slendr's Python
#' dependencies?
#'
#' @return No return value, called for side effects
#'
#' @export
setup_env <- function(quiet = FALSE, agree = FALSE, pip = FALSE) {
if (is_slendr_env_present()) {
message("A required slendr Python environment is already present. You can activate\n",
"it by calling init_env().")
} else {
if (agree)
answer <- 2
else
answer <- utils::menu(
c("No", "Yes"),
title = paste0(
"This function will install a completely isolated Miniconda Python distribution\n",
"just for slendr and create an environment with all required Python modules.\n",
"\nEverything will be installed into a completely separate location into an\n",
"isolated environment in an R library directory. This won't affect your other\n",
"Python installations at all. You can always wipe out the automatically created\n",
"environment by running clear_env().\n\n",
"Do you wish to proceed with the automated Python environment setup?")
)
if (answer == 2) {
message("=======================================================================")
message("Installing slendr's Python environment. Please wait until")
message("the installation procedure finishes. Do NOT interrupt the")
message("process while the installation is still running.")
message("======================================================================\n")
Sys.sleep(10)
if (!dir.exists(reticulate::miniconda_path()))
reticulate::install_miniconda()
# parse the Python env name back to the list of dependencies
# (the environment is defined in .onAttach(), and this makes sure the
# dependencies are defined all in one place)
versions <- PYTHON_ENV %>% gsub("-", "==", .) %>% strsplit("_") %>% .[[1]]
python_version <- gsub("Python==", "", versions[1])
package_versions <- versions[-1]
reticulate::conda_create(envname = PYTHON_ENV, python_version = python_version)
reticulate::use_condaenv(PYTHON_ENV, required = TRUE)
# # some Python dependencies are broken on M1 Mac architecture, so fallback
# # to pip in cases like this (otherwise use conda to avoid any potential
# # compilation issues such as missing libgsl)
# if (is.null(pip))
# pip <- all(Sys.info()[c("sysname", "machine")] == c("Darwin", "arm64"))
# tspop isn't available on conda and pyslim gives installation errors with conda
# on M-architecture Macs, so they will need to be installed by pip
# no matter the user's preference (given by the pip function argument value)
# TODO: check at some point later if tspop / pyslim are on conda for all systems
which_tspop_and_pyslim <- grepl("tspop|pyslim", package_versions)
reticulate::conda_install(envname = PYTHON_ENV, packages = package_versions[!which_tspop_and_pyslim], pip = pip)
reticulate::conda_install(envname = PYTHON_ENV, packages = c(package_versions[which_tspop_and_pyslim], "pyarrow"), pip = TRUE)
if (!quiet) {
message("======================================================================")
message("Python environment for slendr has been successfuly created, and ",
"the R\ninterface to msprime, tskit, and pyslim modules has been activated.\n")
message("In future sessions, activate this environment by calling init_env().")
message("=======================================================================")
}
} else
warning("Your Python environment is not set up correctly which means that the tree\n",
"sequence functionality of slendr will not work.", call. = FALSE)
}
}
#' Remove the automatically created slendr Python environment
#'
#' @param force Ask before deleting any environment?
#' @param all Should all (present and past) slendr Python environments be removed
#' (default is \code{FALSE}) or just the current environment?
#'
#' @return No return value, called for side effects
#'
#' @export
clear_env <- function(force = FALSE, all = FALSE) {
envs_to_delete <- reticulate::conda_list()$name %>%
grep("Python-.*_msprime-.*_tskit-.*_pyslim-.*", ., value = TRUE)
if (!all)
envs_to_delete <- grep(PYTHON_ENV, envs_to_delete, value = TRUE)
if (length(envs_to_delete) == 0) {
warning("No slendr Python environment has been found, nothing to delete.", call. = FALSE)
} else {
env_names <- paste(paste0("[#", seq_along(envs_to_delete), "]"), envs_to_delete)
cat("The following slendr-looking Python environments have been identified:\n\n")
cat(paste0(paste0(" - ", env_names, "\n"), collapse = ""))
if (!force) {
cat("\nYou will be asked for confirmation before any of them are deleted.\n")
if (all == TRUE) {
cat("To remove all of them at once, you can call `clear_env(force = TRUE, all = TRUE)`.\n")
}
cat("\n")
}
for (i in seq_along(envs_to_delete)) {
env <- envs_to_delete[i]
path <- reticulate::conda_list() %>%
dplyr::filter(grepl(env, name)) %>%
{ gsub("bin\\/python", "", .$python) }
if (force) {
answer <- 2
} else {
answer <- utils::menu(
c("No", "Yes"),
title = paste0("Do you want to delete Python environment [#", i, "]? (Hit ESC to cancel.)\n\n", path)
)
}
if (answer == 2) {
reticulate::conda_remove(env)
message("slendr environment '", env, "' has been sucessfully removed.")
} else {
message("slendr environment '", env, "' has not been removed.")
}
}
}
}
#' Get the name of the current slendr Python environment
#'
#' @return Name of the slendr Python environment
get_env <- function() {
PYTHON_ENV
}
#' Check that the active Python environment is setup for slendr
#'
#' This function inspects the Python environment which has been activated by the
#' reticulate package and prints the versions of all slendr Python dependencies
#' to the console.
#'
#' @param verbose Should a log message be printed? If \code{FALSE}, only a logical
#' value is returned (invisibly).
#'
#' @return Either \code{TRUE} (slendr Python environment is present) or \code{FALSE}
#' (slendr Python environment is not present).
#'
#' @examples
#' \dontshow{check_dependencies(python = TRUE, quit = TRUE) # dependencies must be present
#' }
#' init_env()
#' check_env()
#' @export
check_env <- function(verbose = TRUE) {
# if there is no Python available on user's system, don't immediately
# jump to installing miniconda (let's deal with that in setup_env())
orig_env <- Sys.getenv("RETICULATE_MINICONDA_ENABLED")
Sys.setenv(RETICULATE_MINICONDA_ENABLED = FALSE)
on.exit(Sys.setenv(RETICULATE_MINICONDA_ENABLED = orig_env))
py <- reticulate::py_discover_config()
has_tskit <- reticulate::py_module_available("tskit")
has_msprime <- reticulate::py_module_available("msprime")
has_pyslim <- reticulate::py_module_available("pyslim")
has_tspop <- reticulate::py_module_available("tspop")
# has_pylib <- !is.null(pylib)
if (has_tskit)
tskit_version <- paste("version", tskit[["_version"]]$tskit_version, "\u2713")
else
tskit_version <- "MISSING \u274C"
if (has_msprime)
msprime_version <- paste("version", msp[["_version"]]$version, "\u2713")
else
msprime_version <- "MISSING \u274C"
if (has_pyslim)
pyslim_version <- paste("version", pyslim$pyslim_version, "\u2713")
else
pyslim_version <- "MISSING \u274C"
if (has_tspop)
tspop_version <- paste("present \u2713")
else
tspop_version <- "MISSING \u274C"
# if (has_pylib)
# pylib_status <- "successfully loaded \u2713"
# else
# pylib_status <- "NOT LOADED \u274C"
if (verbose) {
cat("Summary of the currently active Python environment:\n\n")
cat("Python binary:", py$python, "\n")
cat("Python version:", py$version_string, "\n")
cat("\nslendr requirements:\n")
cat(" - tskit:", tskit_version, "\n")
cat(" - msprime:", msprime_version, "\n")
cat(" - pyslim:", pyslim_version, "\n")
cat(" - tspop:", tspop_version, "\n")
# cat(" - slendr module:", pylib_status, "\n")
}
if (!all(c(has_tskit, has_pyslim, has_msprime, has_tspop))) {
return_value <- FALSE
if (verbose)
cat("\nNote that due to the technical limitations of embedded Python,",
"if you\nwant to switch to another Python environment you will need",
"to restart\nyour R session first.\n")
# reference: https://github.com/rstudio/reticulate/issues/27#issuecomment-512256949
} else
return_value <- TRUE
if (verbose)
return(invisible(return_value))
else
return(return_value)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.