R/interface.R

Defines functions resize set_range shrink_range expand_range move population

Documented in expand_range move population resize set_range shrink_range

#' Define a population
#'
#' Defines the parameters of a population (non-spatial and spatial).
#'
#' There are four ways to specify a spatial boundary: i) circular range
#' specified using a center coordinate and a radius, ii) polygon specified as a
#' list of two-dimensional vector coordinates, iii) polygon as in ii), but
#' defined (and named) using the \code{region} function, iv) with just a world
#' map specified (circular or polygon range parameters set to the default
#' \code{NULL} value), the population will be allowed to occupy the entire
#' landscape.
#'
#' Note that because slendr models have to accomodate both SLiM and msprime
#' back ends, population sizes and split times are rounded to the nearest
#' integer value.
#'
#' @param name Name of the population
#' @param time Time of the population's first appearance
#' @param N Number of individuals at the time of first appearance
#' @param parent Parent population object or \code{NULL} (which indicates that
#'   the population does not have an ancestor, as it is the first population
#'   in its "lineage")
#' @param map Object of the type \code{slendr_map} which defines the world
#'   context (created using the \code{world} function). If the value
#'   \code{FALSE} is provided, a non-spatial model will be run.
#' @param center Two-dimensional vector specifying the center of the circular
#'   range
#' @param radius Radius of the circular range
#' @param polygon List of vector pairs, defining corners of the polygon range or
#'   a geographic region of the class \code{slendr_region} from which the
#'   polygon coordinates will be extracted (see the \code{region() function})
#' @param remove Time at which the population should be removed
#' @param intersect Intersect the population's boundaries with landscape
#'   features?
#' @param competition,mating Maximum spatial competition and mating
#'   choice distance
#' @param dispersal Standard deviation of the normal distribution of the
#'   distance that offspring disperses from its parent
#' @param dispersal_fun Distribution function governing the dispersal of
#'   offspring. One of "normal", "uniform", "cauchy", "exponential", or
#'   "brownian" (in which vertical and horizontal displacements are drawn from a
#'   normal distribution independently).
#' @param aquatic Is the species aquatic (\code{FALSE} by default, i.e.
#'   terrestrial species)?
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#'   parameters such as name, time of appearance in the simulation, parent
#'   population (if any), and its spatial parameters such as map and spatial
#'   boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
population <- function(name, time, N, parent = NULL, map = FALSE,
                       center = NULL, radius = NULL, polygon = NULL,
                       remove = NULL, intersect = TRUE,
                       competition = NA, mating = NA, dispersal = NA,
                       dispersal_fun = NULL, aquatic = FALSE) {
  if (!is.character(name) ||
      length(name) != 1 ||
      !grepl("^(?:[_\\p{L}\\p{Nl}])(?:[_\\p{L}\\p{Nl}\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}])*$",
            name, perl = TRUE))
    stop("A population name must be a character scalar value which must also be\n",
         "valid Python identifiers (a restriction of the msprime simulation engine)", call. = FALSE)

  N <- as.integer(round(N))
  time <- as.integer(round(time))

  if (time < 1) stop("Split time must be a non-negative number", call. = FALSE)
  if (N < 1) stop("Population size must be a non-negative number", call. = FALSE)

  # if this population splits from a parental population, check that the parent
  # really exists and that the split time make sense given the time of appearance
  # of the parental population
  if (!is.null(parent)) {
    if (!inherits(parent, "slendr_pop"))
      stop("Only slendr_pop objects can represent parental populations", call. = FALSE)
    else
      check_split_time(time, parent)
  }

  parent_remove <- attr(parent, "remove")
  if (!is.null(parent) && parent_remove != -1) {
    direction <- if (time > parent$time[1]) "forward" else "backward"
    if (direction == "forward" && parent_remove <= time)
      stop("Parent population will be removed before the split event", call. = FALSE)
    else if (direction == "backward" && parent_remove >= time)
      stop("Parent population will be removed before the split event", call. = FALSE)
  }

  if (!is.logical(map) && !inherits(map, "slendr_map"))
    stop("A simulation landscape must be an object of the class slendr_map", call. = FALSE)

  if (!is.null(parent) && is.logical(map) && map == FALSE)
    map <- attr(parent, "map")

  if (inherits(map, "slendr_map")) {
    check_spatial_pkgs()

    # define the population range as a simple geometry object
    # and bind it with the annotation info into an sf object
    if (is.null(polygon) && is.null(center) && is.null(radius)) {
      geometry <- sf::st_geometry(map)
    } else if (!is.null(polygon) & inherits(polygon, "slendr_region"))
      geometry <- sf::st_geometry(polygon)
    else
      geometry <- define_boundary(map, center, radius, polygon)

    pop <- sf::st_sf(
      data.frame(pop = name, time = time, stringsAsFactors = FALSE),
      geometry = geometry
    )
    sf::st_agr(pop) <- "constant"
    attr(pop, "intersect") <- intersect
    attr(pop, "aquatic") <- aquatic
  } else
    pop <- list(pop = name, time = time)

  # when to clean up the population?
  attr(pop, "remove") <- if (!is.null(remove)) remove else -1

  # keep a record of the parent population
  if (inherits(parent, "slendr_pop"))
    attr(pop, "parent") <- parent
  else if (is.null(parent))
    attr(pop, "parent") <- "__pop_is_ancestor"
  else
    stop("Suspicious parent population", call. = FALSE)

  attr(pop, "map") <- map

  dispersal_fun <- kernel_fun(dispersal_fun)

  # create the first population history event - population split
  attr(pop, "history") <- list(data.frame(
    pop =  name,
    event = "split",
    time = time,
    N = N,
    competition = competition,
    mating = mating,
    dispersal = dispersal,
    dispersal_fun = dispersal_fun
  ))

  class(pop) <- set_class(pop, "pop")

  if (!is.logical(map) && intersect && (nrow(intersect_features(pop)) == 0))
    stop("The specified population boundary has no overlap with liveable landscape surface",
         call. = FALSE)

  pop
}


#' Move the population to a new location in a given amount of time
#'
#' This function defines a displacement of a population along a given trajectory
#' in a given time frame
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param trajectory List of two-dimensional vectors (longitude, latitude)
#'   specifying the migration trajectory
#' @param start,end Start/end points of the population migration
#' @param overlap Minimum overlap between subsequent spatial boundaries
#' @param snapshots The number of intermediate snapshots (overrides the
#'   \code{overlap} parameter)
#' @param verbose Show the progress of searching through the number of
#'   sufficient snapshots?
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#'   parameters such as name, time of appearance in the simulation, parent
#'   population (if any), and its spatial parameters such as map and spatial
#'   boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
move <- function(pop, trajectory, end, start, overlap = 0.8, snapshots = NULL,
                 verbose = TRUE) {
  if (!has_map(pop)) stop("This operation is only allowed for spatial models", call. = FALSE)

  check_spatial_pkgs()

  check_event_time(c(start, end), pop)
  check_removal_time(start, pop)
  check_removal_time(end, pop)

  if (!is.null(snapshots))
    if (snapshots <= 0)
      stop("The number of snapshots must be a non-negative integer", call. = FALSE)

  if (!(overlap > 0 & overlap < 1))
    stop("The required overlap between subsequent spatial maps must be a number between 0 and 1",
         call. = FALSE)

  map <- attr(pop, "map")

  # take care of just a single destination point being specified
  if (!is.list(trajectory) & length(trajectory) == 2)
    trajectory <- list(trajectory)

  # get the last available population boundary
  region_start <- pop[nrow(pop), ]
  region_start$time <- start
  sf::st_agr(region_start) <- "constant"

  # prepend the coordinates of the first region to the list of "checkpoints"
  # along the way of the movement
  start_coords <- sf::st_centroid(region_start)

  if (has_crs(map)) {
    source_crs <- "EPSG:4326"
    target_crs <- sf::st_crs(pop)
    start_coords <- sf::st_transform(start_coords, crs = source_crs)
  }

  start_coords <- sf::st_coordinates(start_coords)
  checkpoints <- c(list(as.vector(start_coords)), trajectory)

  traj <- sf::st_linestring(do.call(rbind, checkpoints)) %>% sf::st_sfc()

  if (has_crs(map)) {
    traj <- sf::st_sf(traj, crs = source_crs) %>%
      sf::st_transform(crs = target_crs)
  } else {
    traj <- sf::st_sf(traj)
  }

  if (is.null(snapshots)) {
    n <- 1
    message("Iterative search for the minimum sufficient number of intermediate ",
            "spatial snapshots, starting at ", n, ". This should only take a couple of ",
            "seconds, but if you don't want to wait, you can set `snapshots = N` manually.")
  } else
    n <- snapshots

  # iterate through the number of intermediate spatial boundaries to reach
  # the required overlap between subsequent spatial maps
  repeat {
    traj_segments <- sf::st_segmentize(traj, sf::st_length(traj) / n)

    traj_points <- sf::st_cast(traj_segments, "POINT")
    traj_points_coords <- sf::st_coordinates(traj_points)
    traj_diffs <- diff(traj_points_coords)

    time_slices <- seq(start, end, length.out = nrow(traj_points))[-1]
    traj_diffs <- cbind(traj_diffs, time = time_slices)

    inter_regions <- list()
    inter_regions[[1]] <- region_start
    for (i in seq_len(nrow(traj_diffs))) {
      shifted_region <- sf::st_geometry(inter_regions[[i]]) + traj_diffs[i, c("X", "Y")]
      inter_regions[[i + 1]] <- sf::st_sf(
        data.frame(
          pop = region_start$pop,
          time = traj_diffs[i, "time"],
          stringsAsFactors = FALSE
        ),
        geometry = shifted_region,
        crs = sf::st_crs(inter_regions[[i]])
      )
      sf::st_agr(inter_regions[[i + 1]]) <- "constant"
    }

    if (!is.null(snapshots)) break

    # calculate the overlap between subsequent spatial snapshots
    overlaps <- compute_overlaps(do.call(rbind, inter_regions))

    if (all(overlaps >= overlap)) {
      message("The required ", sprintf("%.1f%%", 100 * overlap),
              " overlap between subsequent spatial maps has been met")
      break
    } else {
      n <- n + 1
      if (verbose)
        message("- testing ", n, " snapshots")
    }
  }

  inter_regions <- rbind(pop, do.call(rbind, inter_regions))
  sf::st_agr(inter_regions) <- "constant"

  result <- copy_attributes(
    inter_regions, pop,
    c("map", "parent", "remove", "intersect", "aquatic", "history")
  )

  attr(result, "history") <- append(attr(result, "history"), list(data.frame(
    pop =  unique(region_start$pop),
    event = "move",
    tstart = start,
    tend = end
  )))

  result
}


#' Expand the population range
#'
#' Expands the spatial population range by a specified distance in a given
#' time-window
#'
#' Note that because slendr models have to accomodate both SLiM and msprime
#' back ends, population sizes and times of events are rounded to the nearest
#' integer value.
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param by How many units of distance to expand by?
#' @param start,end When does the expansion start/end?
#' @param overlap Minimum overlap between subsequent spatial boundaries
#' @param snapshots The number of intermediate snapshots (overrides the
#'   \code{overlap} parameter)
#' @param polygon Geographic region to restrict the expansion to
#' @param lock Maintain the same density of individuals. If
#'   \code{FALSE} (the default), the number of individuals in the
#'   population will not change. If \code{TRUE}, the number of
#'   individuals simulated will be changed (increased or decreased)
#'   appropriately, to match the new population range area.
#' @param verbose Report on the progress of generating intermediate spatial
#'   boundaries?
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#'   parameters such as name, time of appearance in the simulation, parent
#'   population (if any), and its spatial parameters such as map and spatial
#'   boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
expand_range <- function(pop, by, end, start, overlap = 0.8, snapshots = NULL,
                         polygon = NULL, lock = FALSE, verbose = TRUE) {
  if (!has_map(pop)) stop("This operation is only allowed for spatial models", call. = FALSE)

  check_spatial_pkgs()

  start <- as.integer(round(start))
  end <- as.integer(round(end))

  shrink_or_expand(pop, by, end, start, overlap, snapshots, polygon, lock, verbose)
}


#' Shrink the population range
#'
#' Shrinks the spatial population range by a specified distance in a given
#' time-window
#'
#' Note that because slendr models have to accomodate both SLiM and msprime
#' back ends, population sizes and split times are rounded to the nearest
#' integer value.
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param by How many units of distance to shrink by?
#' @param start,end When does the boundary shrinking start/end?
#' @param overlap Minimum overlap between subsequent spatial boundaries
#' @param snapshots The number of intermediate snapshots (overrides the
#'   \code{overlap} parameter)
#' @param lock Maintain the same density of individuals. If
#'   \code{FALSE} (the default), the number of individuals in the
#'   population will not change. If \code{TRUE}, the number of
#'   individuals simulated will be changed (increased or decreased)
#'   appropriately, to match the new population range area.
#' @param verbose Report on the progress of generating intermediate spatial
#'   boundaries?
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#'   parameters such as name, time of appearance in the simulation, parent
#'   population (if any), and its spatial parameters such as map and spatial
#'   boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
shrink_range <- function(pop, by, end, start, overlap = 0.8, snapshots = NULL,
                         lock = FALSE, verbose = TRUE) {
  if (!has_map(pop)) stop("This operation is only allowed for spatial models", call. = FALSE)

  check_spatial_pkgs()

  shrink_or_expand(pop, -by, end, start, overlap, snapshots, polygon = NULL, lock, verbose)
}


#' Update the population range
#'
#' This function allows a more manual control of spatial map changes
#' in addition to the \code{expand} and \code{move} functions
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param time Time of the change
#' @param center Two-dimensional vector specifying the center of the
#'   circular range
#' @param radius Radius of the circular range
#' @param polygon List of vector pairs, defining corners of the
#'   polygon range (see also the \code{region} argument) or a
#'   geographic region of the class \code{slendr_region} from which
#'   the polygon coordinates will be extracted
#' @param lock Maintain the same density of individuals. If
#'   \code{FALSE} (the default), the number of individuals in the
#'   population will not change. If \code{TRUE}, the number of
#'   individuals simulated will be changed (increased or decreased)
#'   appropriately, to match the new population range area.
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#'   parameters such as name, time of appearance in the simulation, parent
#'   population (if any), and its spatial parameters such as map and spatial
#'   boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
set_range <- function(pop, time, center = NULL, radius = NULL,
                      polygon = NULL, lock = FALSE) {
  if (!has_map(pop)) stop("This operation is only allowed for spatial models", call. = FALSE)

  check_spatial_pkgs()

  check_event_time(time, pop)
  check_removal_time(time, pop)

  map <- attr(pop, "map")

  # define the population range as a simple geometry object
  # and bind it with the annotation info into an sf object
  if (!is.null(polygon) & inherits(polygon, "slendr_region"))
    geometry <- sf::st_geometry(polygon)
  else
    geometry <- define_boundary(map, center, radius, polygon)

  updated <- sf::st_sf(
    data.frame(pop = unique(pop$pop), time = time, stringsAsFactors = FALSE),
    geometry = geometry
  )

  # Let's not enforce this given that the point of this function is to allow
  # full manual control over the boundary dynamics:
  # if (compute_overlaps(rbind(updated, pop[nrow(pop), ])) < overlap)
  #   stop("Insufficient overlap with the last active spatial boundary (",
  #        "please adjust the new spatial boundary or adjust the `overlap = `",
  #        "parameter for less stringent checking)", call. = FALSE)

  combined <- rbind(pop, updated)
  sf::st_agr(combined) <- "constant"

  result <- copy_attributes(
    combined, pop,
    c("map", "parent", "remove", "intersect", "aquatic", "history")
  )

  attr(result, "history") <- append(attr(result, "history"), list(data.frame(
    event = "range",
    time = time
  )))

  if (lock) {
    areas <- slendr::area(result)$area
    area_change <- areas[length(areas)] / areas[length(areas) - 1]
    prev_N <- utils::tail(sapply(attributes(pop)$history, function(event) event$N), 1)
    new_N <- round(area_change * prev_N)
    result <- resize(result, N = new_N, time = time, how = "step")
  }

  result
}


#' Change the population size
#'
#' Resizes the population starting from the current value of \code{N}
#' individuals to the specified value
#'
#' In the case of exponential size change, if the final \code{N} is larger than
#' the current size, the population will be exponentially growing over the
#' specified time period until it reaches \code{N} individuals. If \code{N} is
#' smaller, the population will shrink exponentially.
#'
#' Note that because slendr models have to accomodate both SLiM and msprime
#' back ends, population sizes and split times are rounded to the nearest
#' integer value.
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param N Population size after the change
#' @param how How to change the population size (options are \code{"step"} or
#'   \code{"exponential"})
#' @param time Time of the population size change
#' @param end End of the population size change period (used for exponential
#'   change events)
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#'   parameters such as name, time of appearance in the simulation, parent
#'   population (if any), and its spatial parameters such as map and spatial
#'   boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
resize <- function(pop, N, how, time, end = NULL) {
  if (N < 1) stop("resize(): Only positive, non-zero population sizes are allowed", call. = FALSE)

  N <- as.integer(round(N))
  time <- as.integer(round(time))
  if (!is.null(end)) end <- as.integer(round(end))

  if (time == attr(pop, "history")[[1]]$time)
    stop("Population resize cannot happen at the time the population is created", call. = FALSE)

  if (!how %in% c("step", "exponential"))
    stop("resize(): Only 'step' or 'exponential' are allowed as arguments for the 'how' parameter", call. = FALSE)

  if (how == "exponential" & is.null(end))
    stop("resize(): Start-end period of the exponential growth must be specified", call. = FALSE)

  # get the last active population size
  prev_N <- sapply(attr(pop, "history"), function(event) event$N) %>%
    Filter(Negate(is.null), .) %>%
    unlist %>%
    utils::tail(1)

  change <- data.frame(
    pop =  unique(pop$pop),
    event = "resize",
    how = how,
    N = N,
    prev_N = prev_N,
    tresize = time
  )

  if (how == "step") {
    check_event_time(time, pop)
    check_removal_time(time, pop)
    change$tend <- NA
  } else {
    check_event_time(c(time, end), pop)
    check_removal_time(time, pop)
    check_removal_time(end, pop)
    change$tend <- end
  }

  attr(pop, "history") <- append(attr(pop, "history"), list(change))

  pop
}


#' Change dispersal parameters
#'
#' Changes either the competition interactive distance, mating choice distance,
#' or the dispersal of offspring from its parent
#'
#' @param pop Object of the class \code{slendr_pop}
#' @param time Time of the population size change
#' @param competition,mating Maximum spatial competition and mating
#'   choice distance
#' @param dispersal Standard deviation of the normal distribution of the
#'   distance that offspring disperses from its parent
#' @param dispersal_fun Distribution function governing the dispersal of
#'   offspring. One of "normal", "uniform", "cauchy", "exponential", or
#'   "brownian" (in which vertical and horizontal displacements are drawn from a
#'   normal distribution independently).
#'
#' @return Object of the class \code{slendr_pop}, which contains population
#'   parameters such as name, time of appearance in the simulation, parent
#'   population (if any), and its spatial parameters such as map and spatial
#'   boundary.
#'
#' @export
#'
#' @example man/examples/model_definition.R
set_dispersal <- function(pop, time, competition = NA, mating = NA, dispersal = NA,
                          dispersal_fun = NULL) {
  if (!has_map(pop)) stop("This operation is only allowed for spatial models", call. = FALSE)

  check_spatial_pkgs()

  if (is.na(competition) && is.na(mating) && is.na(dispersal) &&
      is.null(dispersal_fun))
    stop("At least one spatial interaction parameter must be specified", call. = FALSE)

  if (any(c(competition, mating, dispersal) < 0, na.rm = TRUE))
    stop("Spatial interaction parameters can only have positive, non-zero values", call. = FALSE)

  dispersal_fun <- kernel_fun(dispersal_fun)

  map <- attr(pop, "map")
  if (!is.na(competition)) check_resolution(map, competition)
  if (!is.na(mating)) check_resolution(map, mating)
  if (!is.na(dispersal)) check_resolution(map, dispersal)

  check_event_time(time, pop)
  check_removal_time(time, pop)

  change <- data.frame(
    pop =  unique(pop$pop),
    event = "dispersal",
    time = time,
    competition = competition,
    mating = mating,
    dispersal = dispersal,
    dispersal_fun = dispersal_fun
  )

  attr(pop, "history") <- append(attr(pop, "history"), list(change))

  pop
}


#' Define a gene-flow event between two populations
#'
#' @param from,to Objects of the class \code{slendr_pop}
#' @param rate Scalar value in the range (0, 1] specifying the
#'   proportion of migration over given time period
#' @param start,end Start and end of the gene-flow event
#' @param overlap Require spatial overlap between admixing
#'   populations?  (default \code{TRUE})
#'
#' @return Object of the class data.frame containing parameters of the specified
#'   gene-flow event.
#'
#' @export
#'
#' @example man/examples/model_definition.R
gene_flow <- function(from, to, rate, start, end, overlap = TRUE) {
  if (!inherits(from, "slendr_pop") || !inherits(to, "slendr_pop"))
    stop("Both 'from' and 'to' arguments must be slendr population objects",
         call. = FALSE)

  # TODO: this needs some serious restructuring -- this function originated when slendr
  # could only do spatial simulations and some consistency checks relied on spatial
  # attributes; as a result, some checks are overlapping and/or redundant

  if ((has_map(from) && !has_map(to)) || (!has_map(from) && has_map(to)))
    stop("Both or neither populations must be spatial", call. = FALSE)

  # make sure that gene flow has a sensible value between 0 and 1
  if (rate < 0 || rate > 1)
    stop("Gene-flow rate must be a numeric value between 0 and 1", call. = FALSE)

  from_name <- unique(from$pop)
  to_name <- unique(to$pop)

  if (start == end)
    stop(sprintf("Start and end time for the %s -> %s gene flow is the same (%s)",
                 from_name, to_name, start), call. = FALSE)

  gf_dir <- if (start < end) "forward" else "backward"
  from_dir <- time_direction(from)
  to_dir <- time_direction(to)
  direction <- unique(setdiff(c(gf_dir, from_dir, to_dir), "unknown"))

  if (length(direction) > 1)
    stop("Inconsistent time direction implied by populations and the gene flow event", call. = FALSE)

  # make sure both participating populations are present at the start of the
  # gene flow event (`check_present_time()` is reused from the sampling functionality)
  tryCatch(
    {
      check_present_time(start, from, offset = 0, direction = direction)
      check_present_time(end, from, offset = 0, direction = direction)
      check_present_time(start, to, offset = 0, direction = direction)
      check_present_time(end, to, offset = 0, direction = direction)

      check_removal_time(start, from)
      check_removal_time(end, from)
      check_removal_time(start, to)
      check_removal_time(end, to)
    },
    error = function(e) {
      stop(sprintf("Both %s and %s must be already present within the gene-flow window %s-%s",
                   from_name, to_name, start, end), call. = FALSE)
    }
  )

  if (has_map(from) && has_map(to)) {
    comp <- if (direction == "forward") `<=` else `>=`

    # get the last specified spatial maps before the geneflow time
    region_from <- intersect_features(from[comp(from$time, start), ] %>% .[nrow(.), ])
    region_to <- intersect_features(to[comp(to$time, start), ] %>% .[nrow(.), ])

    if (nrow(region_from) == 0)
      stop(sprintf("No spatial map defined for %s at/before the time %d",
                   from_name, start),
           call. = FALSE)
    if (nrow(region_to) == 0)
      stop(sprintf("No spatial map defined for %s at/before the time %d",
                   to_name, start),
           call. = FALSE)

    # calculate the overlap of spatial ranges between source and target
    region_overlap <- sf::st_intersection(region_from, region_to)
    area_overlap <- as.numeric(sum(sf::st_area(region_overlap)))

    if (overlap && area_overlap == 0) {
      stop(sprintf("No overlap between population ranges of %s and %s at time %d.

  Please check the spatial maps of both populations by running
  `plot_map(%s, %s)` and adjust them accordingly. Alternatively, in case
  this makes sense for your model, you can add `overlap = F` which
  will instruct slendr to simulate gene flow without spatial overlap
  between populations.",
  from_name, to_name, start, deparse(substitute(from)),
  deparse(substitute(to))), call. = FALSE)
    }
  } else
    overlap <- FALSE

  data.frame(
    from_name = from_name,
    to_name = to_name,
    tstart = start,
    tend = end,
    rate = rate,
    overlap = overlap,
    stringsAsFactors = FALSE
  )
}


#' Define a world map for all spatial operations
#'
#' Defines either an abstract geographic landscape (blank or containing
#' user-defined landscape) or using a real Earth cartographic data from the
#' Natural Earth project (<https://www.naturalearthdata.com>).
#'
#' @param xrange Two-dimensional vector specifying minimum and maximum
#'   horizontal range ("longitude" if using real Earth cartographic data)
#' @param yrange Two-dimensional vector specifying minimum and maximum vertical
#'   range ("latitude" if using real Earth cartographic data)
#' @param landscape Either "blank" (for blank abstract geography),
#'   "naturalearth" (for real Earth geography) or an object of the class
#'   \code{sf} defining abstract geographic features of the world
#' @param crs EPSG code of a coordinate reference system to use for spatial
#'   operations. No CRS is assumed by default (\code{NULL}), implying an
#'   abstract landscape not tied to any real-world geographic region (when
#'   \code{landscape = "blank"} or when \code{landscape} is a custom-defined
#'   geographic landscape), or implying WGS-84 (EPSG 4326) coordinate system
#'   when a real Earth landscape was defined (\code{landscape =
#'   "naturalearth"}).
#' @param scale If Natural Earth geographic data is used (i.e. \code{landscape =
#'   "naturalearth"}), this parameter determines the resolution of the data
#'   used. The value "small" corresponds to 1:110m data and is provided with the
#'   package, values "medium" and "large" correspond to 1:50m and 1:10m
#'   respectively and will be downloaded from the internet. Default value is
#'   "small".
#'
#' @return Object of the class \code{slendr_map}, which encodes a standard
#'   spatial object of the class \code{sf} with additional slendr-specific
#'   attributes such as requested x-range and y-range.
#'
#' @export
#'
#' @example man/examples/spatial_functions.R
world <- function(xrange, yrange, landscape = "naturalearth", crs = NULL,
                  scale = c("small", "medium", "large")) {
  check_spatial_pkgs()

  if (length(xrange) != 2 || length(yrange) != 2)
    stop("Horizontal (i.e. longitude) and vertical (i.e. latitude) must be\n",
         "specified as two-dimensional vectors such as:\n",
         "    `xrange = c(x1, x2), yrange = c(y1, y2)`",
         call. = FALSE)

  if (is.character(landscape) && landscape == "naturalearth" && is.null(crs)) {
    warning("No explicit coordinate reference system (CRS) was specified.\n",
            "A default geographic CRS (EPSG:4326) will be used but please make\n",
            "sure this is appropriate for your geographic region of interest.",
            call. = FALSE)
    crs <- 4326
  }

  if (inherits(landscape, "sf")) { # a landscape defined by the user
    cropped_landscape <- sf::st_crop(
      landscape,
      xmin = xrange[1], xmax = xrange[2],
      ymin = yrange[1], ymax = yrange[2]
    )
    map <- sf::st_sf(landscape = sf::st_geometry(cropped_landscape)) %>%
      set_bbox(xmin = xrange[1], xmax = xrange[2], ymin = yrange[1], ymax = yrange[2])
  } else if (landscape == "blank") { # an empty abstract landscape
    map <- sf::st_sf(geometry = sf::st_sfc()) %>%
      set_bbox(xmin = xrange[1], xmax = xrange[2], ymin = yrange[1], ymax = yrange[2])
  } else if (landscape == "naturalearth") {  # Natural Earth data vector landscape
    scale <- match.arg(scale)
    # the small scale Natural Earth data is bundled with slendr
    ne_dir <- file.path(tempdir(), "naturalearth")
    if (scale == "small") {
      utils::unzip(system.file("naturalearth/ne_110m_land.zip", package = "slendr"),
                   exdir = ne_dir)
    } else {
      size <- ifelse(scale == "large", 10, 50)
      file <- sprintf("ne_%sm_land.zip", size)
      if (!dir.exists(ne_dir)) dir.create(ne_dir)
      path <- file.path(ne_dir, file)
      utils::download.file(
        url = sprintf("https://naturalearth.s3.amazonaws.com/%sm_physical/%s", size, file),
        destfile = path, quiet = TRUE
      )
      utils::unzip(path, exdir = ne_dir)
    }

    # TODO: this function uses internally rgdal, which is to be retired by 2023
    # silence the deprecation warning for now, as it would only confuse the user
    # and figure out a way to deal with this (either by providing a PR to the devs
    # or hacking our own alternative)
    suppressWarnings(
      map_raw <- rnaturalearth::ne_load(
        scale = scale, type = "land", category = "physical",
        returnclass = "sf", destdir = ne_dir
      )
    )
    sf::st_agr(map_raw) <- "constant"

    # define boundary coordinates in the target CRS
    crop_bounds <- define_zoom(xrange, yrange, "EPSG:4326")

    # crop the map to the boundary coordinates
    map_cropped <- tryCatch({
      sf::st_crop(sf::st_make_valid(map_raw), crop_bounds)
    }, error = function(cond) {
      sf::st_crop(map_raw, crop_bounds)
    })

    # transform the map into the target CRS if needed
    map <- sf::st_transform(map_cropped, crs)
  } else {
    stop("Landscape has to be either 'blank', 'naturalearth' or an object of the class 'sf'",
         call. = FALSE)
  }

  sf::st_agr(map) <- "constant"

  class(map) <- set_class(map, "map")
  attr(map, "xrange") <- xrange
  attr(map, "yrange") <- yrange

  map
}


#' Define a geographic region
#'
#' Creates a geographic region (a polygon) on a given map and gives it
#' a name. This can be used to define objects which can be reused in
#' multiple places in a slendr script (such as \code{region} arguments
#' of \code{population}) without having to repeatedly define polygon
#' coordinates.
#'
#' @param name Name of the geographic region
#' @param map Object of the type \code{sf} which defines the map
#' @param center Two-dimensional vector specifying the center of the
#'   circular range
#' @param radius Radius of the circular range
#' @param polygon List of vector pairs, defining corners of the
#'   polygon range or a geographic region of the class
#'   \code{slendr_region} from which the polygon coordinates will be
#'   extracted (see the \code{region() function})
#'
#' @return Object of the class \code{slendr_region} which encodes a standard
#'   spatial object of the class \code{sf} with several additional attributes
#'   (most importantly a corresponding \code{slendr_map} object, if applicable).
#'
#' @export
#'
#' @example man/examples/spatial_functions.R
region <- function(name = NULL, map = NULL, center = NULL, radius = NULL, polygon = NULL) {
  check_spatial_pkgs()

  # for accurate circular areas see: https://stackoverflow.com/a/65280376
  if (is.null(name)) name <- "unnamed region"
  region <- sf::st_sf(
    region = name,
    geometry = define_boundary(map, center, radius, polygon)
  ) %>% sf::st_make_valid()
  sf::st_agr(region) <- "constant"

  # keep the map as an internal attribute
  attr(region, "map") <- map

  class(region) <- set_class(region, "region")
  region
}


#' Reproject coordinates between coordinate systems
#'
#' Converts between coordinates on a compiled raster map (i.e. pixel
#' units) and different Geographic Coordinate Systems (CRS).
#'
#' @param x,y Coordinates in two dimensions (if missing, coordinates
#'   are expected to be in the \code{data.frame} specified in the
#'   \code{coords} parameter as columns "x" and "y")
#' @param from,to Either a CRS code accepted by GDAL, a valid integer
#'   EPSG value, an object of class \code{crs}, the value "raster"
#'   (converting from/to pixel coordinates), or "world" (converting
#'   from/to whatever CRS is set for the underlying map)
#' @param coords data.frame-like object with coordinates in columns
#'   "x" and "y"
#' @param model Object of the class \code{slendr_model}
#' @param add Add column coordinates to the input data.frame
#'   \code{coords} (coordinates otherwise returned as a separate
#'   object)?
#' @param input_prefix,output_prefix Input and output prefixes of data
#'   frame columns with spatial coordinates
#'
#' @return Data.frame with converted two-dimensional coordinates given as input
#'
#' @examples
#' lon_lat_df <- data.frame(x = c(30, 0, 15), y = c(60, 40, 10))
#'
#' reproject(
#'   from = "epsg:4326",
#'   to = "epsg:3035",
#'   coords = lon_lat_df,
#'   add = TRUE # add converted [lon,lat] coordinates as a new column
#' )
#' @export
reproject <- function(from, to, x = NULL, y = NULL, coords = NULL, model = NULL,
                      add = FALSE, input_prefix = "", output_prefix = "new") {
  check_spatial_pkgs()

  if ((is.null(x) | is.null(y)) & is.null(coords))
    stop("Coordinates for conversion are missing", call. = FALSE)

  from_slendr <- !is.null(model)

  if ((from == "raster" | to == "raster") & !from_slendr)
    stop("Model object needs to be specified for conversion of raster coordinates", call. = FALSE)

  if (add && is.null(coords))
    stop("Converted coordinates can only be added to a provided data.frame", call. = FALSE)

  inx <- paste0(input_prefix, "x"); iny <- paste0(input_prefix, "y")
  outx <- paste0(output_prefix, "x"); outy <- paste0(output_prefix, "y")
  if (!is.null(coords) & !all(c(inx, iny) %in% colnames(coords)))
    stop("Columns '", inx, "' and '", iny, "' must be present in the input data.frame", call. = FALSE)

  if (from_slendr) {
    # dimension of the map in the projected CRS units
    bbox <- sf::st_bbox(model$world)
    map_dim <- c(bbox["xmax"] - bbox["xmin"], bbox["ymax"] - bbox["ymin"])

    # dimension of the rasterized map in pixel units
    # (x/y dimensions of PNGs are reversed)
    raster_dim <- dim(png::readPNG(file.path(model$path, model$maps$path[1])))[2:1]
  }

  if (to == "world") to <- sf::st_crs(model$world)
  if (from == "world" && has_crs(model$world)) from <- sf::st_crs(model$world)

  if (is.null(coords)) {
    df <- data.frame(x = x, y = y)
    colnames(df) <- c(inx, iny)
  } else
    df <- coords[, c(inx, iny)]

  if (from == "raster") {
    # convert pixel coordinates to na sf object in world-based coordinates
    df[[inx]] <- bbox["xmin"] + map_dim[1] * df[[inx]] / raster_dim[1]
    df[[iny]] <- bbox["ymin"] + map_dim[2] * df[[iny]] / raster_dim[2]
    point <- sf::st_as_sf(df, coords = c(inx, iny), crs = sf::st_crs(model$world))
  } else {
    # ... otherwise create a formal sf point object from the
    # coordinates already given
    point <- sf::st_as_sf(df, coords = c(inx, iny), crs = from)
  }

  if (to == "raster") {
    if (has_crs(model$world))
      point<- sf::st_transform(point, crs = sf::st_crs(model$world))
    point_coords <- sf::st_coordinates(point)
    newx <- abs((point_coords[, "X"] - bbox["xmin"])) / map_dim[1] * raster_dim[1]
    newy <- abs((point_coords[, "Y"] - bbox["ymin"])) / map_dim[2] * raster_dim[2]
    new_point <- data.frame(newx = round(as.vector(newx)), newy = round(as.vector(newy)))
    colnames(new_point) <- c(outx, outy)
  } else if (!is.na(to)) {
    new_point <- sf::st_transform(point, crs = to) %>% sf::st_coordinates()
    colnames(new_point) <- c(outx, outy)
  } else {
    new_point <- point %>% sf::st_coordinates()
    colnames(new_point) <- c(outx, outy)
  }

  # if (nrow(new_point) == 1)
  #   return(as.vector(unlist(new_point)))

  if (add) new_point <- cbind(coords, new_point) %>% dplyr::as_tibble()

  new_point
}


#' Merge two spatial \code{slendr} objects into one
#'
#' @param x Object of the class \code{slendr}
#' @param y Object of the class \code{slendr}
#' @param name Optional name of the resulting geographic region. If missing,
#'   name will be constructed from the function arguments.
#'
#' @return Object of the class \code{slendr_region} which encodes a standard
#'   spatial object of the class \code{sf} with several additional attributes
#'   (most importantly a corresponding \code{slendr_map} object, if applicable).
#'
#' @export
#'
#' @example man/examples/spatial_functions.R
join <- function(x, y, name = NULL) {
  check_spatial_pkgs()

  if (!inherits(x, "slendr")) x <- region(polygon = x)
  if (!inherits(y, "slendr")) y <- region(polygon = y)

  result <- sf::st_union(x, y)
  result$region.1 <- NULL
  if (is.null(name))
    result$region <- sprintf("(%s and %s)", x$region, y$region)
  else
    result$region <- name
  attrs <- if (!is.null(attr(x, "map"))) "map" else NULL
  result <- copy_attributes(result, x, attrs)
  sf::st_agr(result) <- "constant"
  result
}


#' Generate the overlap of two \code{slendr} objects
#'
#' @inheritParams join
#'
#' @return Object of the class \code{slendr_region} which encodes a standard
#'   spatial object of the class \code{sf} with several additional attributes
#'   (most importantly a corresponding \code{slendr_map} object, if applicable).
#'
#' @export
overlap <- function(x, y, name = NULL) {
  check_spatial_pkgs()

  if (!inherits(x, "slendr")) x <- region(polygon = x)
  if (!inherits(y, "slendr")) y <- region(polygon = y)

  result <- sf::st_intersection(x, y)

  if (nrow(result)) {
    if (nrow(result) > 1)
      result <- sf::st_sf(geometry = sf::st_combine(result))

    if (is.null(name)) {
      xname <- deparse(substitute(x))
      yname <- deparse(substitute(y))
      result$region <- sprintf("(overlap of %s and %s)", xname, yname)
    } else
      result$region <- name
    result <- result[, c("region", "geometry")]
  }

  map <- attr(x, "map")

  class(result) <- set_class(result, "region")
  attr(result, "map") <- map
  sf::st_agr(result) <- "constant"

  result
}


#' Generate the difference between two \code{slendr} objects
#'
#' @inheritParams join
#'
#' @return Object of the class \code{slendr_region} which encodes a standard
#'   spatial object of the class \code{sf} with several additional attributes
#'   (most importantly a corresponding \code{slendr_map} object, if applicable).
#'
#' @export
subtract <- function(x, y, name = NULL) {
  check_spatial_pkgs()

  if (!inherits(x, "slendr")) x <- region(polygon = x)
  if (!inherits(y, "slendr")) y <- region(polygon = y)

  result <- sf::st_difference(x, y)

  if (nrow(result)) {
    if (is.null(name)) {
      xname <- deparse(substitute(x))
      yname <- deparse(substitute(y))
      result$region <- sprintf("(%s minus %s)", xname, yname)
    } else
      result$region <- name
  }

  map <- attr(x, "map")

  result <- result[, c("region", "geometry")]
  class(result) <- set_class(result, "region")
  attr(result, "map") <- map
  sf::st_agr(result) <- "constant"

  result
}


#' Calculate the distance between a pair of spatial boundaries
#'
#' @param x,y Objects of the class \code{slendr}
#' @param measure How to measure distance? This can be either \code{'border'}
#'   (distance between the borders of \code{x} and \code{y}) or \code{'center'}
#'   (distance between their centroids).
#' @param time Time closest to the spatial maps of \code{x} and \code{y} if they
#'   represent \code{slendr_pop} population boundaries (ignored for general
#'   \code{slendr_region} objects)
#'
#' @return If the coordinate reference system was specified, a distance in
#'   projected units (i.e. meters) is returned. Otherwise the function returns a
#'   normal Euclidean distance.
#'
#' @examples
#' # create two regions on a blank abstract landscape
#' region_a <- region("A", center = c(20, 50), radius = 20)
#' region_b <- region("B", center = c(80, 50), radius = 20)
#' plot_map(region_a, region_b)
#'
#' # compute the distance between the centers of both population ranges
#' distance(region_a, region_b, measure = "center")
#'
#' # compute the distance between the borders of both population ranges
#' distance(region_a, region_b, measure = "border")
#' @export
distance <- function(x, y, measure, time = NULL) {
  check_spatial_pkgs()

  if (!measure %in% c("center", "border"))
    stop("Unknown distance measure method provided (must be either 'center' or 'border')", call. = FALSE)

  if ((inherits(x, "slendr_pop") | inherits(y, "slendr_pop")) & is.null(time))
    stop("Time of the nearest spatial snapshot must be be given when calculating
distance between population boundaries", call. = FALSE)

  if (inherits(x, "slendr_pop")) x <- x[which.min(abs(x$time - time)), ]
  if (inherits(y, "slendr_pop")) y <- y[which.min(abs(y$time - time)), ]

  if (measure == "center") {
    x <- sf::st_centroid(x)
    y <- sf::st_centroid(y)
  }

  as.vector(sf::st_distance(x, y))
}


#' Calculate the area covered by the given slendr object
#'
#' @param x Object of the class \code{slendr}
#'
#' @return Area covered by the input object. If a \code{slendr_pop}
#'   was given, a table with an population range area in each time
#'   point will be returned. If a \code{slendr_region} or
#'   \code{slendr_world} object was specified, the total area covered
#'   by this object's spatial boundary will be returned.
#'
#' @examples
#' region_a <- region("A", center = c(20, 50), radius = 20)
#' region_b <- region("B", polygon = list(c(50, 40), c(70, 40), c(70, 60), c(50, 60)))
#' plot_map(region_a, region_b)
#'
#' # note that area won't be *exactly* equal to pi*r^2:
#' #   https://stackoverflow.com/a/65280376
#' area(region_a)
#'
#' area(region_b)
#' @export
area <- function(x) {
  check_spatial_pkgs()

  if (!inherits(x, "slendr") & !inherits(x, "sf"))
    stop("Input must be of the type 'slendr' or 'sf'", call. = FALSE)

  if (inherits(x, "slendr_pop")) {
    areas <- purrr::map_dbl(seq_len(nrow(x)), ~ as.numeric(sum(sf::st_area(sf::st_make_valid(x[.x, ])))))
    times <- x$time
    return(dplyr::tibble(time = times, area = areas))
  } else if (inherits(x, "slendr_map") || inherits(x, "slendr_region"))
    return(as.numeric(sum(sf::st_area(x))))
  else
    stop("Only the areas covered by slendr_(pop/region/world) objects can be computed",
         call. = FALSE)
}

#' Define sampling events for a given set of populations
#'
#' Schedule sampling events at specified times and, optionally, a given set of
#' locations on a landscape
#'
#' If both times and locations are given, the the sampling will be scheduled on
#' each specified location in each given time-point. Note that for the
#' time-being, in the interest of simplicity, no sanity checks are performed on
#' the locations given except the restriction that the sampling points must fall
#' within the bounding box around the simulated world map. Other than that,
#' slendr will simply instruct its SLiM backend script to sample individuals as
#' close to the sampling points given as possible, regardless of whether those
#' points lie within a population spatial boundary at that particular moment of
#' time.
#'
#' @param model Object of the class \code{slendr_model}
#' @param times Integer vector of times (in model time units) at which to
#'   schedule remembering of individuals in the tree-sequence
#' @param ... Lists of two elements (\code{slendr_pop} population object-<number
#'   of individuals to sample), representing from which populations should how
#'   many individuals be remembered at times given by \code{times}
#' @param locations List of vector pairs, defining two-dimensional coordinates
#'   of locations at which the closest number of individuals from given
#'   populations should be sampled. If \code{NULL} (the default), individuals
#'   will be sampled randomly throughout their spatial boundary.
#' @param strict Should any occurence of a population not being present at a
#'   given time result in an error? Default is \code{FALSE}, meaning that
#'   invalid sampling times for any populations will be quietly ignored.
#'
#' @return Data frame with three columns: time of sampling, population to sample
#'   from, how many individuals to sample
#'
#' @examples
#' \dontshow{check_dependencies(python = TRUE, quit = TRUE) # dependencies must be present
#' }
#' init_env()
#'
#' # load an example model with an already simulated tree sequence
#' path <- system.file("extdata/models/introgression", package = "slendr")
#' model <- read_model(path)
#'
#' # afr and eur objects would normally be created before slendr model compilation,
#' # but here we take them out of the model object already compiled for this
#' # example (in a standard slendr simulation pipeline, this wouldn't be necessary)
#' afr <- model$populations[["AFR"]]
#' eur <- model$populations[["EUR"]]
#'
#' # schedule the recording of 10 African and 100 European individuals from a
#' # given model at 20 ky, 10 ky, 5ky ago and at present-day (time 0)
#' schedule <- schedule_sampling(
#'   model, times = c(20000, 10000, 5000, 0),
#'   list(afr, 10), list(eur, 100)
#' )
#'
#' # the result of `schedule_sampling` is a simple data frame (note that the locations
#' # of sampling locations have `NA` values because the model is non-spatial)
#' schedule
#' @export
schedule_sampling <- function(model, times, ..., locations = NULL, strict = FALSE) {
  if (!inherits(model, "slendr_model"))
    stop("A slendr_model object must be specified", call. = FALSE)

  times <- unique(as.integer(round(sort(times))))

  samples <- list(...)
  sample_pops <- purrr::map(samples, 1)
  sample_counts <- purrr::map(samples, 2)

  if (is.null(model$world) && !is.null(locations))
    stop("Sampling locations may only be specified for a spatial model", call. = FALSE)

  if (length(sample_pops) != length(sample_counts))
    stop("Samples must be represented by pairs of <slendr_pop>-<n>", call. = FALSE)

  if (!all(purrr::map_lgl(sample_pops, ~ inherits(.x, "slendr_pop"))))
    stop("Objects to sample from must be of the class 'slendr_pop'", call. = FALSE)

  if (!all(purrr::map_lgl(sample_counts, ~ .x == round(.x))))
    stop("Sample counts must be integer numbers", call. = FALSE)

  # make sure that all sampling times fall in the time window of the simulation itself
  oldest_time <- get_oldest_time(model$populations, model$direction)
  if ((model$direction == "forward" && (any(times > oldest_time + model$orig_length)
                                        || any(times < oldest_time))) ||
      (model$direction == "backward" && (any(times < oldest_time - model$orig_length)
                                         || any(times > oldest_time)))) {

    if (strict)
      stop("A sampling event was scheduled outside of the simulation time window", call. = FALSE)
    else
      times <- times[times <= model$orig_length]
  }

  schedule <- purrr::map_dfr(times, function(t) {
    purrr::map_dfr(samples, function(s) {
      pop <- s[[1]]
      n <- s[[2]]
      tryCatch(
        {
          check_removal_time(t, pop, direction = model$direction)
          check_present_time(t, pop, direction = model$direction, offset = model$generation_time)
          if (!is.infinite(n)) n <- as.integer(n)
          dplyr::tibble(time = t, pop = pop$pop[1], n = n)
        },
        error = function(cond) {
          if (!strict)
            return(NULL)
          else
            stop("Cannot schedule sampling for '", pop$pop, "' at time ", t,
                 " because the population will not yet be present in the simulation",
                 " at that point. Consider running this function with `strict = FALSE`",
                 " which will automatically retain only valid sampling events.",
                 call. = FALSE)
        })
    })
  })

  if (is.null(schedule))
    stop("No sampling events have been generated", call. = FALSE)

  if (!nrow(schedule)) {
    warning("No valid sampling events were retained", call. = FALSE)
    return(NULL)
  }

  if (!is.null(locations)) {
    check_location_bounds(locations, model$world)

    # convert the list of coordinate pairs into a data frame with x and y
    # columns transformed from world-based coordinates into raster-based
    # coordinates
    locations_df <- dplyr::tibble(
      orig_x = purrr::map_dbl(locations, ~ .[[1]]),
      orig_y = purrr::map_dbl(locations, ~ .[[2]])
    ) %>%
      reproject(
        coords = ., model = model,
        from = "world", to = "raster",
        input_prefix = "orig_", output_prefix = "",
        add = TRUE
      ) %>%
      dplyr::rename(x_orig = orig_x, y_orig = orig_y)

    schedule <- merge(schedule, locations_df)
  } else {
    schedule$x <- schedule$y <- schedule$x_orig <- schedule$y_orig <- NA
  }

  schedule
}

#' Activate slendr's own dedicated Python environment
#'
#' This function attempts to activate a dedicated slendr Miniconda Python
#' environment previously set up via \code{setup_env}.
#'
#' @param quiet Should informative messages be printed to the console? Default
#'   is \code{FALSE}.
#'
#' @return No return value, called for side effects
#'
#' @export
init_env <- function(quiet = FALSE) {
  if (!is_slendr_env_present())
    stop("Could not activate slendr's Python environment because it is not\npresent ",
         "on your system ('", PYTHON_ENV, "').\n\n",
         "To set up a dedicated Python environment you first need to run setup_env().", call. = FALSE)
  else {
    reticulate::use_condaenv(PYTHON_ENV, required = TRUE)

    # this is an awful workaround around the reticulate/Python bug which prevents
    # import_from_path (see zzz.R) from working properly -- I'm getting nonsensical
    #   Error in py_call_impl(callable, dots$args, dots$keywords) :
    #     TypeError: integer argument expected, got float
    # in places with no integer/float conversion in sight
    #
    # at least it prevents having to do things like:
    # reticulate::py_run_string("def get_pedigree_ids(ts): return [ind.metadata['pedigree_id']
    #                                                              for ind in ts.individuals()]")
    # (moved from ts_load() here because this is a better place for loading our Python functions)
    reticulate::source_python(file = system.file("pylib/pylib.py", package = "slendr"))

    if (!reticulate::py_module_available("msprime") ||
        !reticulate::py_module_available("tskit") ||
        !reticulate::py_module_available("pyslim") ||
        !reticulate::py_module_available("tspop")) {
      stop("Python environment ", PYTHON_ENV, " has been found but it",
           " does not appear to have msprime, tskit, pyslim and tspop modules all",
           " installed. Perhaps the environment got corrupted somehow?",
           " Running `clear_env()` and `setup_env()` to reset the slendr's Python",
           " environment is recommended.", call. = FALSE)
    } else {
      # pylib <<- reticulate::import_from_path(
      #   "pylib",
      #   path = system.file("python", package = "slendr"),
      #   delay_load = TRUE
      # )
      if (!quiet)
        message("The interface to all required Python modules has been activated.")
    }
  }
}

#' Setup a dedicated Python virtual environment for slendr
#'
#' This function will automatically download a Python miniconda distribution
#' dedicated to an R-Python interface. It will also create a slendr-specific
#' Python environment with all the required Python dependencies.
#'
#' @param quiet Should informative messages be printed to the console? Default
#'   is \code{FALSE}.
#' @param agree Automatically agree to all questions?
#' @param pip Should pip be used instead of conda for installing slendr's Python
#'   dependencies? Note that this will still use the conda distribution to
#'   install Python itself, but will change the repository from which slendr
#'   will install its Python dependencies. Unless explicitly set to \code{TRUE},
#'   Python dependencies will be installed from conda repositories by default,
#'   expect for the case of osx-arm64 Mac architecture, for which conda
#'   dependencies are broken.
#'
#' @return No return value, called for side effects
#'
#' @export
setup_env <- function(quiet = FALSE, agree = FALSE, pip = NULL) {
  if (is_slendr_env_present()) {
    message("A required slendr Python environment is already present. You can activate\n",
            "it by calling init_env().")
  } else {
    if (agree)
      answer <- 2
    else
      answer <- utils::menu(
        c("No", "Yes"),
        title = paste0(
          "This function will install a completely isolated Miniconda Python distribution\n",
          "just for slendr and create an environment with all required Python modules.\n",
          "\nEverything will be installed into a completely separate location into an\n",
          "isolated environment in an R library directory. This won't affect your other\n",
          "Python installations at all. You can always wipe out the automatically created\n",
          "environment by running clear_env().\n\n",
          "Do you wish to proceed with the automated Python environment setup?")
        )
    if (answer == 2) {
      message("=======================================================================")
      message("Installing slendr's Python environment. Please wait until")
      message("the installation procedure finishes. Do NOT interrupt the")
      message("process while the installation is still running.")
      message("======================================================================\n")
      Sys.sleep(10)

      if (!dir.exists(reticulate::miniconda_path()))
        reticulate::install_miniconda()

      # parse the Python env name back to the list of dependencies
      # (the environment is defined in .onAttach(), and this makes sure the
      # dependencies are defined all in one place)
      versions <- PYTHON_ENV %>% gsub("-", "==", .) %>% strsplit("_") %>% .[[1]]
      python_version <- gsub("Python==", "", versions[1])
      package_versions <- versions[-1]

      reticulate::conda_create(envname = PYTHON_ENV, python_version = python_version)
      reticulate::use_condaenv(PYTHON_ENV, required = TRUE)

      # msprime/tskit conda dependency is broken on M1 Mac architecture, fallback
      # to pip in cases like this (otherwise use conda to avoid any potential
      # compilation issues such as missing libgsl)
      if (is.null(pip))
        pip <- all(Sys.info()[c("sysname", "machine")] == c("Darwin", "arm64"))

      # tspop isn't available on conda so it will need to be installed by pip
      # no matter the user's preference (given by the pip function argument value)
      # TODO: check at some point later if tspop is on conda
      which_tspop <- grepl("tspop", package_versions)
      reticulate::conda_install(envname = PYTHON_ENV, packages = package_versions[!which_tspop], pip = pip)
      reticulate::conda_install(envname = PYTHON_ENV, packages = c(package_versions[which_tspop], "pyarrow"), pip = TRUE)

      if (!quiet) {
        message("======================================================================")
        message("Python environment for slendr has been successfuly created, and ",
                "the R\ninterface to msprime, tskit, and pyslim modules has been activated.\n")
        message("In future sessions, activate this environment by calling init_env().")
        message("=======================================================================")
      }
    } else
      warning("Your Python environment is not set up correctly which means that the tree\n",
              "sequence functionality of slendr will not work.", call. = FALSE)
  }
}

#' Remove the automatically created slendr Python environment
#'
#' @param force Ask before deleting any environment?
#' @param all Should all (present and past) slendr Python environments be removed
#'   (default is \code{FALSE}) or just the current environment?
#'
#' @return No return value, called for side effects
#'
#' @export
clear_env <- function(force = FALSE, all = FALSE) {
  envs_to_delete <- reticulate::conda_list()$name %>%
    grep("Python-.*_msprime-.*_tskit-.*_pyslim-.*", ., value = TRUE)
  if (!all)
    envs_to_delete <- grep(PYTHON_ENV, envs_to_delete, value = TRUE)

  if (length(envs_to_delete) == 0) {
    warning("No slendr Python environment has been found, nothing to delete.", call. = FALSE)
  } else {
    env_names <- paste(paste0("[#", seq_along(envs_to_delete), "]"), envs_to_delete)
    cat("The following slendr-looking Python environments have been identified:\n\n")
    cat(paste0(paste0(" - ", env_names, "\n"), collapse = ""))
    if (!force) {
      cat("\nYou will be asked for confirmation before any of them are deleted.\n")
      if (all == TRUE) {
        cat("To remove all of them at once, you can call `clear_env(force = TRUE, all = TRUE)`.\n")
      }
      cat("\n")
    }

    for (i in seq_along(envs_to_delete)) {
      env <- envs_to_delete[i]

      path <- reticulate::conda_list() %>%
        dplyr::filter(grepl(env, name)) %>%
        { gsub("bin\\/python", "", .$python) }

      if (force) {
        answer <- 2
      } else {
        answer <- utils::menu(
          c("No", "Yes"),
          title = paste0("Do you want to delete Python environment [#", i, "]? (Hit ESC to cancel.)\n\n", path)
        )
      }

      if (answer == 2) {
        reticulate::conda_remove(env)
        message("slendr environment '", env, "' has been sucessfully removed.")
      } else {
        message("slendr environment '", env, "' has not been removed.")
      }
    }
  }
}

#' Get the name of the current slendr Python environment
#'
#' @return Name of the slendr Python environment
get_env <- function() {
  PYTHON_ENV
}

#' Check that the active Python environment is setup for slendr
#'
#' This function inspects the Python environment which has been activated by the
#' reticulate package and prints the versions of all slendr Python dependencies
#' to the console.
#'
#' @param verbose Should a log message be printed? If \code{FALSE}, only a logical
#'   value is returned (invisibly).
#'
#' @return Either \code{TRUE} (slendr Python environment is present) or \code{FALSE}
#'   (slendr Python environment is not present).
#'
#' @examples
#' \dontshow{check_dependencies(python = TRUE, quit = TRUE) # dependencies must be present
#' }
#' init_env()
#' check_env()
#' @export
check_env <- function(verbose = TRUE) {
  # if there is no Python available on user's system, don't immediately
  # jump to installing miniconda (let's deal with that in setup_env())
  orig_env <- Sys.getenv("RETICULATE_MINICONDA_ENABLED")
  Sys.setenv(RETICULATE_MINICONDA_ENABLED = FALSE)
  on.exit(Sys.setenv(RETICULATE_MINICONDA_ENABLED = orig_env))

  py <- reticulate::py_discover_config()

  has_tskit <- reticulate::py_module_available("tskit")
  has_msprime <- reticulate::py_module_available("msprime")
  has_pyslim <- reticulate::py_module_available("pyslim")
  has_tspop <- reticulate::py_module_available("tspop")
  # has_pylib <- !is.null(pylib)

  if (has_tskit)
    tskit_version <- paste("version", tskit[["_version"]]$tskit_version, "\u2713")
  else
    tskit_version <- "MISSING \u274C"

  if (has_msprime)
    msprime_version <- paste("version", msp[["_version"]]$version, "\u2713")
  else
    msprime_version <- "MISSING \u274C"

  if (has_pyslim)
    pyslim_version <- paste("version", pyslim$pyslim_version, "\u2713")
  else
    pyslim_version <- "MISSING \u274C"

  if (has_tspop)
    tspop_version <- paste("present \u2713")
  else
    tspop_version <- "MISSING \u274C"

  # if (has_pylib)
  #   pylib_status <- "successfully loaded \u2713"
  # else
  #   pylib_status <- "NOT LOADED \u274C"

  if (verbose) {
    cat("Summary of the currently active Python environment:\n\n")
    cat("Python binary:", py$python, "\n")
    cat("Python version:", py$version_string, "\n")

    cat("\nslendr requirements:\n")
    cat(" - tskit:", tskit_version, "\n")
    cat(" - msprime:", msprime_version, "\n")
    cat(" - pyslim:", pyslim_version, "\n")
    cat(" - tspop:", tspop_version, "\n")
    # cat(" - slendr module:", pylib_status, "\n")
  }

  if (!all(c(has_tskit, has_pyslim, has_msprime, has_tspop))) {
    return_value <- FALSE
    if (verbose)
      cat("\nNote that due to the technical limitations of embedded Python,",
          "if you\nwant to switch to another Python environment you will need",
          "to restart\nyour R session first.\n")
    # reference: https://github.com/rstudio/reticulate/issues/27#issuecomment-512256949
  } else
    return_value <- TRUE

  if (verbose)
    return(invisible(return_value))
  else
    return(return_value)
}

Try the slendr package in your browser

Any scripts or data that you put into this service are public.

slendr documentation built on June 22, 2024, 6:56 p.m.