R/extract.R

Defines functions assert_and_assign_strategy check_extent border_input_from_polygon border_input_from_bbox create_border_input extract

Documented in extract

#' Create geographical extracts from an OSM file
#'
#' Creates geographical extracts from an OSM data file or an OSM history file.
#' The geographical extent can be given either as a bounding box or as a
#' (multi)polygon.
#'
#' @param input_path A string. The path to the OSM data/history file from which
#'   the extract should be created. Please see [file_formats] for a list of
#'   supported file formats.
#' @param extent Either a `POLYGON` or a `MULTIPOLYGON` `sf` object with only
#'   one feature or a `bbox` object, created by [sf::st_bbox].
#' @param output_path A string. The path to the file where the output should be
#'   written to. Please see [file_formats] for a list of supported file formats.
#' @param strategy A string. The strategy to use when creating the extract.
#'   Available strategies are `"complete_ways"`, `"smart"` and `"simple"`.
#'   Defaults to `"complete_ways"`. Please see the "Strategies" section for a
#'   description of each one of them.
#' @param overwrite A logical. Whether existing files should be overwritten by
#'   the output. Defaults to `FALSE`.
#' @param echo_cmd A logical. Whether to print the Osmium command generated by
#'   the function call to the screen. Defaults to `FALSE`.
#' @param echo A logical. Whether to print the standard output and error
#'   generated by the Osmium call to the screen. Defaults to `TRUE`.
#' @param spinner A logical. Whether to show a reassuring spinner while the
#'   Osmium call is being executed. Defaults to `TRUE`.
#' @param verbose A logical. Whether to display detailed information on the
#'   running command. Defaults to `FALSE`.
#'
#' @section Strategies:
#' Different strategies can be used when creating extracts. Depending on the
#' strategy, different objects will end up in the extracts. The strategies
#' differ in how much memory they need and how often they need to read the input
#' file. The choice of strategy depends on how you want to use the generated
#' extracts and how much memory and time you have.
#'
#' - `"simple"` - runs in a single pass. The extract will contain all nodes
#' inside the region and all ways referencing those nodes, as well as all
#' relations referencing any nodes or ways already included. Ways crossing the
#' region boundary will not be reference-complete. Relations will not be
#' reference-complete. This strategy is fast, because it reads the input only
#' once, but the result is not enough for most use cases. This strategy will not
#' work for history files.
#'
#' - `"complete_ways"` - runs in two passes. The extract will contain all nodes
#' inside the region and all ways referencing those nodes, as well as all nodes
#' referenced by those ways. The extract will also contain all relations
#' referenced by nodes inside the region or ways already included and,
#' recursively, their parent relations. The ways are reference-complete, but the
#' relations are not.
#'
#' - `"smart"` - runs in three passes. The extract will contain all nodes inside
#' the region and all ways referencing those nodes, as well as all nodes
#' referenced by those ways. The extract will also contain all relations
#' referenced by nodes inside the region or ways already included and,
#' recursively, their parent relations. The extract will also contain all nodes
#' and ways (and the nodes they reference) referenced by relations tagged
#' "type=multipolygon" directly referencing any nodes in the region or ways
#' referencing nodes in the region. The ways are reference-complete, and all
#' multipolygon relations referencing nodes in the regions or ways that have
#' nodes in the region are reference-complete. Other relations are not
#' reference-complete.
#'
#' @return The normalized path to the output file.
#'
#' @examplesIf identical(tolower(Sys.getenv("NOT_CRAN")), "true")
#' pbf_path <- system.file("extdata/cur.osm.pbf", package = "rosmium")
#'
#' file.size(pbf_path)
#'
#' # buffering the pbf bounding box 4000 meters inward and using the result
#' # extent to extract the osm data inside it. transforming the crs because
#' # inward buffers only work with projected crs
#'
#' lines <- sf::st_read(pbf_path, layer = "lines", quiet = TRUE)
#' bbox <- sf::st_bbox(lines)
#' bbox_polygon <- sf::st_as_sf(sf::st_as_sfc(bbox))
#' smaller_bbox_poly <- sf::st_buffer(
#'   sf::st_transform(bbox_polygon, 5880),
#'   -4000
#' )
#' smaller_bbox_poly <- sf::st_transform(smaller_bbox_poly, 4326)
#'
#' output_path <- extract(
#'   pbf_path,
#'   smaller_bbox_poly,
#'   tempfile(fileext = ".osm.pbf")
#' )
#'
#' file.size(output_path)
#'
#' @export
extract <- function(input_path,
                    extent,
                    output_path,
                    strategy = c("complete_ways", "smart", "simple"),
                    overwrite = FALSE,
                    echo_cmd = FALSE,
                    echo = TRUE,
                    spinner = TRUE,
                    verbose = FALSE) {
  assert_osmium_is_installed()

  checkmate::assert_file_exists(input_path)
  checkmate::assert_logical(overwrite, any.missing = FALSE, len = 1)
  checkmate::assert_logical(echo, any.missing = FALSE, len = 1)
  checkmate::assert_logical(echo_cmd, any.missing = FALSE, len = 1)
  checkmate::assert_logical(spinner, any.missing = FALSE, len = 1)
  checkmate::assert_logical(verbose, any.missing = FALSE, len = 1)
  assert_extent(extent)
  assert_output_path_multi_ext(output_path, overwrite)

  # osmium is launched directly by processx, with no shell in between, so a
  # leading "~" in a path would not be expanded for us. expanding it here makes
  # sure osmium receives paths it can actually resolve
  input_path <- path.expand(input_path)
  output_path <- path.expand(output_path)

  strategy_arg <- assert_and_assign_strategy(strategy)
  border_arg <- create_border_input(extent)
  output_arg <- paste0("--output=", output_path)
  overwrite_arg <- if (overwrite) "--overwrite" else character()
  verbose_arg <- if (verbose) "--verbose" else character()

  # polygon extents are handed to osmium through a temporary geojson file
  # ("--polygon=<path>"). that file is of no use once the call is over, so it
  # is removed on exit, whether osmium succeeds or fails
  polygon_prefix <- "--polygon="
  if (startsWith(border_arg, polygon_prefix)) {
    polygon_file <- substring(border_arg, nchar(polygon_prefix) + 1)
    on.exit(unlink(polygon_file), add = TRUE)
  }

  # optional flags are zero-length when disabled, so c() simply drops them
  args <- c(
    "extract",
    input_path,
    border_arg,
    output_arg,
    strategy_arg,
    overwrite_arg,
    verbose_arg
  )

  processx::run(
    "osmium",
    args,
    echo = echo,
    spinner = spinner,
    echo_cmd = echo_cmd
  )

  return(normalizePath(output_path))
}

# Builds the osmium argument that describes the extract boundary, dispatching
# on the kind of extent: `bbox` objects are handled by
# border_input_from_bbox(), anything else by border_input_from_polygon().
create_border_input <- function(x) {
  is_bbox <- inherits(x, "bbox")

  if (!is_bbox) {
    return(border_input_from_polygon(x))
  }

  border_input_from_bbox(x)
}

# Formats a bounding box as an osmium argument in the form
# "--bbox=<xmin>,<ymin>,<xmax>,<ymax>", i.e. the bottom-left corner followed by
# the top-right one.
border_input_from_bbox <- function(x) {
  corners <- paste(x$xmin, x$ymin, x$xmax, x$ymax, sep = ",")

  paste0("--bbox=", corners)
}

# Writes the polygon to a temporary geojson file and returns the osmium
# argument pointing to it, in the form "--polygon=<path to the file>".
border_input_from_polygon <- function(x) {
  # we need the geojson to hold either a feature or a feature collection. with
  # simplification on, objects with a single feature (always our case) would be
  # represented as a geojson vector instead, hence simplify = FALSE
  # NOTE(review): coordinates are written as they are in `x`; presumably
  # callers provide them in WGS84 - confirm
  polygon_geojson <- geojsonsf::sf_geojson(x, simplify = FALSE)

  polygon_file <- tempfile(pattern = "polygon", fileext = ".geojson")
  writeLines(polygon_geojson, con = polygon_file)

  paste0("--polygon=", polygon_file)
}

# Checks whether `extent` can be used as an extract boundary. Follows the
# convention of checkmate check functions: returns TRUE when the input is
# valid and a string describing the problem otherwise.
#
# Valid inputs are either:
#   - a `bbox` object holding 4 finite numbers named 'xmin', 'ymin', 'xmax'
#     and 'ymax'; or
#   - an `sf` object with exactly one feature, whose geometry type is either
#     POLYGON or MULTIPOLYGON.
check_extent <- function(extent) {
  multi_class_res <- checkmate::check_multi_class(extent, c("sf", "bbox"))
  if (!isTRUE(multi_class_res)) return(multi_class_res)

  if (inherits(extent, "bbox")) {
    is_numeric_len_4 <- checkmate::test_numeric(
      extent,
      finite = TRUE,
      any.missing = FALSE,
      len = 4
    )

    # set equality, and not a mere subset test, is required here: combined with
    # the length check above, it guarantees that each of the four names appears
    # exactly once, rejecting unnamed boxes and boxes with duplicated names
    is_correctly_named <- setequal(
      names(extent),
      c("xmin", "ymin", "xmax", "ymax")
    )

    if (!(is_numeric_len_4 && is_correctly_named)) {
      return(
        paste0(
          "Bounding box must contain 4 elements named 'xmin', 'ymin', 'xmax' ",
          "and 'ymax'"
        )
      )
    }
  } else {
    n_features <- nrow(extent)

    # an object without features has no geometry type to inspect, so it must be
    # rejected here, before the geometry type check below
    if (n_features == 0) {
      return("sf object must contain one feature, but it contains none")
    }

    if (n_features > 1) {
      return(
        paste0(
          "sf object must contain only one feature. Hint: try using ",
          "sf::st_union() to union multiple features into a single one"
        )
      )
    }

    geometry_type <- as.character(sf::st_geometry_type(extent))
    if (!geometry_type %in% c("POLYGON", "MULTIPOLYGON")) {
      msg <- paste0(
        "Geometry type of sf object must be either POLYGON or MULTIPOLYGON. ",
        "Found ", geometry_type, " instead"
      )

      # collections may contain usable polygons, so we point to how to get them
      if (geometry_type == "GEOMETRYCOLLECTION") {
        msg <- paste0(
          msg,
          ". Hint: try using sf::st_collection_extract(type = \"POLYGON\") to ",
          "extract the polygons/multipolygons from the collection"
        )
      }

      return(msg)
    }
  }

  return(TRUE)
}

# Assertion counterpart of check_extent(), generated by checkmate: instead of
# returning the message describing the problem, it raises an error with it
assert_extent <- checkmate::makeAssertionFunction(check_extent)

# Validates `strategy` and converts it into the corresponding osmium argument.
#
# The untouched default (the full vector of choices) is accepted as is and
# resolves to its first element, "complete_ways". Any other value must be a
# single string naming one of the available strategies, otherwise an error
# reporting all failed checks is raised.
#
# Returns a string in the form "--strategy=<strategy>".
assert_and_assign_strategy <- function(strategy) {
  possible_choices <- c("complete_ways", "smart", "simple")

  if (!identical(strategy, possible_choices)) {
    coll <- checkmate::makeAssertCollection()
    checkmate::assert_string(strategy, add = coll)
    # assert_choice() validates the value itself, so a failure is reported in
    # terms of the set of allowed strategies
    checkmate::assert_choice(strategy, possible_choices, add = coll)
    checkmate::reportAssertions(coll)
  }

  strategy_input <- paste0("--strategy=", strategy[1])

  return(strategy_input)
}

Try the rosmium package in your browser

Any scripts or data that you put into this service are public.

rosmium documentation built on Nov. 28, 2023, 1:07 a.m.