#' Get a data product
#'
#' @param layer (character) Identifier of the data product. See [mrp_list]
#' @param path (character) Path to save the requests. Default is [base::tempdir()]. See details.
#' @param cql_filter (character) Contextual Query Language (CQL) filter. See details.
#' @param filter (character) Standard OGC filter specification. See details.
#' @param count (numeric) Maximum number of features to be retrieved.
#'
#' @details
#' This function uses [WFS services](https://en.wikipedia.org/wiki/Web_Map_Service) to download the
#' Marine Regions layers as ESRI Shapefiles.
#'
#' ## Caching
#' By default, the layers are downloaded to a temporal directory ([base::tempdir()]). You can provide
#' a path in the `path` argument. But you can also set a path with `# options("mregions2.download_path" = "my/path/")`.
#'
#' Because it is possible to add filters, each request is identified with a crc32 hash, provided with
#' [digest::digest()] and attached to the file downloaded.
#'
#' Once a layer is downloaded, it will be read from the cache during the next two weeks. To avoid this, simply
#' delete the layers in the cache path.
#'
#' ## Filters
#'
#' Both the [Contextual Query Language (CQL) filter](https://portal.ogc.org/files/96288) and the [standard OGC filter specification](https://www.ogc.org/publications/standard/filter/) allow to
#' query the server before performing a request. This will boost performance as you will only retrieve the area of your interest. It is possible to query on attributes, but also perform
#' geospatial queries. For instance, you can query a bounding box of interest.
#'
#' CQL filters are possible only in geoserver. Marine Regions uses a geoserver instance to serve its data products.
#' A tutorial on CQL filters is available in the [geoserver web site](https://docs.geoserver.org/stable/en/user/tutorials/cql/cql_tutorial.html).
#'
#'
#' @return An sf object with the Marine Regions data product
#' @export
#'
#' @seealso [mrp_list] to describe the list of products, [mrp_view()] to visualize the data product in advance, [mrp_colnames()] and [mrp_col_unique()] to get the name, data type and unique values of a the columns of a data product, useful to query
#' with the arguments `cql_filter` or `filter`
#'
#' @examples \donttest{
#' # Set cache path. Default is a temporal directory
#' options(mregions2.download_path = tempdir())
#'
#' getOption("mregions2.download_path")
#' #> [1] "/tmp/RtmpARLgoE"
#'
#' # See the list of all data products
#' mrp_list
#'
#' # We want the Exclusive Economic Zones of Portugal. Let's first visualize the product:
#' mrp_view("eez")
#'
#' # See all the columns on this data product
#' mrp_colnames("eez")
#'
#' # We should query on sovereign
#' # See all the possible values of sovereign1, sovereign2 and sovereign3
#' sov1 = mrp_col_unique("eez", "sovereign1")
#' sov2 = mrp_col_unique("eez", "sovereign2")
#' sov3 = mrp_col_unique("eez", "sovereign3")
#'
#' # Is Portugal a value in the sovereign1, 2 and 3?
#' "Portugal" %in% sov1
#' #> [1] TRUE
#'
#' "Portugal" %in% sov2
#' #> [1] FALSE
#'
#' "Portugal" %in% sov3
#' #> [1] FALSE
#'
#' # Portugal is only in sovereign1. Let's write a CQL filter to get only
#' # the EEZs of Portugal, or those where Portugal is a party of a dispute or a joint regime
#' portugal_eez <- mrp_get("eez", cql_filter = "sovereign1 = 'Portugal'")
#'
#' # If you perform this request again, it will be read from the cache instead
#' portugal_eez <- mrp_get("eez", cql_filter = "sovereign1 = 'Portugal'")
#' #> Cache is fresh. Reading: /tmp/RtmpARLgoE/eez-1951c8b7/eez.shp
#' #> (Last Modified: 2023-04-24 17:45:16)
#'
#' # You can also limit the number of features to be requested
#' mrp_get("eez", count = 5)
#' }
mrp_get <- function(layer, path = getOption("mregions2.download_path", tempdir()), cql_filter = NULL, filter = NULL, count = NULL){
# Assertions
checkmate::assert_character(layer, len = 1)
checkmate::assert_choice(layer, mregions2::mrp_list$layer)
checkmate::assert_character(cql_filter, null.ok = TRUE, len = 1)
checkmate::assert_character(filter, null.ok = TRUE, len = 1)
assert_only_one_filter(cql_filter, filter)
count <- checkmate::assert_integerish(count, lower = 1, len = 1,
coerce = TRUE, null.ok = TRUE)
stopifnot(dir.exists(path))
assert_internet()
# Config
namespace <- subset(mregions2::mrp_list$namespace, mregions2::mrp_list$layer == layer)
url <- httr2::url_parse("https://geo.vliz.be/geoserver/ows")
url$query <- list(service = "wfs",
version = "2.0.0",
request = "GetFeature",
srsName = "EPSG:4326",
typeName = glue::glue("{namespace}:{layer}"),
cql_filter = cql_filter,
filter = filter,
count = count,
outputFormat = "SHAPE-ZIP")
url <- httr2::url_build(url)
# Cache
hash <- digest::digest(url, algo = "crc32")
hash <- glue::glue('{layer}-{hash}')
cached_zip_path <- file.path(path, glue::glue('{hash}.zip'))
cached_unzip_path <- file.path(path, hash)
cached_file_path <- file.path(cached_unzip_path, glue::glue('{layer}.shp'))
do_request <- TRUE
if(file.exists(cached_file_path)) {
cached_file_time <- file.info(cached_file_path)$ctime
cached_file_is_fresh <- difftime(
Sys.time(), cached_file_time, units = "weeks"
) %>% as.numeric() %>% `<`(cache_max_time())
if(cached_file_is_fresh){
do_request <- FALSE
cli::cli_text("Cache is fresh. Reading: {.path {cached_file_path}}")
cli::cli_text("(Last Modified: {.emph {cached_file_time}})")
}
}
# Perform
if(do_request){
resp <- httr2::request(url) %>%
httr2::req_user_agent(mr_user_agent) %>%
httr2::req_error(is_error = function(resp) FALSE) %>%
httr2::req_perform(path = cached_zip_path) %>%
mrp_get_sanity_check()
dir.create(cached_unzip_path, showWarnings = FALSE)
utils::unzip(zipfile = cached_zip_path, exdir = cached_unzip_path, overwrite = TRUE)
if(!is_test()) try_clean_up(cached_zip_path)
}
check_server_warning(cached_unzip_path)
mrp_list <- NULL # Avoid R CMD Check note
out <- sf::st_read(cached_file_path, quiet = TRUE, stringsAsFactors = FALSE)
attr(out, "class") <- c("sf", "tbl_df", "tbl", "data.frame")
out
}
cache_max_time <- function(){
weeks <- Sys.getenv("TESTPKG.CACHETIME", 4)
weeks
}
mrp_get_sanity_check <- function(resp){
is_error <- httr2::resp_is_error(resp)
if(is_error){
status <- httr2::resp_status(resp)
desc <- httr2::resp_status_desc(resp)
msg <- c("!" = "HTTP {status} {desc}")
try({
exception <- httr2::resp_body_string(resp) %>%
xml2::read_xml() %>%
xml2::xml_find_all(glue::glue("//ows:Exception"))
exception_code <- exception %>%
xml2::xml_attr("exceptionCode")
exception_text <- exception %>%
xml2::xml_find_all(glue::glue("//ows:ExceptionText")) %>%
xml2::xml_text()
msg <- c(msg,
"i" = "Exception Code: {.emph {exception_code}}",
"i" = "Exception Text: {.emph {exception_text}}"
)
if(!is_test()) try_clean_up(resp$body)
})
cli::cli_abort(msg)
}
resp
}
try_clean_up <- function(path) try({file.remove(path)}, silent = TRUE)
check_server_warning <- function(cached_unzip_path){
readme <- file.path(cached_unzip_path, "README.TXT")
if(file.exists(readme)){
msg <- readLines(readme, warn = FALSE, skipNul = TRUE)
msg <- paste0(msg, collapse = "; ")
warning(msg, call. = FALSE)
}
invisible(NULL)
}
.mrp_colnames <- function(layer){
checkmate::assert_character(layer, len = 1)
checkmate::assert_choice(layer, mregions2::mrp_list$layer)
# Config
namespace <- subset(mregions2::mrp_list$namespace, mregions2::mrp_list$layer == layer)
url <- httr2::url_parse("https://geo.vliz.be/geoserver/ows")
url$query <- list(service = "wfs",
version = "2.0.0",
request = "DescribeFeatureType",
typeName = glue::glue("{namespace}:{layer}")
)
# Perform
request <- httr2::url_build(url) %>%
httr2::request() %>%
httr2::req_user_agent(mr_user_agent) %>%
httr2::req_perform()
resp <- request %>%
httr2::resp_body_xml() %>%
xml2::xml_find_all("//xsd:element")
out <- data.frame(
layer = layer,
colname = xml2::xml_attr(resp, "name"),
type = gsub("xsd:", "", xml2::xml_attr(resp, "type")),
stringsAsFactors = FALSE
)
out <- subset(out, out$colname != "the_geom")
out <- subset(out, out$colname != layer)
attr(out, "class") <- c("tbl_df", "tbl", "data.frame")
mrp_list <- NULL
out
}
#' Get the names of the columns and data type of the data product
#'
#' @param layer (character) Identifier of the data product. See [mrp_list]
#'
#' @details
#' This function becomes useful to write CQL or OGC filters that you can pass to [mrp_get()] or [mrp_view()] as
#' it allows you to know the column names and the data types beforehand. Use it together with [mrp_col_unique()] to
#' know all the possible values in the column name that you want to query on.
#'
#' The actual description of each column is available only to the Maritime Boundaries products.
#' See <https://marineregions.org/eezattribute.php>
#'
#' @return A data frame with the column names and data type in the Marine Regions data product
#' @export
#'
#' @seealso [mrp_list] to describe the list of products, [mrp_col_unique()] to get the unique values of a the
#' columns of a data product, useful to write queries that can be passed to [mrp_get()] or [mrp_view()] via the
#' arguments `cql_filter` or `filter`.
#'
#' @examples \donttest{
#' mrp_colnames("eez")
#' mrp_colnames("ecoregions")
#' }
mrp_colnames <- memoise::memoise(.mrp_colnames)
.mrp_col_unique <- function(layer, colname){
checkmate::assert_character(layer, len = 1)
checkmate::assert_choice(layer, mregions2::mrp_list$layer)
checkmate::assert_character(colname, len = 1)
column_names <- mrp_colnames(layer)
checkmate::assert_choice(colname, column_names$colname)
# Config
namespace <- subset(mregions2::mrp_list$namespace, mregions2::mrp_list$layer == layer)
url <- httr2::url_parse("https://geo.vliz.be/geoserver/ows")
url$query <- list(service = "wfs",
version = "2.0.0",
request = "GetPropertyValue",
typeNames = glue::glue("{namespace}:{layer}"),
valueReference = colname
)
# Perform
resp <- httr2::url_build(url) %>%
httr2::request() %>%
httr2::req_user_agent(mr_user_agent) %>%
httr2::req_perform() %>%
httr2::resp_body_xml() %>%
xml2::xml_find_all(glue::glue("//wfs:member")) %>%
xml2::xml_text() %>%
unique()
datatype <- tolower(subset(
column_names$type,
column_names$colname == colname
))
if(datatype %in% c("numeric", "int", "double")) resp <- resp %>% as.numeric()
if(datatype %in% c("date")) resp <- resp %>% as.Date()
if(datatype %in% c("timestamp")) resp <- resp %>% as.POSIXct(tz = "UTC")
mrp_list <- NULL
sort(resp)
}
#' Get all the possible values of a column of a Marine Regions data product
#'
#' @param layer (character) Identifier of the data product. See [mrp_list]
#' @param colname (character) Column name in the data product. See [mrp_colnames()]
#'
#' @details
#' This function becomes useful to write CQL or OGC filters that you can pass to [mrp_get()] or [mrp_view()] as
#' it helps to know all the possible values in the column name that you want to query on beforehand. Use it
#' together with [mrp_colnames()] to know the columns and data types in the data product.
#'
#' ## Geometry columns
#' Note that columns of type `geometry` are forbidden as their performance is sub-optimal and would likely
#' crash your R session.
#'
#' @return A numeric or character vector with the unique values of a column of a Marine Regions data product.
#' @export
#'
#' @seealso [mrp_list] to describe the list of products, [mrp_colnames()] to get the names and data type of
#' the columns of a data product, useful to write queries that can be passed to [mrp_get()] or [mrp_view()] via
#' the arguments `cql_filter` or `filter`.
#'
#' @examples \donttest{
#' mrp_col_unique("ecs", "pol_type")
#' mrp_col_unique("ecs_boundaries", "line_type")
#' }
mrp_col_unique <- memoise::memoise(.mrp_col_unique)
#' @rdname mrp_col_unique
#' @export
mrp_col_distinct <- mrp_col_unique
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.