R/utils-classes.R

Defines functions cat_line_wrap finalize_cql show_query.bcdc_sf show_query.bcdc_promise collect.bcdc_promise mutate.bcdc_promise names.bcdc_promise tail.bcdc_promise head.bcdc_promise select.bcdc_promise filter.bcdc_promise print.bcdc_query print.bcdc_group print.bcdc_recordlist record_print_helper print.bcdc_record print.bcdc_promise as.bcdc_query as.bcdc_sf as.bcdc_promise

Documented in collect.bcdc_promise filter.bcdc_promise mutate.bcdc_promise select.bcdc_promise show_query.bcdc_promise show_query.bcdc_sf

# Copyright 2019 Province of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

## Add "bcdc_promise" class
as.bcdc_promise <- function(res) {
  structure(res,
            class = c("bcdc_promise", setdiff(class(res), "bcdc_promise"))
  )
}

as.bcdc_sf <- function(x, query_list, url, full_url) {
  structure(x,
            class = c("bcdc_sf", setdiff(class(x), "bcdc_sf")),
            query_list = query_list,
            url = url, full_url = full_url, time_downloaded = Sys.time())
}


as.bcdc_query <- function(x) {
  structure(x,
            class = c("bcdc_query", setdiff(class(x), "bcdc_query"))
  )
}


# print methods -----------------------------------------------------------

#' @export
print.bcdc_promise <- function(x, ...) {

  x$query_list$CQL_FILTER <- finalize_cql(x$query_list$CQL_FILTER)

  if (is.null(x$query_list$count)) {
    query_list <- c(x$query_list, count = 6) ## only add if not there.
  } else {
   query_list <- x$query_list
  }

  cli <- x$cli
  cc <- cli$post(body = query_list, encode = "form")

  catch_wfs_error(cc)

  ## pagination printing
  number_of_records <- bcdc_number_wfs_records(x$query_list, x$cli)
  chunk_size <- check_chunk_limit()

  if (!is.null(x$query_list$count)) {
    # head or tail have updated the count
    number_of_records <- x$query_list$count
  }

  parsed <- bcdc_read_sf(cc$parse("UTF-8"))
  fields <- ncol(parsed) - 1

  # Check if this was called using a whse name directly without going
  # through a catalogue record so don't have this info
  name <- ifelse(is_record(x$record),
                 paste0("'", x[["record"]][["name"]], "'"),
                 paste0("'", x[["query_list"]][["typeNames"]], "'"))
  cat_line_wrap(glue::glue("Querying {col_red(name)} record"))

  cat_bullet(strwrap(glue::glue("Using {col_blue('collect()')} on this object will return {col_green(number_of_records)} features ",
                        "and {col_green(fields)} fields")))
  if (number_of_records > chunk_size) { # this triggers pagination
    cat_bullet(strwrap(glue::glue("Accessing this record requires pagination and will make {col_green(ceiling(number_of_records/chunk_size))} separate requests to the WFS. ",
                                              "See ?bcdc_options")))
  }

  cat_bullet(strwrap("At most six rows of the record are printed here"))
  cat_rule()
  print(parsed)
  invisible(x)
}

#' @export
print.bcdc_record <- function(x, ...) {
  cat_line_wrap(cli::col_blue(cli::style_bold("B.C. Data Catalogue Record: ")), x$title)
  cat_line_wrap(cli::col_blue(cli::style_italic("Name: ")), x$name, " (ID: ", x$id, ")")
  cat_line_wrap(cli::col_blue(cli::style_italic("Permalink: ")), paste0(catalogue_base_url(), "dataset/", x$id))
  cat_line_wrap(cli::col_blue(cli::style_italic("Licence: ")), x$license_title)
  cat_line_wrap(cli::col_blue(cli::style_italic("Description: ")), x$notes)


  tidy_resources <- bcdc_tidy_resources(x)
  avail_res <- tidy_resources[tidy_resources$bcdata_available, , drop = FALSE]
  cat_line_wrap(cli::col_blue(cli::style_italic("Available Resources (", nrow(avail_res), "):")))
  cli::cat_line(" ", seq_len(nrow(avail_res)), ". ", avail_res$name, " (", avail_res$format, ")")

  cat_line_wrap(cli::col_blue(cli::style_italic("Access the full 'Resources' data frame using: ")),
                cli::col_red("bcdc_tidy_resources('", x$id, "')"))

  if ("wms" %in% formats_from_record(x)) {
    cat_line_wrap(cli::col_blue(cli::style_italic("Query and filter this data using: ")),
                  cli::col_red("bcdc_query_geodata('", x$id, "')"))
  }

  invisible(x)
}

record_print_helper <- function(r, n, print_avail = FALSE) {
  cat_line_wrap(n, ") ", clean_wfs(r$name))
  #cat_line_wrap("    description:", r$description)
  cat_line_wrap("format: ", clean_wfs(formats_from_resource(r)), indent = 3)
  if (r$format != "wms") cat_line_wrap("url: ", r$url, indent = 3)
  cat_line_wrap("resource: ", r$id, indent = 3)
  if (print_avail) {
    cat_line_wrap("available in R via bcdata: ",
        if (r$format == "zip") {
          "Will attempt - unknown format (zipped)"
        } else {
          r$bcdata_available
        })
  }
  if (r$bcdata_available)
    cat_line_wrap("code: ", "bcdc_get_data(record = '", r$package_id,
        "', resource = '",r$id,"')", indent = 3)
  cat_line()
}

#' @export
print.bcdc_recordlist <- function(x, ...) {

  len <- length(x)

  if (len == 0L) {
    cat_line_wrap("Your search returned no results. Please try a different query.")
    return(x)
  }

  cat_line_wrap("List of B.C. Data Catalogue Records")
  n_print <- min(50, len)
  cat_line_wrap(cli::col_blue("Number of records: ", len))
  if (n_print < len) {
    cat_line_wrap(cli::col_blue("Showing the top 50 results. You can assign the output of bcdc_search, to an object and subset with `[` to see other results in the set."))
  cat_line("")
    }
  cat_line_wrap("Titles:")
  x <- purrr::set_names(x, NULL)

  purrr::imap(unclass(x)[1:n_print], ~ {

    if (!nrow(bcdc_tidy_resources(x[[.y]]))) {
      cat_line_wrap(.y, ": ",purrr::pluck(.x, "title"))
      cat_line_wrap("This record has no resources. bcdata will not be able to access any data.", col = "red")
    } else {
      cat_line_wrap(.y, ": ",purrr::pluck(.x, "title"),
                    " (", paste0(unique(formats_from_record(.x)), collapse = ", "),
                    ")")
    }

    cat_line_wrap("ID: ", purrr::pluck(.x, "id"), indent = 1, exdent = 2)
    cat_line_wrap("Name: ", purrr::pluck(.x, "name"), indent = 1, exdent = 2)
  })

  cat_line()
  cat_line_wrap("Access a single record by calling `bcdc_get_record(ID)`
      with the ID from the desired record.")
  invisible(x)
}

#' @export
print.bcdc_group <- function(x, ...) {

  cat_line_wrap(cli::col_blue(
    cli::style_italic(
      "Group Description: "
    )
  ), unique(attr(x, "description")))



  cat_line_wrap(cli::col_blue(
    cli::style_italic("Number of datasets: ")), nrow(x))

  print(tibble::as_tibble(x))
}


#' @export
print.bcdc_query <- function(x, ...) {

  cat_line("<url>")
  if (length(x$url) > 1) {
    for (i in seq_along(x$url)) {
      cat_line_wrap(glue::glue("Request {i} of {length(x$url)} \n{x$url[i]}"))
    }
  }

  cat_line("<body>")
  cat_line_wrap(glue::glue("   {names(x$query_list)}: {x$query_list}"))
  cat_line()
  cat_line("<full query url>")
  cat_line_wrap(x$full_url)
  invisible(x)
}


# dplyr methods -----------------------------------------------------------


#' Filter a query from bcdc_query_geodata()
#'
#' Filter a query from Web Feature Service using dplyr
#' methods. This filtering is accomplished lazily so that
#' the full sf object is not read into memory until
#' `collect()` has been called.
#'
#' @param .data object of class `bcdc_promise` (likely passed from [bcdc_query_geodata()])
#' @param ... Logical predicates with which to filter the results. Multiple
#' conditions are combined with `&`. Only rows where the condition evaluates to
#' `TRUE` are kept. Accepts normal R expressions as well as any of the special
#' [CQL geometry functions][cql_geom_predicates] such as `WITHIN()` or `INTERSECTS()`.
#' If you know `CQL` and want to write a `CQL` query directly, write it enclosed
#' in quotes, wrapped in the [CQL()] function. e.g., `CQL("ID = '42'")`.
#'
#' If your filter expression contains calls that need to be executed locally, wrap them
#' in `local()` to force evaluation in R before the request is sent to the server.
#'
#' @describeIn filter filter.bcdc_promise
#' @examples
#' \donttest{
#' try(
#'   crd <- bcdc_query_geodata("regional-districts-legally-defined-administrative-areas-of-bc") %>%
#'     filter(ADMIN_AREA_NAME == "Cariboo Regional District") %>%
#'     collect()
#' )
#'
#' try(
#'   ret1 <- bcdc_query_geodata("fire-perimeters-historical") %>%
#'     filter(FIRE_YEAR == 2000, FIRE_CAUSE == "Person", INTERSECTS(crd)) %>%
#'     collect()
#' )
#'
#' # Use local() to force parts of your call to be evaluated in R:
#' try({
#'   # Create a bounding box around two points and use that to filter
#'   # the remote data set
#'   library(sf)
#'   two_points <- st_sfc(st_point(c(1164434, 368738)),
#'                      st_point(c(1203023, 412959)),
#'                      crs = 3005)
#'
#'   # Wrapping the call to `st_bbox()` in `local()` ensures that it
#'   # is executed in R to make a bounding box that is then sent to
#'   # the server for the filtering operation:
#'   res <- bcdc_query_geodata("local-and-regional-greenspaces") %>%
#'     filter(BBOX(local(st_bbox(two_points, crs = st_crs(two_points))))) %>%
#'     collect()
#' })
#' }
#' @export
filter.bcdc_promise <- function(.data, ...) {

  current_cql = cql_translate(..., .colnames = .data$cols_df$col_name %||% character(0))
  ## Change CQL query on the fly if geom is not GEOMETRY
  current_cql = specify_geom_name(.data$cols_df, current_cql)

  # Add cql filter statement to any existing cql filter statements.
  # ensure .data$query_list$CQL_FILTER is class sql even if NULL, so
  # dispatches on sql class and dbplyr::c.sql method is used
  .data$query_list$CQL_FILTER <- c(dbplyr::sql(.data$query_list$CQL_FILTER),
                                   current_cql,
                                   drop_null = TRUE)

  as.bcdc_promise(list(query_list = .data$query_list, cli = .data$cli,
                       record = .data$record, cols_df = .data$cols_df))
}

#' Select columns from bcdc_query_geodata() call
#'
#' Similar to a `dplyr::select` call, this allows you to select which columns you want the Web Feature Service to return.
#' A key difference between `dplyr::select` and `bcdata::select` is the presence of "sticky" columns that are
#' returned regardless of what columns are selected. If any of these "sticky" columns are selected
#' only "sticky" columns are return. `bcdc_describe_feature` is one way to tell if columns are sticky in advance
#' of issuing the Web Feature Service call.
#'
#' @param .data object of class `bcdc_promise` (likely passed from [bcdc_query_geodata()])
#' @param ... One or more unquoted expressions separated by commas. See details.
#'
#' @describeIn select select.bcdc_promise
#'
#' @examples
#' \donttest{
#' try(
#'   feature_spec <- bcdc_describe_feature("bc-airports")
#' )
#'
#' try(
#'   ## Columns that can selected:
#'   feature_spec[feature_spec$sticky == TRUE,]
#' )
#'
#' ## Select columns
#' try(
#'   res <- bcdc_query_geodata("bc-airports") %>%
#'     select(DESCRIPTION, PHYSICAL_ADDRESS)
#' )
#'
#' ## Select "sticky" columns
#' try(
#'   res <- bcdc_query_geodata("bc-airports") %>%
#'     select(LOCALITY)
#' )
#' }
#'
#'
#'@export
select.bcdc_promise <- function(.data, ...){

  ## Eventually have to migrate to tidyselect::eval_select
  ## https://community.rstudio.com/t/evaluating-using-rlang-when-supplying-a-vector/44693/10
  cols_to_select <- tidyselect::vars_select(.data$cols_df$col_name, ...)

  ## id is always added in. web request doesn't like asking for it twice
  cols_to_select <- remove_id_col(cols_to_select)
  ## Always add back in the geom
  cols_to_select <- paste(geom_col_name(.data$cols_df), paste0(cols_to_select, collapse = ","), sep = ",")

  query_list <- c(.data$query_list, propertyName = cols_to_select)

  as.bcdc_promise(list(query_list = query_list, cli = .data$cli,
                       record = .data$record, cols_df = .data$cols_df))

}

#' @importFrom utils head
#' @export
head.bcdc_promise <- function(x, n = 6L, ...) {
  sorting_col <- pagination_sort_col(x$cols_df)
  x$query_list <- c(
    x$query_list,
    count = n,
    sortBy = sorting_col
  )
  x
}

#' @importFrom utils tail
#' @export
tail.bcdc_promise <- function(x, n = 6L, ...) {
  number_of_records <- bcdc_number_wfs_records(x$query_list, x$cli)
  sorting_col <- pagination_sort_col(x$cols_df)
  x$query_list <- c(
    x$query_list,
    count = n,
    sortBy = sorting_col,
    startIndex = number_of_records - n
  )
  x
}


#' @export
names.bcdc_promise <- function(x) {
  cols <- x[["cols_df"]]
  query <- x[["query_list"]]

  if (!is.null(query$propertyName)) {
    select_cols <- strsplit(query$propertyName, ",")[[1]]
    cols <- cols$col_name[cols$sticky | cols$col_name %in% select_cols]
  } else {
    cols <- cols$col_name
  }

  cols[cols == "SHAPE" | cols == "GEOMETRY"] <- "geometry"
  geom_idx <- which(cols == "geometry")

  cols[c(seq_along(cols)[-geom_idx], geom_idx)]

}


#' Throw an informative error when attempting mutate on a `bcdc_promise` object
#'
#' The CQL syntax to generate WFS calls does not current allow arithmetic operations. Therefore
#' this function exists solely to generate an informative error that suggests an alternative
#' approach to use mutate with bcdata
#'
#' @param .data object of class `bcdc_promise` (likely passed from [bcdc_query_geodata()])
#' @param ... One or more unquoted expressions separated by commas. See details.
#' @describeIn mutate mutate.bcdc_promise
#' @examples
#' \donttest{
#'
#' ## Mutate columns
#' try(
#'   res <- bcdc_query_geodata("bc-airports") %>%
#'     mutate(LATITUDE * 100)
#' )
#' }
#'
#'@export
mutate.bcdc_promise <- function(.data, ...){
  dots <- rlang::exprs(...)

  stop(glue::glue(
    "You must type collect() before using mutate() on a WFS. \nAfter using collect() add this mutate call::
    mutate({dots}) "), call. = FALSE)
}


#' Force collection of Web Feature Service request from B.C. Data Catalogue
#'
#' After tuning a query, `collect()` is used to actually bring the data into memory.
#' This will retrieve an sf object into R. The `as_tibble()` function can be used
#' interchangeably with `collect` which matches `dbplyr` behaviour.
#'
#' @param x object of class `bcdc_promise`
#' @inheritParams collect
#' @rdname collect-methods
#' @export
#'
#' @examples
#' \donttest{
#' try(
#'   bcdc_query_geodata("bc-airports") %>%
#'     collect()
#' )
#'
#' try(
#'   bcdc_query_geodata("bc-airports") %>%
#'     as_tibble()
#' )
#' }
#'
collect.bcdc_promise <- function(x, ...){

  x$query_list$CQL_FILTER <- finalize_cql(x$query_list$CQL_FILTER)

  query_list <- x$query_list
  cli <- x$cli

  ## Determine total number of records for pagination purposes
  number_of_records <- bcdc_number_wfs_records(query_list, cli)
  chunk_size <- check_chunk_limit()

  if (number_of_records <= chunk_size) {
    cc <- tryCatch(cli$post(body = query_list, encode = "form"),
                   error = function(e) {
                     stop("There was an issue processing this request.
                     Try reducing the size of the object you are trying to retrieve.", call. = FALSE)})

    catch_wfs_error(cc)
    url <- cc$url
    full_url <- cli$url_fetch(query = query_list)
  } else {
    message(glue::glue("This object has {number_of_records} records and requires {ceiling(number_of_records/chunk_size)} paginated requests to complete."))
    sorting_col <- pagination_sort_col(x$cols_df)

    query_list <- c(query_list, sortby = sorting_col)

    # Create pagination client
    cc <- crul::Paginator$new(
      client = cli,
      by = "limit_offset",
      limit_param = "count",
      offset_param = "startIndex",
      limit = number_of_records,
      chunk = chunk_size,
      progress = interactive()
    )

    message("Retrieving data")

    tryCatch(cc$post(body = query_list, encode = "form"),
             error = function(e) {
               stop("There was an issue processing this request.
                     Try reducing the size of the object you are trying to retrieve.", call. = FALSE)})

    url <- cc$url
    full_url <- cc$url_fetch(query = query_list)

    catch_wfs_error(cc)
  }

  txt <- cc$parse("UTF-8")

  as.bcdc_sf(bcdc_read_sf(txt), query_list = query_list, url = url,
             full_url = full_url)
}


#' @inheritParams collect.bcdc_promise
#' @rdname collect-methods
#' @export
as_tibble.bcdc_promise <- collect.bcdc_promise

#' Show SQL and URL used for Web Feature Service request from B.C. Data Catalogue
#'
#' Display Web Feature Service query CQL
#'
#' @param x object of class bcdc_promise or bcdc_sf
#' @inheritParams show_query
#' @describeIn show_query show_query.bcdc_promise
#'
#' @export
#' @examples
#' \donttest{
#' try(
#'   bcdc_query_geodata("bc-environmental-monitoring-locations") %>%
#'     filter(PERMIT_RELATIONSHIP == "DISCHARGE") %>%
#'     show_query()
#' )
#'   }
#'
show_query.bcdc_promise <- function(x, ...) {

  y <- list()
  y$base_url <- x$cli$url
  y$query_list <- x$query_list
  y$query_list$CQL_FILTER <- finalize_cql(y$query_list$CQL_FILTER)
  y$full_url <- x$cli$url_fetch(query = y$query_list)

  as.bcdc_query(y)

}



#' @describeIn show_query show_query.bcdc_promise
#'
#' @export
#' @examples
#' \donttest{
#' try(
#'   air <- bcdc_query_geodata("bc-airports") %>%
#'     collect()
#' )
#'
#' try(
#'   show_query(air)
#' )
#' }
show_query.bcdc_sf <- function(x, ...) {

  y <- list()
  y$url <- attr(x, "url")
  y$query_list <- attr(x, "query_list")
  y$full_url <- attr(x, "full_url")

  as.bcdc_query(y)
}

# collapse vector of cql statements into one
finalize_cql <- function(x, con = wfs_con) {
  if (is.null(x) || !length(x)) return(NULL)
  dbplyr::sql_vector(x, collapse = " AND ", con = con)
}

cat_line_wrap <- function(..., indent = 0, exdent = 1, col = NULL, background_col = NULL, file = stdout()) {
  txt <- strwrap(paste0(..., collapse = ""), indent = indent, exdent = exdent)
  cat_line(txt, col = col, background_col = background_col, file = file)
}

#' @export
"[.bcdc_recordlist" <- function(x, i, j, ..., drop = FALSE) {
  out <- unclass(x)[i]
  as.bcdc_recordlist(out)
}
bcgov/bcdata documentation built on Nov. 22, 2023, 6:06 p.m.