R/geocode.R

Defines functions reverse_geocode geocode

Documented in geocode reverse_geocode

# ===================================================================== #
#  An R package by Certe:                                               #
#  https://github.com/certe-medical-epidemiology                        #
#                                                                       #
#  Licensed as GPL-v2.0.                                                #
#                                                                       #
#  Developed at non-profit organisation Certe Medical Diagnostics &     #
#  Advice, department of Medical Epidemiology.                          #
#                                                                       #
#  This R package is free software; you can freely use and distribute   #
#  it for both personal and commercial purposes under the terms of the  #
#  GNU General Public License version 2.0 (GNU GPL-2), as published by  #
#  the Free Software Foundation.                                        #
#                                                                       #
#  We created this package for both routine data analysis and academic  #
#  research and it was publicly released in the hope that it will be    #
#  useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
# ===================================================================== #

#' Geocoding to Find Coordinates and Addresses
#'
#' **Geocoding** is the process of retrieving geographic coordinates based on text, such as an address or the name of a place ([Wikipedia page](https://en.wikipedia.org/wiki/Address_geocoding)). On the other hand, **reverse geocoding** is the process of retrieving the name and address from geographic coordinates ([Wikipedia page](https://en.wikipedia.org/wiki/Reverse_geocoding)).
#' @param place a (vector of) names or addresses of places
#' @param sf_data an 'sf' object or an 'sfc' object (i.e., a vector with geometric `sfc_POINT`s). Can also be a character vector, in which case [geocode()] will be called first.
#' @param as_coordinates a [logical] to indicate whether the result should be returned as coordinates (i.e., class `sfc_POINT`)
#' @param only_netherlands a [logical] to indicate whether only Dutch places should be searched
#' @param api_key free API key created at <https://geocode.maps.co>
#' @param api_requests_per_second number of requests per second
#' @details
#' These functions use [OpenStreetMap (OSM)](https://openstreetmap.org), by using the API of <https://geocode.maps.co>.
#'
#' [geocode()] provides geocoding and returns an 'sf' [data.frame] at default. In case of multiple results, the distance from the main Certe building in Groningen is leading.
#'
#' [reverse_geocode()] provides reversed geocoding and returns a [data.frame] with the columns "name", "address", "zipcode" and "city".
#'
#' For both functions, the <https://geocode.maps.co> API will only be called on unique input values, to increase speed. 
#' @source Data © OpenStreetMap contributors, ODbL 1.0. <https://osm.org/copyright>
#' @name geocoding
#' @rdname geocoding
#' @export
#' @examples
#' \dontrun{
#' 
#' # geocoding: retrieve 'sf' data.frame based on place names
#' coord <- geocode("Van Swietenlaan 2, Groningen")
#' coord
#' 
#' # reverse geocoding: get the name and address
#' reverse_geocode(coord)
#' 
#' # places can be any text, and the results are prioritised based on
#' # the distance from the main Certe building, so:
#' reverse_geocode(c("Certe", "IKEA"))
#' 
#' hospitals <- geocode(c("Martini ziekenhuis",
#'                        "Medisch Centrum Leeuwarden",
#'                        "Tjongerschans Heerenveen",
#'                        "Scheper Emmen"))
#' hospitals
#' 
#' if (require("certeplot2")) {
#'   geo_gemeenten |>
#'     crop_certe() |>
#'     plot2(datalabels = FALSE) |>
#'     add_sf(hospitals, colour = "certeroze", datalabels = place)
#' }
#' 
#' }
geocode <- function(place, as_coordinates = FALSE, only_netherlands = TRUE, api_key = read_secret("gis.api_key"), api_requests_per_second = 1) {
  check_is_installed("sf")
  
  api <- paste("https://geocode.maps.co/search?format=json",
               "q={place}",
               ifelse(isTRUE(only_netherlands), "countrycodes=nl", ""),
               "limit=50",
               "namedetails=1",
               paste0("api_key=", api_key),
               sep = "&")
  
  place_long <- place
  place <- unique(place)
  
  out <- data.frame(place = place,
                    name = character(length(place)),
                    latitude = numeric(length(place)),
                    longitude = numeric(length(place)),
                    stringsAsFactors = FALSE)
  
  # HQ of Certe
  van_swietenlaan_2 <- sf::st_sfc(sf::st_point(c(6.5504128, 53.1931877)),
                                  # set CRS to degrees
                                  crs = 4326)
  
  message("Geocoding (", api_requests_per_second, " per second) ", appendLF = FALSE)
  for (i in seq_len(length(place))) {
    message(".", appendLF = FALSE)
    url <- gsub("{place}", place[i], api, fixed = TRUE)
    osm <- tryCatch({
      result <- get_df_from_remote_json(url)
      # wait for new API requests
      Sys.sleep(1 / api_requests_per_second)
      if (NROW(result) > 1) {
        # keep track of how the OSM algorithm sorts:
        result$osm_sorting <- seq_len(nrow(result))
        # multiple results, determine distance from the HQ of Certe
        result$metres_from_certe <- round(
          vapply(FUN.VALUE = double(1),
                 X = get_bbox(result$boundingbox,
                              # set CRS to degrees
                              crs = 4326),
                 FUN = function(x)
                   as.double(sf::st_distance(sf::st_as_sfc(x),
                                             van_swietenlaan_2))),
          # round on 1000 metres:
          digits = -3)
        # return first hit based on distance from Certe HQ and OSM sorting
        result[order(result$metres_from_certe, result$osm_sorting)[1], , drop = FALSE]
      } else {
        result[1, , drop = FALSE]
      }
    }, error = function(e) {
      return(NULL)
    })
    if (is.null(osm) || length(osm) == 0 || is.na(osm[[1]])) {
      message("No coordinates found for '", place[i], "'", appendLF = TRUE)
      out$latitude[i] <- 0
      out$longitude[i] <- 0
    } else {
      out$name[i] <- ifelse("short_name" %in% colnames(osm$namedetails),
                            osm$namedetails$short_name,
                            ifelse("name" %in% colnames(osm$namedetails),
                                   osm$namedetails$name,
                                   NA_character_))
      out$latitude[i] <- as.double(osm$lat)
      out$longitude[i] <- as.double(osm$lon)
    }
  }
  message(" Done.")
  
  # 'de-uniquify' the data set
  out <- out[match(place_long, place), , drop = FALSE]
  rownames(out) <- NULL
  
  # as sf with geometry column and same CRS as included data sets
  out <- sf::st_as_sf(out,
                      coords = c("longitude", "latitude"),
                      # take the CRS from included datasets
                      crs = 4326)
  # replace lat=0,lon=0 coordinates with valid empty ones
  out$geometry[which(vapply(FUN.VALUE = logical(1),
                            out$geometry,
                            function(sfc) identical(unclass(sfc), c(0, 0))))] <- sf::st_point()
  
  # transform to same CRS as used in the included datasets
  out <- sf::st_transform(out, crs = sf::st_crs(certegis::geo_postcodes4))
  
  if (isTRUE(as_coordinates)) {
    out$geometry
  } else {
    out
  }
}

#' @rdname geocoding
#' @importFrom dplyr filter
#' @export
reverse_geocode <- function(sf_data, api_key = read_secret("gis.api_key"), api_requests_per_second = 1) {
  check_is_installed("sf")
  loadNamespace("sf")
  
  api <- paste("https://geocode.maps.co/reverse?format=json",
               "lat={latitude}",
               "lon={longitude}",
               "limit=50",
               "namedetails=1",
               paste0("api_key=", api_key),
               sep = "&")
  
  if (NCOL(sf_data) == 1 && is.character(sf_data)) {
    # no coordinates but text instead, do regular geocoding first
    sf_data <- geocode(sf_data, api_key = api_key, api_requests_per_second = api_requests_per_second)
  }
  
  # get the lat/lon
  if (inherits(sf_data, c("sf", "sfc"))) {
    lat <- latitude(sf_data)
    lon <- longitude(sf_data)
  } else {
    stop("unable to retrieve latitude/longitude")
  }
  coord_long <- paste(lat, lon)
  coord <- unique(coord_long)
  lat <- lat[match(coord, coord_long)]
  lon <- lon[match(coord, coord_long)]
  out <- data.frame(coord = coord,
                    name = rep(NA_character_, length(coord)),
                    address = rep(NA_character_, length(coord)),
                    zipcode = rep(NA_character_, length(coord)),
                    city = rep(NA_character_, length(coord)))
  for (i in seq_len(length(coord))) {
    if (is.nan(lat[i])) {
      # missing coordinates
      next
    }
    url <- api
    url <- gsub("{latitude}", lat[i], url, fixed = TRUE)
    url <- gsub("{longitude}", lon[i], url, fixed = TRUE)
    osm <- tryCatch({
      result <- as.data.frame(get_df_from_remote_json(url)$address, stringsAsFactors = FALSE)
      if (all(c("road", "house_number") %in% colnames(result))) {
        result$address <- trimws(paste(result$road, result$house_number))
      } else {
        result$address <- trimws(result$road)
      }
      if (any(c("amenity", "shop", "building") %in% colnames(result)) && !"place" %in% colnames(result)) {
        result$place <- c(result$amenity, result$shop, result$building)[1]
      }
      # fair use is 1 per second
      Sys.sleep(0.25)
      result$zipcode <- result$postcode
      result[, colnames(result)[which(colnames(result) %in% c("place", "address", "zipcode", "city", "town"))], drop = FALSE]
    }, error = function(e) {
      message("unable to retrieve address: ", e$message)
      return(NULL)
    })
    if (!is.null(osm)) {
      if ("place" %in% colnames(osm)) {
        out$name[i] <- osm$place
      }
      out$address[i] <- osm$address
      out$zipcode[i] <- osm$zipcode
      if ("city" %in% colnames(osm)) {
        out$city[i] <- osm$city
      } else if ("town" %in% colnames(osm)) {
        out$city[i] <- osm$town      
      }
    }
  }
  out <- out[match(coord_long, coord), c("name", "address", "zipcode", "city"), drop = FALSE]
  rownames(out) <- NULL
  out
}
certe-medical-epidemiology/certegis documentation built on Dec. 10, 2024, 6:27 a.m.