R/utils.R

Defines functions check_api_options apply_api_options_defaults check_here_return_input initialize_init flatten_override_warning api_url_modification query_complete_message query_start_message create_progress_bar show_progress_bar extract_bing_latlng extract_osm_reverse_full check_limit_for_batch check_limit_return_input check_common_args check_method check_verbose_quiet check_address_argument_datatype format_address pause_until print_time get_seconds_elapsed rm_quote get_na_value split_coords filler_df get_setting_value

# Common utility functions


# utility function for extracting setting values according to the method column
get_setting_value <- function(df, method, value_column) {
  return(
    df[which(df[["method"]] == method), ][[value_column]]
  )
}


# For a list of dataframes, creates an NA df with 1 row with the column name supplied
# this is used in parsing the response of the geocodio batch geocoder
filler_df <- function(x, column_names) {
  if (length(x) == 0) {
    filler_df <- data.frame(row.names = 1)

    for (col_name in column_names) {
      filler_df[col_name] <- NA
    }
    return(filler_df)
  } else {
    return(x)
  }
}

# Used by batch census function
# input is a single character value.
# output is an unnamed numeric list with 2 elements: lat, long
# if comma contained in input then split it. otherwise return NA list
split_coords <- function(input) {
  if (grepl(",", input, fixed = TRUE)) {
    split <- as.list(unlist(strsplit(input, ",", fixed = TRUE)))
  } else {
    split <- (list("", ""))
  }
  return(as.numeric(split))
}

# Return a 2 column, 1 row NA tibble dataframe for coordinates that aren't found
# Given the column names (as strings)
get_na_value <- function(lat, long, rows = 1) {
  NA_df <- tibble::tibble(a = rep(as.numeric(NA), rows), b = rep(as.numeric(NA), rows))
  colnames(NA_df) <- c(lat, long)
  return(NA_df)
}

# remove a literal double quote from a string
# used with NSE
rm_quote <- function(string) gsub("\"", "", string)

# How many seconds have elapsed since start time t0 (as defined by a t0 <- Sys.time() call)
get_seconds_elapsed <- function(t0) {
  return(as.numeric(difftime(Sys.time(), t0, units = "secs")))
}

# print time
print_time <- function(text, num_seconds) {
  message(paste0(text, ": ", round(num_seconds, 1), " seconds"))
}

# Use Sys.sleep() to pause until a certain amount of time has elapsed
pause_until <- function(start_time, min_time, debug = FALSE) {
  ## Make sure the proper amount of time has elapsed for the query per min_time
  seconds_elapsed <- get_seconds_elapsed(start_time)
  if (debug == TRUE) print_time("Query completed in", seconds_elapsed)

  # Sleep if necessary to make query take the minimum amount of time
  if (seconds_elapsed < min_time) {
    Sys.sleep(min_time - seconds_elapsed)
    total_time_elapsed <- get_seconds_elapsed(start_time)
    if (debug == TRUE) print_time("Total query time (including sleep)", total_time_elapsed)
  }
}

# Used for mapquest - provide formatted address based on fields
# Could be extended to other providers if no frmt.address is provided - non specific
# input is a data.frame/tibble and the list of fields used for creating
# a formatted address
# output is a tibble with the formatted address
# formatted address follow the order of fields vector
# Result sample:
# # A tibble: 1 x 1
# formatted_address
# <chr>
# 1 ES, 2 Calle de Espoz y Mina
format_address <- function(df, fields) {
  frmt_df <- tibble::as_tibble(df)
  col_order <- intersect(fields, names(frmt_df))
  frmt_df <- dplyr::relocate(frmt_df[col_order], col_order)

  frmt_char <- as.character(apply(frmt_df, 1, function(x) {
    y <- unique(as.character(x))
    y <- y[!y %in% c("", "NA")]
    paste0(y, collapse = ", ")
  }))

  frmt_char[frmt_char == "NA"] <- NA
  frmt_out <- tibble::tibble(formatted_address = frmt_char)
  return(frmt_out)
}


# QA Checks --------------------------------------------------------------------
# functions called by reverse_geo() and/or geo()

# check the data type of an address argument - called by geo() function
# should not be a matrix, class, or dataframe for instance
# allow factor since it could be coerced to a datatype by address handler function
# allow numeric for zip codes etc.
check_address_argument_datatype <- function(arg, arg_name) {
  if (!(is.null(arg) || is.character(arg) || is.numeric(arg) || is.na(arg) || is.factor(arg))) {
    stop(paste0("Improper datatype for ", arg_name, ". See ?geo"), call. = FALSE)
  }
}

check_verbose_quiet <- function(verbose, quiet, reverse) {
  input_terms <- get_coord_address_terms(reverse)

  if (quiet == TRUE && verbose == TRUE) {
    stop(paste0("quiet and verbose cannot both be TRUE. See ?", input_terms$base_func_name))
  }
}

# check that method argument is valid
check_method <- function(method, reverse, mode, batch_funcs) {
  input_terms <- get_coord_address_terms(reverse)

  # all possible methods
  method_services <- unique(tidygeocoder::api_parameter_reference[["method"]])

  # legal batch methods
  batch_methods <- names(batch_funcs)

  # which methods are legal for single input queries
  single_input_methods <- if (reverse == FALSE) {
    c(method_services)
  } else {
    # remove methods that don't have a reverse mode (currently only 'census')
    method_services[!method_services %in% pkg.globals$no_reverse_methods]
  }

  if (mode == "batch" && (!method %in% batch_methods)) {
    stop(paste0(
      'The "', method, '" method does not have a batch',
      if (reverse == TRUE) " reverse" else "",
      " geocoding function. See ?", input_terms$base_func_name
    ), call. = FALSE)
  }

  if (!(method %in% single_input_methods)) {
    stop(paste0("Invalid method argument. See ?", input_terms$base_func_name), call. = FALSE)
  }
}

# check some arguments common to geo() and reverse_geo()
# fun_name is the name of the function that calls this one
check_common_args <- function(fun_name, mode, limit, batch_limit, min_time) {
  if (!(mode %in% c("", "single", "batch"))) {
    stop(paste0("Invalid mode argument. See ?", fun_name), call. = FALSE)
  }

  # limit should either be NULL or numeric and >= 1
  if (!(is.null(limit) || (is.numeric(limit) && limit >= 1))) {
    stop(paste0("limit must be NULL or >= 1. See ?", fun_name), call. = FALSE)
  }

  # batch_limit should either be NULL or numeric and >= 1
  if (!(is.null(batch_limit) || (is.numeric(batch_limit) && batch_limit >= 1))) {
    stop(paste0("batch_limit must be NULL or >= 1. See ?", fun_name), call. = FALSE)
  }

  # min_time should either be NULL or numeric and >= 0
  if (!(is.null(min_time) || (is.numeric(min_time) && min_time >= 0))) {
    stop(paste0("min_time must be NULL or >= 0. See ?", fun_name), call. = FALSE)
  }
}

# This check prevents a address-results misalignment issue https://github.com/jessecambon/tidygeocoder/issues/88
# used in geocode() and reverse_geocode()
check_limit_return_input <- function(limit, return_input) {
  if ((is.null(limit) || limit != 1) && return_input == TRUE) {
    stop("To use limit > 1 or limit = NULL, set return_input to FALSE.", call. = FALSE)
  }
}

# check for conflict between limit and return_coords/return_addresses argument in reverse_geo() and geo()
# return_input = return_coords (or return_addresses
check_limit_for_batch <- function(limit, return_input, reverse) {
  input_terms <- get_coord_address_terms(reverse)

  if ((is.null(limit) || limit != 1) && return_input == TRUE) {
    stop(paste0(
      "For batch geocoding (more than one ", input_terms$input_singular,
      " per query) the limit argument must
    be 1 (the default) OR the ", input_terms$return_arg, ' argument must be FALSE. Possible solutions:
    1) Set the mode argument to "single" to force single (not batch) geocoding
    2) Set limit argument to 1 (ie. 1 result is returned per ', input_terms$input_singular, ")
    3) Set ", input_terms$return_arg, " to FALSE
    See ?", input_terms$base_func_name, " for details."
    ),
    call. = FALSE
    )
  }
}

# Misc -----------------------------------------------------------------------------------------

## function for extracting everything except the single line
## address from the reverse geocoding results of osm and iq
extract_osm_reverse_full <- function(response) {
  a <- response[!(names(response) %in% c("display_name", "boundingbox", "address"))]
  a[sapply(a, function(x) length(x) == 0)] <- NULL # get rid of empty lists
  b <- tibble::as_tibble(response[["address"]])
  c <- tibble::tibble(boundingbox = list(response$boundingbox))

  return(
    tibble::as_tibble(dplyr::bind_cols(as.data.frame(a), b, c))
  )
}

# note issue #112: https://github.com/jessecambon/tidygeocoder/issues/112
extract_bing_latlng <- function(response) {
  # if no rows are found then return an empty data frame so NA results will be returned
  if (length(response$resourceSets$resources[[1]]) == 0) {
    return(data.frame())
  }

  # otherwise extract the latitude and longitude
  latlng <- as.data.frame(matrix(unlist(response$resourceSets$resources[[1]]$point$coordinates),
    ncol = 2, byrow = TRUE
  ), col.names = c("lat", "long"))
  return(latlng)
}


## Progress bars -----------------------------------------------------------------------------

# Conditions for displaying a progress bar
# For consistency/continuity, these are the same conditions
# that are used for the {readr} package
show_progress_bar <- function() {
  getOption("tidygeocoder.progress_bar", TRUE) && # option is TRUE or not found
    interactive() && # interactive session
    !isTRUE(getOption("rstudio.notebook.executing")) && # Not running in an RStudio notebook chunk
    !isTRUE(getOption("knitr.in.progress")) # Not actively knitting a document
}


# create a progress bar using the {progress} package
# format_text formats the progress bar and total_count is the total
# number of iterations for the progress bar (number of addresses or coordinates)
create_progress_bar <- function(total_count,
                                format_text = "[:bar] :current/:total (:percent) Elapsed: :elapsed Remaining: :eta") {
  pb <- progress::progress_bar$new(
    format = format_text,
    clear = FALSE,
    total = total_count,
    show_after = 0
  )

  pb$tick(0) # start progress bar
  return(pb)
}

# Create a message to tell user how many addresses/coordinates are getting sent
# in a batch query and to what geocoding service
# reverse = TRUE for reverse geocoding
query_start_message <- function(method, num_inputs, reverse, batch, display_time = FALSE) {
  input_terms <- get_coord_address_terms(reverse)

  message(paste0(
    "Passing ",
    format(num_inputs, big.mark = ","), " ",
    if (num_inputs == 1) input_terms$input_singular else input_terms$input_plural,
    " to the ",
    # get proper name of the service
    get_setting_value(tidygeocoder::api_info_reference, method, "method_display_name"), " ",
    if (batch == TRUE) "batch" else paste0("single ", input_terms$input_singular),
    " geocoder",
    # display time when query was sent
    if (display_time == TRUE) paste0(" - ", format(Sys.time(), "%I:%M %p")) else ""
  ))
}

query_complete_message <- function(start_time) {
  print_time("Query completed in", get_seconds_elapsed(start_time))
}

# Misc -------------------------------------------------------------------------

# if necessary, modify the API URL - called by geo() and reverse_geo()
# returns the API URL
# reverse indicates if query is reverse geocoding or forward geocoding
api_url_modification <- function(method, api_url, generic_query, custom_query, reverse) {

  # Workaround for Mapbox/TomTom - The search_text should be in the API URL
  if (method %in% c("mapbox", "tomtom")) {
    api_url <- if (reverse == TRUE) {
      paste0(api_url, custom_query[["to_url"]], ".json")
    } else {
      gsub(" ", "%20", paste0(api_url, generic_query[["address"]], ".json"))
    }
    # Remove semicolons (Reserved for batch)
    api_url <- gsub(";", ",", api_url)
  }

  return(api_url)
}


# for specific geocoders in batch setting...
# give a warning that the query is going to run with flatten=TRUE
# even though the user specified flatten=FALSE
flatten_override_warning <- function(flatten, method, reverse, batch) {
  if (flatten == FALSE && (method %in% pkg.globals$batch_flatten_required_methods)) {
    input_terms <- get_coord_address_terms(reverse)
    message(paste0(
      "Note: flatten=FALSE is ignored. Outputs must be flattened for the ",
      get_setting_value(tidygeocoder::api_info_reference, method, "method_display_name"), " ",
      if (batch == TRUE) "batch" else paste0("single ", input_terms$input_singular),
      " geocoder"
    ))
  }
}



# Api options functions ----------------------------------------------------------------------

# Set the api_options[["init"]] parameter
# init is for internal package use only, used to designate if the geo() or reverse_geo() function
# is being called for the first time (init = TRUE) or if it has called itself
# recursively (init = FALSE)
initialize_init <- function(api_options) {
  if (is.null(api_options[["init"]])) {
    api_options[["init"]] <- TRUE
  }
  return(api_options)
}

# check for HERE method batch queries --- for use in geo() and reverse_geo()
check_here_return_input <- function(here_request_id, return_input, reverse) {
  input_terms <- get_coord_address_terms(reverse)
  
  # If a previous job is requested return_addresses should be FALSE
  # This is because the job won't send the addresses, but would recover the
  # results of a previous request
  if (is.character(here_request_id) && return_input == TRUE) {
    stop("HERE: When requesting a previous job via here_request_id, set ", input_terms$return_arg,
         " to FALSE. See ?", input_terms$base_func_name, " for details.",
         call. = FALSE
    )
  }
}

# apply api options defaults for options not specified by the user
# that are relevant for the specified method
# called by geo() and reverse_geo()
apply_api_options_defaults <- function(method, api_options) {
  for (api_opt in names(pkg.globals$default_api_options)) {
    api_opt_method <- strsplit(api_opt, "_")[[1]][[1]] # extract method name from api option
    
    if ((method == api_opt_method) && is.null(api_options[[api_opt]])) {
      api_options[[api_opt]] <- pkg.globals$default_api_options[[api_opt]]
    } 
  }
  return(api_options)
}

# throw error if method and a specified api_option is mismatched 
# ie. method='census' and api_options(list(geocodio_hipaa=TRUE))
# return_inputs : return_addresses for geo() or return_inputs for reverse_geo()
check_api_options <- function(method, api_options, reverse, return_inputs) {
  
  stopifnot(
    is.null(api_options[["mapbox_permanent"]]) || is.logical(api_options[["mapbox_permanent"]]),
    is.null(api_options[["here_request_id"]]) || is.character(api_options[["here_request_id"]]),
    is.null(api_options[["mapquest_open"]]) || is.logical(api_options[["mapquest_open"]]),
    is.null(api_options[["geocodio_hipaa"]]) || is.logical(api_options[["geocodio_hipaa"]])
  )
  
  if (method == "here") check_here_return_input(api_options[["here_request_id"]], return_inputs, reverse = reverse)
  
  
  # cycle through the api options specified (except for init)
  #  if (api_options$init == TRUE) {
  api_method_mismatch_args <- c() # store mismatch api_options here
  api_bad_args <- c() # store invalid api_options here
  error_message <- c() # store error message here (if any)
  
  for (api_opt in names(api_options)[!names(api_options) %in% pkg.globals$special_api_options]) {
    # extract method name from api_option
    api_opt_method <- strsplit(api_opt, "_")[[1]][[1]]
    
    # check if api parameter is valid
    if (!api_opt %in% names(pkg.globals$default_api_options)) {
      api_bad_args <- c(api_bad_args, api_opt)
    }
    # if api parameter is valid but there is a mismatch with selected method
    # then add offending arg to vector
    else if (api_opt_method != method) {
      api_method_mismatch_args <- c(api_method_mismatch_args, api_opt)
    }
  } # end loop
  
  # error message for bad api arguments
  if (length(api_bad_args) != 0) {
    error_message <- c(error_message,
       paste0(
         "Invalid api_options parameter(s) used:\n\n",
         paste0(api_bad_args, sep = "  "), "\n\n"
       ))
  }
  
  # error message for api arguments that mismatch with the method argument
  if (length(api_method_mismatch_args) != 0) {
    error_message <- c(error_message,
      'method = "', method, '" is not compatible with the specified api_options parameter(s):\n\n',
      paste0(api_method_mismatch_args, sep = "  "), "\n\n"
    )
  }
  
  # show error (if applicable)
  if (length(error_message) != 0) {
    stop(error_message,
      'See ?', if (reverse == TRUE) "reverse_geo" else "geo",
      call. = FALSE
    )
  }
#  }
}
jessecambon/tidygeocoder documentation built on Jan. 26, 2023, 4:03 p.m.