R/meteo_distance.R

Defines functions meteo_nearby_stations meteo_distance meteo_process_geographic_data meteo_spherical_distance deg2rad

Documented in deg2rad meteo_distance meteo_nearby_stations meteo_process_geographic_data meteo_spherical_distance

#' Find weather monitors near locations
#'
#' This function inputs a dataframe with latitudes and longitudes of locations
#' and creates a dataframe with monitors within a certain radius of those
#' locations. The function can also be used, with the \code{limit} argument, to
#' pull a certain number of the closest weather monitors to each location.
#' The weather monitor IDs in the output dataframe can be used with other
#' \code{rnoaa} functions to pull data from all available weather stations near
#' a location (e.g., \code{\link{meteo_pull_monitors}}).
#'
#' Great circle distance is used to determine whether a weather monitor is
#' within the required radius.
#'
#' @export
#'
#' @param lat_lon_df A dataframe that contains the latitude, longitude, and
#'    a unique identifier for each location (\code{id}). For an example of the
#'    proper format for this dataframe, see the examples below. Latitude and
#'    longitude must both be in units of decimal degrees. Southern latitudes
#'    and Western longitudes should be given as negative values.
#' @param lat_colname A character string giving the name of the latitude column
#'    in the \code{lat_lon_df} dataframe.
#' @param lon_colname A character string giving the name of the longitude column
#'    in the \code{lat_lon_df} dataframe.
#' @param station_data The output of \code{ghcnd_stations()}, which is
#'    a current list of weather stations available through NOAA for the GHCND
#'    dataset. The format of this is a dataframe
#'    with one row per weather station. Latitude and longitude for the station
#'    locations should be in columns with the names "latitude" and "longitude",
#'    consistent with the output from \code{ghcnd_stations()}. To save time,
#'    run the \code{ghcnd_stations} call and save the output to an object,
#'    rather than rerunning the default every time (see the examples in
#'    \code{\link{meteo_nearby_stations}}).
#' @param year_min A numeric value giving the earliest year from which you
#'    ultimately want weather data (e.g., 2013, if you only are interested in
#'    data from 2013 and later).
#' @param year_max A numeric value giving the latest year from which you
#'    ultimately want weather data.
#' @param radius A numeric vector giving the radius (in kilometers) within which
#'    to search for monitors near a location.
#' @param limit An integer giving the maximum number of monitors to include for
#'    each location. The [x] closest monitors will be kept. Default is NULL
#'    (pull everything available, within the radius if the radius is specified).
#' @inheritParams ghcnd_search
#'
#' @return A list containing dataframes with the sets of unique weather stations
#'    within the search radius for each location. Site IDs for the weather
#'    stations given in this dataframe can be used in conjunction with other
#'    functions in the \code{rnoaa} package to pull weather data for the
#'    station. The dataframe for each location includes:
#'    \itemize{
#'    \item \code{id}: The weather station ID, which can be used in other
#'    functions to pull weather data from the station;
#'    \item \code{name}: The weather station name;
#'    \item \code{latitude}: The station's latitude, in decimal degrees.
#'    Southern latitudes will be negative;
#'    \item \code{longitude}: The station's longitude, in decimal degrees.
#'    Western longitudes will be negative;
#'    \item \code{distance}: The station's distance, in kilometers, from the
#'    location.
#'    }
#'
#' @note By default, this function will pull the full station list from NOAA
#'    to use to identify nearby locations. If you will be creating lists of
#'    monitors nearby several stations, you can save some time by using the
#'    \code{\link{ghcnd_stations}} function separately to create an object
#'    with all stations and then use the argument \code{station_data} in
#'    this function to reference that object, rather than using this function's
#'    defaults (see examples).
#'
#' @seealso The weather monitor IDs generated by this function can be used in
#'    other functions in the \code{rnoaa} package, like
#'    \code{\link{meteo_pull_monitors}} and \code{\link{meteo_tidy_ghcnd}}, to
#'    pull weather data from weather monitors near a location.
#'
#' @author Alex Simmons \email{a2.simmons@@qut.edu.au},
#'    Brooke Anderson \email{brooke.anderson@@colostate.edu}
#'
#' @examples
#' \dontrun{
#'
#' station_data <- ghcnd_stations() # Takes a while to run
#'
#' lat_lon_df <- data.frame(id = c("sydney", "brisbane"),
#'                          latitude = c(-33.8675, -27.4710),
#'                          longitude = c(151.2070, 153.0234))
#' nearby_stations <-  meteo_nearby_stations(lat_lon_df = lat_lon_df,
#'                     station_data = station_data, radius = 10)
#'
#' miami <- data.frame(id = "miami", latitude = 25.7617, longitude = -80.1918)
#'
#' # Get all stations within 50 kilometers
#' meteo_nearby_stations(lat_lon_df = miami, station_data = station_data,
#'                       radius = 50, var = c("PRCP", "TMAX"),
#'                       year_min = 1992, year_max = 1992)
#' # Get the closest 10 monitors
#' meteo_nearby_stations(lat_lon_df = miami, station_data = station_data,
#'                       limit = 10, var = c("PRCP", "TMAX"),
#'                       year_min = 1992, year_max = 1992)
#' }
meteo_nearby_stations <- function(lat_lon_df, lat_colname = "latitude",
                                  lon_colname = "longitude",
                                  station_data = ghcnd_stations(),
                                  var = "all", year_min = NULL,
                                  year_max = NULL, radius = NULL,
                                  limit = NULL){
  var <- tolower(var)

  # Ensure `id` in `lat_lon_df` is character, not factor
  lat_lon_df$id <- as.character(lat_lon_df$id)

  # Handle generic values for `var`, `year_min`, and `year_max` arguments
  if (is.null(year_min)) year_min <- min(station_data$first_year, na.rm = TRUE)
  if (is.null(year_max)) year_max <- max(station_data$last_year, na.rm = TRUE)
  if (length(var) == 1 && var == "all") var <- unique(station_data$element)

  dots <- list(~last_year >= year_min & first_year <= year_max &
                 element %in% toupper(var) & !is.na(element))
  station_data <- dplyr::filter_(station_data, .dots = dots) %>%
    dplyr::select_(~id, ~name, ~latitude, ~longitude) %>%
    dplyr::distinct_()

  location_stations <- as.data.frame(lat_lon_df) %>%
    split(.[, "id"]) %>%
    purrr::map(function(x) {
      station_ids <- meteo_distance(station_data = station_data,
                                    lat = x[ , lat_colname],
                                    long = x[ , lon_colname],
                                    radius = radius,
                                    limit = limit)
      return(station_ids)
    })
  return(location_stations)
}

#' Find all monitors within a radius of a location
#'
#' This function will identify all weather stations with a specified radius of
#' a location. If no radius is given, the function will return a dataframe
#' of all available monitors, sorted by distance to the location. The
#' \code{limit} argument can be used to limit the output dataframe to the [x]
#' closest monitors to the location.
#'
#' @export
#' @param lat Latitude of the location. Southern latitudes should be given
#'    as negative values.
#' @param long Longitude of the location. Western longitudes should be given as
#'    negative values.
#' @param units Units of the latitude and longitude values. Possible values
#'    are:
#'    \itemize{
#'    \item \code{deg}: Degrees (default);
#'    \item \code{rad}: Radians.
#'    }
#' @inheritParams meteo_nearby_stations
#'
#' @return A dataframe of weather stations near the location. This is the
#'    single-location version of the return value for
#'    \code{\link{meteo_nearby_stations}}.
#'
#' @author Alex Simmons \email{a2.simmons@@qut.edu.au},
#'    Brooke Anderson \email{brooke.anderson@@colostate.edu}
#'
#' @examples \dontrun{
#' station_data <- ghcnd_stations()
#' meteo_distance(station_data, -33, 151, radius = 10, limit = 10)
#' meteo_distance(station_data, -33, 151, radius = 10, limit = 3)
#'
#' # FIXME - units param is ignored
#' #meteo_distance(station_data, -33, 151, units = 'rad', radius = 10, limit = 3)
#' }
meteo_distance <- function(station_data, lat, long,
                           units = 'deg', radius = NULL, limit = NULL) {

  data <- meteo_process_geographic_data(
    station_data = station_data,
    lat = lat,
    long = long
  )

  if (!is.null(radius)) {
    data <- data[data$distance < radius, ]
  }

  if (!is.null(limit)) {
    data <- data[1:min(limit, nrow(data)), ]
  }
  return(data)
}

#' Calculate the distances between a location and all available stations
#'
#' This function takes a single location and a dataset of available weather stations
#' and calculates the distance between the location and each of the stations,
#' using the great circle method. A new column is added to the dataset of
#' available weather stations giving the distance between each station and
#' the input location. The station dataset is then sorted from closest to
#' furthest distance to the location and returned as the function output.
#'
#' @export
#' @inheritParams meteo_distance
#'
#' @return The \code{station_data} dataframe that is input, but with a
#'    \code{distance} column added that gives the distance to the location
#'    (in kilometers), and re-ordered by distance between each station and
#'    the location (closest weather stations first).
#'
#' @author Alex Simmons \email{a2.simmons@@qut.edu.au},
#'    Brooke Anderson \email{brooke.anderson@@colostate.edu}
#'
#' @examples \dontrun{
#' station_data <- ghcnd_stations()
#' meteo_process_geographic_data(station_data, -33, 151)
#' }
meteo_process_geographic_data <- function(station_data,
                                          lat,
                                          long,
                                          units = 'deg') {

  # Convert headers to lowercase for consistency across code
  names(station_data) <- tolower(names(station_data))

  # Caluclate distance between points
  station_data$distance <- meteo_spherical_distance(lat1 = lat, long1 = long,
                                                   lat2 = station_data$latitude,
                                                   long2 = station_data$longitude,
                                                   units = "deg")

  # Sort data into ascending order by distance column
  station_data <- dplyr::arrange_(station_data, ~ distance)

  return(station_data)
} # End meteo_process_geographic_data

#' Calculate the distance between two locations
#'
#' This function uses the haversine formula to calculate the great circle
#' distance between two locations, identified by their latitudes and longitudes.
#'
#' @export
#' @param lat1 Latitude of the first location.
#' @param long1 Longitude of the first location.
#' @param lat2 Latitude of the second location.
#' @param long2 Longitude of the second location.
#' @inheritParams meteo_distance
#'
#' @return A numeric value giving the distance (in kilometers) between the
#'    pair of locations.
#'
#' @note This function assumes an earth radius of 6,371 km.
#'
#' @author Alex Simmons \email{a2.simmons@@qut.edu.au},
#'    Brooke Anderson \email{brooke.anderson@@colostate.edu}
#'
#' @examples
#'
#' meteo_spherical_distance(lat1 = -27.4667, long1 = 153.0217,
#'                          lat2 = -27.4710, long2 = 153.0234)
meteo_spherical_distance <- function(lat1, long1, lat2, long2, units = 'deg') {

  radius_earth <- 6371

  # Convert angle values into radians
  if (units == 'deg') {
    lat1 <- deg2rad(lat1)
    long1 <- deg2rad(long1)
    lat2 <- deg2rad(lat2)
    long2 <- deg2rad(long2)
  } else if (units != 'rad') {
    stop("The `units` argument must be `deg` or `rad`.", call. = FALSE)
  }

  # Determine distance using the haversine formula, assuming a spherical earth
  a <- sin((lat2 - lat1) / 2) ^ 2 + cos(lat1) * cos(lat2) *
    sin((long2 - long1) / 2) ^ 2

  d <- 2 * atan2(sqrt(a), sqrt(1 - a)) * radius_earth
  return(d)

} # End calculate_spherical_distance

#' Convert from degrees to radians
#'
#' @param deg A numeric vector in units of degrees.
#'
#' @return The input numeric vector, converted to units of radians.
deg2rad <- function(deg) {
  return(deg*pi/180)
} # End deg2rad
leighseverson/rnoaa documentation built on May 21, 2019, 3:06 a.m.